1
0
mirror of https://github.com/hashcat/hashcat.git synced 2025-07-31 02:48:50 +00:00

Merge branch 'master' into rule_purgeclass

This commit is contained in:
Gabriele Gristina 2025-07-09 20:01:58 +02:00 committed by GitHub
commit d5c34631ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
198 changed files with 15033 additions and 2282 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
.DS_Store
*/.DS_Store
*.exe
*.bin
*.app

View File

@ -19,7 +19,7 @@ cd win-iconv/
cmake -D WIN_ICONV_BUILD_EXECUTABLE=OFF -D CMAKE_INSTALL_PREFIX=/opt/win-iconv-64 -D CMAKE_CXX_COMPILER=$(which x86_64-w64-mingw32-g++) -D CMAKE_C_COMPILER=$(which x86_64-w64-mingw32-gcc) -D CMAKE_SYSTEM_NAME=Windows
sudo make install
cd ../
wget https://repo.msys2.org/mingw/mingw64/mingw-w64-x86_64-python-3.12.10-1-any.pkg.tar.zst
wget https://repo.msys2.org/mingw/mingw64/mingw-w64-x86_64-python-3.12.11-1-any.pkg.tar.zst
sudo mkdir /opt/win-python
sudo tar --zstd -xf mingw-w64-x86_64-python-3.12.11-1-any.pkg.tar.zst -C /opt/win-python
```

View File

@ -16,7 +16,7 @@
GLOBAL_AS const bf_t *bfs_buf, \
CONSTANT_AS const u32 &combs_mode, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#else // CUDA, HIP, OpenCL

View File

@ -1946,6 +1946,19 @@ DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c)
#endif
/**
* arithmetic operations
*/
// Returns the high 32 bits of the full 64-bit product x * y.
DECLSPEC u32 hc_umulhi (const u32 x, const u32 y)
{
#if defined IS_CUDA || defined IS_HIP
// native intrinsic on CUDA and HIP
return __umulhi (x, y);
#else
// portable fallback: widen to 64 bit, multiply, keep the upper half
return h32_from_64_S ((u64) x * (u64) y);
#endif
}
/**
* pure scalar functions
*/
@ -41405,7 +41418,6 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
#endif
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
@ -41979,6 +41991,582 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u
w0[1] = 0;
w0[0] = 0;
break;
case 16:
w7[3] = hc_byte_perm_S (w3[2], w3[3], selector);
w7[2] = hc_byte_perm_S (w3[1], w3[2], selector);
w7[1] = hc_byte_perm_S (w3[0], w3[1], selector);
w7[0] = hc_byte_perm_S (w2[3], w3[0], selector);
w6[3] = hc_byte_perm_S (w2[2], w2[3], selector);
w6[2] = hc_byte_perm_S (w2[1], w2[2], selector);
w6[1] = hc_byte_perm_S (w2[0], w2[1], selector);
w6[0] = hc_byte_perm_S (w1[3], w2[0], selector);
w5[3] = hc_byte_perm_S (w1[2], w1[3], selector);
w5[2] = hc_byte_perm_S (w1[1], w1[2], selector);
w5[1] = hc_byte_perm_S (w1[0], w1[1], selector);
w5[0] = hc_byte_perm_S (w0[3], w1[0], selector);
w4[3] = hc_byte_perm_S (w0[2], w0[3], selector);
w4[2] = hc_byte_perm_S (w0[1], w0[2], selector);
w4[1] = hc_byte_perm_S (w0[0], w0[1], selector);
w4[0] = hc_byte_perm_S ( 0, w0[0], selector);
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 17:
w7[3] = hc_byte_perm_S (w3[1], w3[2], selector);
w7[2] = hc_byte_perm_S (w3[0], w3[1], selector);
w7[1] = hc_byte_perm_S (w2[3], w3[0], selector);
w7[0] = hc_byte_perm_S (w2[2], w2[3], selector);
w6[3] = hc_byte_perm_S (w2[1], w2[2], selector);
w6[2] = hc_byte_perm_S (w2[0], w2[1], selector);
w6[1] = hc_byte_perm_S (w1[3], w2[0], selector);
w6[0] = hc_byte_perm_S (w1[2], w1[3], selector);
w5[3] = hc_byte_perm_S (w1[1], w1[2], selector);
w5[2] = hc_byte_perm_S (w1[0], w1[1], selector);
w5[1] = hc_byte_perm_S (w0[3], w1[0], selector);
w5[0] = hc_byte_perm_S (w0[2], w0[3], selector);
w4[3] = hc_byte_perm_S (w0[1], w0[2], selector);
w4[2] = hc_byte_perm_S (w0[0], w0[1], selector);
w4[1] = hc_byte_perm_S ( 0, w0[0], selector);
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 18:
w7[3] = hc_byte_perm_S (w3[0], w3[1], selector);
w7[2] = hc_byte_perm_S (w2[3], w3[0], selector);
w7[1] = hc_byte_perm_S (w2[2], w2[3], selector);
w7[0] = hc_byte_perm_S (w2[1], w2[2], selector);
w6[3] = hc_byte_perm_S (w2[0], w2[1], selector);
w6[2] = hc_byte_perm_S (w1[3], w2[0], selector);
w6[1] = hc_byte_perm_S (w1[2], w1[3], selector);
w6[0] = hc_byte_perm_S (w1[1], w1[2], selector);
w5[3] = hc_byte_perm_S (w1[0], w1[1], selector);
w5[2] = hc_byte_perm_S (w0[3], w1[0], selector);
w5[1] = hc_byte_perm_S (w0[2], w0[3], selector);
w5[0] = hc_byte_perm_S (w0[1], w0[2], selector);
w4[3] = hc_byte_perm_S (w0[0], w0[1], selector);
w4[2] = hc_byte_perm_S ( 0, w0[0], selector);
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 19:
w7[3] = hc_byte_perm_S (w2[3], w3[0], selector);
w7[2] = hc_byte_perm_S (w2[2], w2[3], selector);
w7[1] = hc_byte_perm_S (w2[1], w2[2], selector);
w7[0] = hc_byte_perm_S (w2[0], w2[1], selector);
w6[3] = hc_byte_perm_S (w1[3], w2[0], selector);
w6[2] = hc_byte_perm_S (w1[2], w1[3], selector);
w6[1] = hc_byte_perm_S (w1[1], w1[2], selector);
w6[0] = hc_byte_perm_S (w1[0], w1[1], selector);
w5[3] = hc_byte_perm_S (w0[3], w1[0], selector);
w5[2] = hc_byte_perm_S (w0[2], w0[3], selector);
w5[1] = hc_byte_perm_S (w0[1], w0[2], selector);
w5[0] = hc_byte_perm_S (w0[0], w0[1], selector);
w4[3] = hc_byte_perm_S ( 0, w0[0], selector);
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 20:
w7[3] = hc_byte_perm_S (w2[2], w2[3], selector);
w7[2] = hc_byte_perm_S (w2[1], w2[2], selector);
w7[1] = hc_byte_perm_S (w2[0], w2[1], selector);
w7[0] = hc_byte_perm_S (w1[3], w2[0], selector);
w6[3] = hc_byte_perm_S (w1[2], w1[3], selector);
w6[2] = hc_byte_perm_S (w1[1], w1[2], selector);
w6[1] = hc_byte_perm_S (w1[0], w1[1], selector);
w6[0] = hc_byte_perm_S (w0[3], w1[0], selector);
w5[3] = hc_byte_perm_S (w0[2], w0[3], selector);
w5[2] = hc_byte_perm_S (w0[1], w0[2], selector);
w5[1] = hc_byte_perm_S (w0[0], w0[1], selector);
w5[0] = hc_byte_perm_S ( 0, w0[0], selector);
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 21:
w7[3] = hc_byte_perm_S (w2[1], w2[2], selector);
w7[2] = hc_byte_perm_S (w2[0], w2[1], selector);
w7[1] = hc_byte_perm_S (w1[3], w2[0], selector);
w7[0] = hc_byte_perm_S (w1[2], w1[3], selector);
w6[3] = hc_byte_perm_S (w1[1], w1[2], selector);
w6[2] = hc_byte_perm_S (w1[0], w1[1], selector);
w6[1] = hc_byte_perm_S (w0[3], w1[0], selector);
w6[0] = hc_byte_perm_S (w0[2], w0[3], selector);
w5[3] = hc_byte_perm_S (w0[1], w0[2], selector);
w5[2] = hc_byte_perm_S (w0[0], w0[1], selector);
w5[1] = hc_byte_perm_S ( 0, w0[0], selector);
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 22:
w7[3] = hc_byte_perm_S (w2[0], w2[1], selector);
w7[2] = hc_byte_perm_S (w1[3], w2[0], selector);
w7[1] = hc_byte_perm_S (w1[2], w1[3], selector);
w7[0] = hc_byte_perm_S (w1[1], w1[2], selector);
w6[3] = hc_byte_perm_S (w1[0], w1[1], selector);
w6[2] = hc_byte_perm_S (w0[3], w1[0], selector);
w6[1] = hc_byte_perm_S (w0[2], w0[3], selector);
w6[0] = hc_byte_perm_S (w0[1], w0[2], selector);
w5[3] = hc_byte_perm_S (w0[0], w0[1], selector);
w5[2] = hc_byte_perm_S ( 0, w0[0], selector);
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 23:
w7[3] = hc_byte_perm_S (w1[3], w2[0], selector);
w7[2] = hc_byte_perm_S (w1[2], w1[3], selector);
w7[1] = hc_byte_perm_S (w1[1], w1[2], selector);
w7[0] = hc_byte_perm_S (w1[0], w1[1], selector);
w6[3] = hc_byte_perm_S (w0[3], w1[0], selector);
w6[2] = hc_byte_perm_S (w0[2], w0[3], selector);
w6[1] = hc_byte_perm_S (w0[1], w0[2], selector);
w6[0] = hc_byte_perm_S (w0[0], w0[1], selector);
w5[3] = hc_byte_perm_S ( 0, w0[0], selector);
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 24:
w7[3] = hc_byte_perm_S (w1[2], w1[3], selector);
w7[2] = hc_byte_perm_S (w1[1], w1[2], selector);
w7[1] = hc_byte_perm_S (w1[0], w1[1], selector);
w7[0] = hc_byte_perm_S (w0[3], w1[0], selector);
w6[3] = hc_byte_perm_S (w0[2], w0[3], selector);
w6[2] = hc_byte_perm_S (w0[1], w0[2], selector);
w6[1] = hc_byte_perm_S (w0[0], w0[1], selector);
w6[0] = hc_byte_perm_S ( 0, w0[0], selector);
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 25:
w7[3] = hc_byte_perm_S (w1[1], w1[2], selector);
w7[2] = hc_byte_perm_S (w1[0], w1[1], selector);
w7[1] = hc_byte_perm_S (w0[3], w1[0], selector);
w7[0] = hc_byte_perm_S (w0[2], w0[3], selector);
w6[3] = hc_byte_perm_S (w0[1], w0[2], selector);
w6[2] = hc_byte_perm_S (w0[0], w0[1], selector);
w6[1] = hc_byte_perm_S ( 0, w0[0], selector);
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 26:
w7[3] = hc_byte_perm_S (w1[0], w1[1], selector);
w7[2] = hc_byte_perm_S (w0[3], w1[0], selector);
w7[1] = hc_byte_perm_S (w0[2], w0[3], selector);
w7[0] = hc_byte_perm_S (w0[1], w0[2], selector);
w6[3] = hc_byte_perm_S (w0[0], w0[1], selector);
w6[2] = hc_byte_perm_S ( 0, w0[0], selector);
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 27:
w7[3] = hc_byte_perm_S (w0[3], w1[0], selector);
w7[2] = hc_byte_perm_S (w0[2], w0[3], selector);
w7[1] = hc_byte_perm_S (w0[1], w0[2], selector);
w7[0] = hc_byte_perm_S (w0[0], w0[1], selector);
w6[3] = hc_byte_perm_S ( 0, w0[0], selector);
w6[2] = 0;
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 28:
w7[3] = hc_byte_perm_S (w0[2], w0[3], selector);
w7[2] = hc_byte_perm_S (w0[1], w0[2], selector);
w7[1] = hc_byte_perm_S (w0[0], w0[1], selector);
w7[0] = hc_byte_perm_S ( 0, w0[0], selector);
w6[3] = 0;
w6[2] = 0;
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 29:
w7[3] = hc_byte_perm_S (w0[1], w0[2], selector);
w7[2] = hc_byte_perm_S (w0[0], w0[1], selector);
w7[1] = hc_byte_perm_S ( 0, w0[0], selector);
w7[0] = 0;
w6[3] = 0;
w6[2] = 0;
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 30:
w7[3] = hc_byte_perm_S (w0[0], w0[1], selector);
w7[2] = hc_byte_perm_S ( 0, w0[0], selector);
w7[1] = 0;
w7[0] = 0;
w6[3] = 0;
w6[2] = 0;
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
case 31:
w7[3] = hc_byte_perm_S ( 0, w0[0], selector);
w7[2] = 0;
w7[1] = 0;
w7[0] = 0;
w6[3] = 0;
w6[2] = 0;
w6[1] = 0;
w6[0] = 0;
w5[3] = 0;
w5[2] = 0;
w5[1] = 0;
w5[0] = 0;
w4[3] = 0;
w4[2] = 0;
w4[1] = 0;
w4[0] = 0;
w3[3] = 0;
w3[2] = 0;
w3[1] = 0;
w3[0] = 0;
w2[3] = 0;
w2[2] = 0;
w2[1] = 0;
w2[0] = 0;
w1[3] = 0;
w1[2] = 0;
w1[1] = 0;
w1[0] = 0;
w0[3] = 0;
w0[2] = 0;
w0[1] = 0;
w0[0] = 0;
break;
}
#endif
}

View File

@ -124,10 +124,10 @@
#if defined IS_METAL
#define KERN_ATTR_MAIN_PARAMS \
uint hc_gid [[ thread_position_in_grid ]], \
uint hc_lid [[ thread_position_in_threadgroup ]], \
uint hc_lsz [[ threads_per_threadgroup ]], \
uint hc_bid [[ threadgroup_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]], \
uint3 hc_lid [[ thread_position_in_threadgroup ]], \
uint3 hc_lsz [[ threads_per_threadgroup ]], \
uint3 hc_bid [[ threadgroup_position_in_grid ]]
#endif // IS_METAL
/*
@ -284,6 +284,10 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c);
DECLSPEC u32x hc_lop_0x96 (const u32x a, const u32x b, const u32x c);
DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c);
// arithmetic operations
DECLSPEC u32 hc_umulhi (const u32 x, const u32 y);
// legacy common code
DECLSPEC int ffz (const u32 v);

407
OpenCL/inc_hash_argon2.cl Normal file
View File

@ -0,0 +1,407 @@
/**
* Author......: Netherlands Forensic Institute
* License.....: MIT
*
* Warp code based on original work by Ondrej Mosnáček
*/
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
#include "inc_common.h"
#include "inc_hash_blake2b.h"
#include "inc_hash_argon2.h"
// Generates one of the two seed blocks of a lane:
//   B[lane][blocknum] = H'(H0 || LE32(blocknum) || LE32(lane))
// where H' is Argon2's variable-length hash built from chained Blake2b
// calls, expanded here to the full 1 KiB block size.
// 'in' points at H0 (64 bytes); the result is written to 'blocks' at the
// lane-interleaved position (blocknum * parallelism) + lane.
DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks)
{
blake2b_ctx_t ctx;
blake2b_init (&ctx);
u64 blake_buf[16] = { 0 };
// H' prefix: LE32 output length (1024 = sizeof (argon2_block_t))
blake_buf[0] = sizeof(argon2_block_t);
blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 4);
// H0 (64 bytes)
blake2b_update (&ctx, in, 64);
// LE32(blocknum) || LE32(lane) — low half of the u64 is stored first
blake_buf[0] = hl32_to_64 (lane, blocknum);
blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 8);
blake2b_final (&ctx);
GLOBAL_AS u64 *out = blocks[(blocknum * parallelism) + lane].values;
// first 32 bytes of V1 go straight to the output
out[0] = ctx.h[0];
out[1] = ctx.h[1];
out[2] = ctx.h[2];
out[3] = ctx.h[3];
// chain V_{i+1} = Blake2b(V_i); each round contributes its first 32 bytes
for (u32 off = 4; off < 124; off += 4)
{
for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx];
blake2b_init (&ctx);
// hash the previous 64-byte state as a single final message block
blake2b_transform (ctx.h, blake_buf, 64, (u64) BLAKE2B_FINAL);
out[off + 0] = ctx.h[0];
out[off + 1] = ctx.h[1];
out[off + 2] = ctx.h[2];
out[off + 3] = ctx.h[3];
}
// the last round contributes its full second half as well
out[124] = ctx.h[4];
out[125] = ctx.h[5];
out[126] = ctx.h[6];
out[127] = ctx.h[7];
}
// Computes Argon2's initial 64-byte hash H0 into 'blockhash':
//   H0 = Blake2b(p, tagLen, m, t, v, y, |P|, P, |S|, S, |K|, K, |X|, X)
// Secret key K and associated data X are not supported and hashed as
// zero-length fields.
DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u64 *blockhash)
{
blake2b_ctx_t ctx;
blake2b_init (&ctx);
// six LE32 parameter words: p, tagLen, m, t, v, y (24 bytes)
u32 option_input[32] = { 0 };
option_input[0] = options->parallelism;
option_input[1] = options->digest_len;
option_input[2] = options->memory_usage_in_kib;
option_input[3] = options->iterations;
option_input[4] = options->version;
option_input[5] = options->type;
blake2b_update (&ctx, option_input, 24);
// |P| then P (password)
u32 len_input[32] = { 0 };
len_input[0] = pw->pw_len;
blake2b_update (&ctx, len_input, 4);
blake2b_update_global (&ctx, pw->i, pw->pw_len);
// |S| then S (salt)
len_input[0] = salt->salt_len;
blake2b_update (&ctx, len_input, 4);
blake2b_update_global (&ctx, salt->salt_buf, salt->salt_len);
len_input[0] = 0;
blake2b_update (&ctx, len_input, 4); // secret (K)
blake2b_update (&ctx, len_input, 4); // associated data (X)
blake2b_final (&ctx);
for (u32 idx = 0; idx < 8; idx++) blockhash[idx] = ctx.h[idx];
}
// Argon2 initialization: derive H0 from password/salt/options, then seed
// the first two blocks of every lane from it.
DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt,
PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out)
{
  // H0: initial 64-byte hash (stored in the first 8 u64 slots)
  u64 h0[16] = { 0 };

  argon2_initial_hash (pw, salt, options, h0);

  // Each lane starts with seed blocks 0 and 1 derived from H0
  for (u32 l = 0; l < options->parallelism; l++)
  {
    for (u32 b = 0; b < 2; b++)
    {
      argon2_initial_block ((PRIVATE_AS u32 *) h0, l, b, options->parallelism, out);
    }
  }
}
// TODO: reconsider 'trunc_mul()'
// Argon2's "truncated" multiply: the full 64-bit product of the LOW 32 bits
// of each operand. The upper operand halves are intentionally ignored.
DECLSPEC u64 trunc_mul (u64 x, u64 y)
{
  const u32 a = (u32) x;
  const u32 b = (u32) y;

  const u32 lo = (u32) (a * b);      // low  32 bits of a*b
  const u32 hi = hc_umulhi (a, b);   // high 32 bits of a*b

  return hl32_to_64_S (hi, lo);
}
// Maps the pseudo-random value (J1 || J2) to the flat index of the reference
// block for the block currently being computed (Argon2 indexing function).
DECLSPEC inline u32 argon2_ref_address (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, u32 index, u64 pseudo_random)
{
u32 ref_lane = 0;
u32 ref_area = 0;
u32 ref_index = 0;
// the first slice of the first pass may only reference its own lane
if ((pos->pass == 0) && (pos->slice == 0))
{
ref_lane = pos->lane;
}
else
{
// J2 (high half) selects the reference lane
ref_lane = h32_from_64_S (pseudo_random) % options->parallelism;
}
// referenceable area: completed slices on pass 0, all other slices later
ref_area = (pos->pass == 0) ? pos->slice : (ARGON2_SYNC_POINTS - 1);
ref_area *= options->segment_length;
// same lane also sees the current segment up to the previous block;
// index == 0 shrinks the area by one in the other-lane case as well
if ((ref_lane == pos->lane) || (index == 0))
{
ref_area += (index - 1);
}
// if ref_area == 0xFFFFFFFF => bug
// J1 (low half) mapped non-uniformly onto the area:
// ref_index = ref_area - 1 - floor(ref_area * J1^2 / 2^64)
const u32 j1 = l32_from_64_S (pseudo_random);
ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1)));
if (pos->pass > 0)
{
// later passes: window starts after the current slice, wrapping the lane
ref_index += (pos->slice + 1) * options->segment_length;
if (ref_index >= options->lane_length)
{
ref_index -= options->lane_length;
}
}
// blocks are stored interleaved by lane
return (options->parallelism * ref_index) + ref_lane;
}
// Exchanges two 64-bit values in place.
DECLSPEC void swap_u64 (PRIVATE_AS u64 *x, PRIVATE_AS u64 *y)
{
  const u64 t = *x;

  *x = *y;
  *y = t;
}
// Permutes the four per-thread register values, with the swap pattern
// selected by bits 3 and 2 of the thread id.
DECLSPEC void transpose_permute_block (u64 R[4], int argon2_thread)
{
  const int bit3 = argon2_thread & 0x08;
  const int bit2 = argon2_thread & 0x04;

  if (bit3)
  {
    swap_u64 (&R[0], &R[2]);
    swap_u64 (&R[1], &R[3]);
  }

  if (bit2)
  {
    swap_u64 (&R[0], &R[1]);
    swap_u64 (&R[2], &R[3]);
  }
}
// Computes a source-lane id for the shfl exchanges: bits 1-3 of the thread
// id are kept, while bits 0 and 4 are rotated by a delta derived from idx.
DECLSPEC int argon2_shift (int idx, int argon2_thread)
{
  // delta packs bit 1 of idx into bit 4 and keeps bit 0
  const int delta = ((idx & 0x02) << 3) + (idx & 0x01);

  const int kept    = argon2_thread & 0x0e;
  const int rotated = ((argon2_thread & 0x11) + delta + 0x0e) & 0x11;

  return kept | rotated;
}
// Applies the Argon2 core permutation to one 1 KiB block that is distributed
// over the cooperating threads: each thread holds 4 of the 128 u64 words in
// R. The shfl exchanges regroup the words between ARGON2_G applications.
// NOTE(review): the exact shuffle patterns appear to implement the
// column/diagonal regrouping of the Blake2b-style round — verify against the
// original warp-shuffle Argon2 implementation before changing anything here.
DECLSPEC void argon2_hash_block (u64 R[4], int argon2_thread, LOCAL_AS u64 *shuffle_buf, int argon2_lsz)
{
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
transpose_permute_block (R, argon2_thread);
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
ARGON2_G(R[0], R[1], R[2], R[3]);
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], (argon2_thread & 0x1c) | ((argon2_thread + idx) & 0x03), argon2_thread, argon2_lsz);
ARGON2_G(R[0], R[1], R[2], R[3]);
// undo the in-row rotation and the transpose before the second round pair
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], ((argon2_thread & 0x1c) | ((argon2_thread - idx) & 0x03)) ^ (idx << 2), argon2_thread, argon2_lsz);
transpose_permute_block (R, argon2_thread);
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz);
ARGON2_G(R[0], R[1], R[2], R[3]);
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift (idx, argon2_thread), argon2_thread, argon2_lsz);
ARGON2_G(R[0], R[1], R[2], R[3]);
// inverse shift restores each word to its owning thread
for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift ((4 - idx), argon2_thread), argon2_thread, argon2_lsz);
}
// Computes the next 128 data-independent reference addresses (Argon2i-style
// addressing) for the segment position in 'pos', starting at 'start_index'.
// The counter block is built cooperatively: each thread seeds one word, the
// block is compressed twice with XOR feedback, and every thread then derives
// 4 of the 128 addresses from its share of the result.
DECLSPEC void argon2_next_addresses (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, PRIVATE_AS u32 *addresses, u32 start_index, u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz)
{
u64 Z[4] = { 0 };
u64 tmp[4] = { 0 };
tmp[0] = 0;
tmp[1] = 0;
tmp[2] = 0;
tmp[3] = 0;
// counter block fields, one u64 per thread (threads >= 7 hold zero)
switch (argon2_thread)
{
case 0: Z[0] = pos->pass; break;
case 1: Z[0] = pos->lane; break;
case 2: Z[0] = pos->slice; break;
case 3: Z[0] = options->memory_block_count; break;
case 4: Z[0] = options->iterations; break;
case 5: Z[0] = options->type; break;
case 6: Z[0] = (start_index / 128) + 1; break; // address-block counter
default: Z[0] = 0; break;
}
// first compression with feed-forward of the seeded word
tmp[0] = Z[0];
argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz);
Z[0] ^= tmp[0];
// second compression, feed-forward over all four words
for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx];
argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz);
for (u32 idx = 0; idx < 4; idx++) Z[idx] ^= tmp[idx];
// each thread resolves 4 addresses, strided by THREADS_PER_LANE
for (u32 i = 0, index = (start_index + argon2_thread); i < 4; i++, index += THREADS_PER_LANE)
{
addresses[i] = argon2_ref_address (options, pos, index, Z[i]);
}
// if addresses[0] == 0xFFFFFFFE => bug
}
// Selects one of four elements by runtime index.
// Any index > 3 yields the all-ones sentinel.
DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index)
{
  if (index == 0) return array[0];
  if (index == 1) return array[1];
  if (index == 2) return array[2];
  if (index == 3) return array[3];

  return (u32) -1;
}
// Loads this thread's four words of the PREVIOUS block in the lane into R
// and returns a pointer to the current block. The previous index wraps to
// the end of the lane when the current block is the lane's first block.
DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 argon2_thread)
{
  u32 prev_in_lane;

  if (index_in_lane == 0)
  {
    prev_in_lane = options->lane_length - 1; // wrap-around
  }
  else
  {
    prev_in_lane = index_in_lane - 1;
  }

  GLOBAL_AS const argon2_block_t *prev = &blocks[(prev_in_lane * options->parallelism) + lane];

  for (u32 i = 0; i < 4; i++)
  {
    R[i] = prev->values[(i * THREADS_PER_LANE) + argon2_thread];
  }

  return &blocks[(index_in_lane * options->parallelism) + lane];
}
// Fills blocks [start_index, end_index) of the current lane segment.
// R carries the running previous block across iterations; cur_block advances
// by options->parallelism per block because blocks are interleaved by lane.
// indep_addr selects precomputed (data-independent) addresses vs. addresses
// derived on the fly from the previous block (data-dependent, Argon2d-style).
DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4],
u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz)
{
for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism)
{
u32 ref_address = 0;
if (indep_addr)
{
// pick the precomputed address and broadcast it from its owning thread
ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS);
ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, index, argon2_thread, argon2_lsz);
}
else
{
// thread 0's R[0] holds (J1 || J2) from the previous block
ref_address = argon2_ref_address (options, pos, index, R[0]);
ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, 0, argon2_thread, argon2_lsz);
}
GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address];
u64 tmp[4] = { 0 };
// First pass is overwrite, next passes are XOR with previous
if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10))
{
for (u32 idx = 0; idx < 4; idx++) tmp[idx] = cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread];
}
// R = prev ^ ref; new block = G(R) ^ R (^ old block on later v1.3 passes)
for (u32 idx = 0; idx < 4; idx++) R[idx] ^= ref_block->values[(idx * THREADS_PER_LANE) + argon2_thread];
for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx];
argon2_hash_block (R, argon2_thread, shuffle_buf, argon2_lsz);
for (u32 idx = 0; idx < 4; idx++) R[idx] ^= tmp[idx];
for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread] = R[idx];
}
}
// Fills one segment (identified by pass/slice/lane in 'pos').
// Data-independent addressing is used for Argon2i always, and for the first
// two slices of the first pass of Argon2id; otherwise addressing is
// data-dependent (Argon2d-style).
DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz)
{
// We have already generated the first two blocks of each lane (for the first pass)
const u32 skip_blocks = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0;
const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks;
u64 R[4] = { 0 };
GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, argon2_thread);
if ((options->type == TYPE_I) || ((options->type == TYPE_ID) && (pos->pass == 0) && (pos->slice <= 1)))
{
// process in chunks of 128 blocks: one address block covers 128 indices
for (u32 block_index = 0; block_index < options->segment_length; block_index += 128)
{
const u32 start_index = (block_index == 0) ? skip_blocks : block_index;
const u32 end_index = MIN(((start_index | 127) + 1), options->segment_length);
u32 addresses[4] = { 0 };
argon2_next_addresses (options, pos, addresses, block_index, argon2_thread, shuffle_buf, argon2_lsz);
argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz);
cur_block += (end_index - start_index) * options->parallelism;
}
}
else
{
// data-dependent path: addresses come from the evolving block contents
u32 addresses[4] = { 0 };
argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz);
}
}
// Produces the final Argon2 tag: XOR of the last block of every lane, then
// hashed down to options->digest_len bytes with the Blake2b-based H'.
// NOTE(review): there is no H' chaining loop here, so this path supports
// digest_len up to 64 bytes only — confirm callers never request more.
DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out)
{
const u32 lane_length = options->lane_length;
const u32 lanes = options->parallelism;
// C = XOR of the last block in each lane
argon2_block_t final_block = { };
for (u32 l = 0; l < lanes; l++)
{
for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx];
}
// H' prefix: LE32(digest_len)
u32 output_len[32] = { 0 };
output_len[0] = options->digest_len;
blake2b_ctx_t ctx;
blake2b_init (&ctx);
// Override default (0x40) value in BLAKE2b
ctx.h[0] ^= 0x40 ^ options->digest_len;
blake2b_update (&ctx, output_len, 4);
blake2b_update (&ctx, (PRIVATE_AS u32 *) final_block.values, sizeof(final_block));
blake2b_final (&ctx);
// split each 64-bit state word into two LE 32-bit output words
for (uint i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1)
{
out [i + 0] = l32_from_64_S (ctx.h[idx]);
out [i + 1] = h32_from_64_S (ctx.h[idx]);
}
}
// Returns the start of the idx-th password's block region inside the shared
// scratch buffer. The cast binds before '+', so the arithmetic advances in
// whole argon2_block_t units: per-password block count times idx.
// ARGON2_TMP_ELEM, when defined at compile time, fixes that per-password
// count; otherwise options->memory_block_count is used.
DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (PRIVATE_AS const argon2_options_t *options, GLOBAL_AS void *buf, const int idx)
{
GLOBAL_AS u32 *buf32 = (GLOBAL_AS u32 *) buf;
#ifdef ARGON2_TMP_ELEM
return (GLOBAL_AS argon2_block_t *) buf32 + (ARGON2_TMP_ELEM * idx);
#else
return (GLOBAL_AS argon2_block_t *) buf32 + (options->memory_block_count * idx);
#endif
}

164
OpenCL/inc_hash_argon2.h Normal file
View File

@ -0,0 +1,164 @@
/**
* Author......: Netherlands Forensic Institute
* License.....: MIT
*/
#ifndef INC_HASH_ARGON2_H
#define INC_HASH_ARGON2_H
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#define ARGON2_VERSION_10 0x10
#define ARGON2_VERSION_13 0x13
#define THREADS_PER_LANE 32
#define FULL_MASK 0xffffffff
#define BLAKE2B_OUTBYTES 64
#define ARGON2_SYNC_POINTS 4
#define ARGON2_ADDRESSES_IN_BLOCK 128
#define TYPE_D 0
#define TYPE_I 1
#define TYPE_ID 2
#if defined IS_CUDA
#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) __shfl_sync ((mask),(var),(srcLane))
#elif defined IS_HIP
// attention hard coded 32 warps for hip here
#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) __shfl ((var),(srcLane),32)
#elif defined IS_OPENCL
#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) hc__shfl ((shfbuf),(var),(srcLane),(argon2_thread),(argon2_lsz))
#if defined IS_AMD && defined IS_GPU
// AMD GPU variant: 64-bit lane shuffle via two 32-bit ds_bpermute ops.
// ds_bpermute addresses lanes by byte offset, hence src_lane * 4.
DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
{
const u32 idx = src_lane << 2;
const u32 l32 = l32_from_64_S (var);
const u32 h32 = h32_from_64_S (var);
const u32 l32r = __builtin_amdgcn_ds_bpermute (idx, l32);
const u32 h32r = __builtin_amdgcn_ds_bpermute (idx, h32);
const u64 out = hl32_to_64_S (h32r, l32r);
return out;
}
#elif defined IS_NV && defined IS_GPU
// NVIDIA GPU variant: 64-bit lane shuffle via two 32-bit shfl.sync.idx ops
// (0x1f is the clamp/segment operand for a full 32-lane warp).
// NOTE(review): the final asm operand (membermask) is 0 rather than
// 0xffffffff — verify against the PTX ISA documentation for shfl.sync.
DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
{
const u32 l32 = l32_from_64_S (var);
const u32 h32 = h32_from_64_S (var);
u32 l32r;
u32 h32r;
asm("shfl.sync.idx.b32 %0, %1, %2, 0x1f, 0;"
: "=r"(l32r)
: "r"(l32), "r"(src_lane));
asm("shfl.sync.idx.b32 %0, %1, %2, 0x1f, 0;"
: "=r"(h32r)
: "r"(h32), "r"(src_lane));
const u64 out = hl32_to_64_S (h32r, l32r);
return out;
}
#else
// Generic OpenCL fallback: exchange values through local memory.
// Both barriers are required: the first publishes every thread's value, the
// second keeps a later call from overwriting the buffer while reads pend.
DECLSPEC u64 hc__shfl (MAYBE_UNUSED LOCAL_AS u64 *shuffle_buf, const u64 var, const int src_lane, const u32 argon2_thread, const u32 argon2_lsz)
{
shuffle_buf[argon2_thread] = var;
barrier (CLK_LOCAL_MEM_FENCE);
// masking assumes argon2_lsz is a power of two — TODO confirm
const u64 out = shuffle_buf[src_lane & (argon2_lsz - 1)];
barrier (CLK_LOCAL_MEM_FENCE);
return out;
}
#endif
#elif defined IS_METAL
#define hc__shfl_sync(shfbuf,mask,var,srcLane,argon2_thread,argon2_lsz) simd_shuffle_64 ((var),(srcLane),(argon2_lsz))
// Metal variant: simd_shuffle operates on 32-bit values, so the 64-bit word
// is split, both halves are shuffled from the same lane, then rejoined.
DECLSPEC u64 simd_shuffle_64 (const u64 var, const int src_lane, const u32 argon2_lsz)
{
  // masking keeps the lane inside the simdgroup
  const u32 lane = src_lane & (argon2_lsz - 1);

  const u32 lo = simd_shuffle (l32_from_64_S (var), lane);
  const u32 hi = simd_shuffle (h32_from_64_S (var), lane);

  return hl32_to_64_S (hi, lo);
}
#endif
#define ARGON2_G(a,b,c,d) \
{ \
a = a + b + 2 * trunc_mul(a, b); \
d = blake2b_rot32_S (d ^ a); \
c = c + d + 2 * trunc_mul(c, d); \
b = blake2b_rot24_S (b ^ c); \
a = a + b + 2 * trunc_mul(a, b); \
d = blake2b_rot16_S (d ^ a); \
c = c + d + 2 * trunc_mul(c, d); \
b = hc_rotr64_S (b ^ c, 63); \
}
#define ARGON2_P() \
{ \
ARGON2_G(v[0], v[4], v[8], v[12]); \
ARGON2_G(v[1], v[5], v[9], v[13]); \
ARGON2_G(v[2], v[6], v[10], v[14]); \
ARGON2_G(v[3], v[7], v[11], v[15]); \
\
ARGON2_G(v[0], v[5], v[10], v[15]); \
ARGON2_G(v[1], v[6], v[11], v[12]); \
ARGON2_G(v[2], v[7], v[8], v[13]); \
ARGON2_G(v[3], v[4], v[9], v[14]); \
}
// One 1 KiB Argon2 memory block: 128 64-bit words.
typedef struct argon2_block
{
u64 values[128];
} argon2_block_t;
// Parsed Argon2 parameters shared by all kernel phases.
typedef struct argon2_options
{
u32 type;                // TYPE_D, TYPE_I or TYPE_ID
u32 version;             // ARGON2_VERSION_10 or ARGON2_VERSION_13
u32 iterations;          // number of passes over memory (t)
u32 parallelism;         // number of lanes (p)
u32 memory_usage_in_kib; // requested memory cost (m)
u32 segment_length;      // blocks per segment
u32 lane_length;         // blocks per lane
u32 memory_block_count;  // total blocks in the memory area
u32 digest_len;          // output tag length in bytes
} argon2_options_t;
// Position within the Argon2 fill schedule: which pass, which of the
// ARGON2_SYNC_POINTS slices, and which lane.
typedef struct argon2_pos
{
u32 pass;
u32 slice;
u32 lane;
} argon2_pos_t;
DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, GLOBAL_AS argon2_block_t *out);
DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, LOCAL_AS u64 *shuffle_buf, const u32 argon2_thread, const u32 argon2_lsz);
DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u32 *out);
DECLSPEC GLOBAL_AS argon2_block_t *get_argon2_block (PRIVATE_AS const argon2_options_t *options, GLOBAL_AS void *buf, const int idx);
#endif // INC_HASH_ARGON2_H

View File

@ -409,7 +409,7 @@ DECLSPEC void blake2b_update (PRIVATE_AS blake2b_ctx_t *ctx, PRIVATE_AS const u3
u32 w6[4];
u32 w7[4];
const int limit = (const int) len - 128; // int type needed, could be negative
const int limit = len - 128; // int type needed, could be negative
int pos1;
int pos4;
@ -499,7 +499,7 @@ DECLSPEC void blake2b_update_global (PRIVATE_AS blake2b_ctx_t *ctx, GLOBAL_AS co
u32 w6[4];
u32 w7[4];
const int limit = (const int) len - 128; // int type needed, could be negative
const int limit = len - 128; // int type needed, could be negative
int pos1;
int pos4;
@ -580,7 +580,7 @@ DECLSPEC void blake2b_update_global (PRIVATE_AS blake2b_ctx_t *ctx, GLOBAL_AS co
DECLSPEC void blake2b_final (PRIVATE_AS blake2b_ctx_t *ctx)
{
blake2b_transform (ctx->h, ctx->m, ctx->len, BLAKE2B_FINAL);
blake2b_transform (ctx->h, ctx->m, ctx->len, (u64) BLAKE2B_FINAL);
}
DECLSPEC void blake2b_transform_vector (PRIVATE_AS u64x *h, PRIVATE_AS const u64x *m, const u32x len, const u64 f0)
@ -813,7 +813,7 @@ DECLSPEC void blake2b_update_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVA
u32x w6[4];
u32x w7[4];
const int limit = (const int) len - 128; // int type needed, could be negative
const int limit = len - 128; // int type needed, could be negative
int pos1;
int pos4;
@ -894,5 +894,5 @@ DECLSPEC void blake2b_update_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx, PRIVA
DECLSPEC void blake2b_final_vector (PRIVATE_AS blake2b_ctx_vector_t *ctx)
{
blake2b_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2B_FINAL);
blake2b_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, (u64) BLAKE2B_FINAL);
}

View File

@ -322,7 +322,7 @@ DECLSPEC void blake2s_update (PRIVATE_AS blake2s_ctx_t *ctx, PRIVATE_AS const u3
u32 w2[4];
u32 w3[4];
const int limit = (const int) len - 64; // int type needed, could be negative
const int limit = len - 64; // int type needed, could be negative
int pos1;
int pos4;
@ -376,7 +376,7 @@ DECLSPEC void blake2s_update_global (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_AS co
u32 w2[4];
u32 w3[4];
const int limit = (const int) len - 64; // int type needed, could be negative
const int limit = len - 64; // int type needed, could be negative
int pos1;
int pos4;
@ -516,7 +516,7 @@ DECLSPEC void blake2s_update_global_swap (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_
u32 w2[4];
u32 w3[4];
const int limit = (const int) len - 64; // int type needed, could be negative
const int limit = len - 64; // int type needed, could be negative
int pos1;
int pos4;
@ -597,13 +597,11 @@ DECLSPEC void blake2s_update_global_swap (PRIVATE_AS blake2s_ctx_t *ctx, GLOBAL_
blake2s_update_64 (ctx, w0, w1, w2, w3, len - (u32) pos1);
}
DECLSPEC void blake2s_final (PRIVATE_AS blake2s_ctx_t *ctx)
{
blake2s_transform (ctx->h, ctx->m, ctx->len, BLAKE2S_FINAL);
blake2s_transform (ctx->h, ctx->m, ctx->len, (u32) BLAKE2S_FINAL);
}
DECLSPEC void blake2s_hmac_init_64 (PRIVATE_AS blake2s_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3)
{
u32 a0[4];
@ -1158,7 +1156,7 @@ DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVA
u32x w2[4];
u32x w3[4];
const int limit = (const int) len - 64; // int type needed, could be negative
const int limit = len - 64; // int type needed, could be negative
int pos1;
int pos4;
@ -1207,7 +1205,7 @@ DECLSPEC void blake2s_update_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx, PRIVA
DECLSPEC void blake2s_final_vector (PRIVATE_AS blake2s_ctx_vector_t *ctx)
{
blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, BLAKE2S_FINAL);
blake2s_transform_vector (ctx->h, ctx->m, (u32x) ctx->len, (u32) BLAKE2S_FINAL);
}
DECLSPEC void blake2s_hmac_init_vector_64 (PRIVATE_AS blake2s_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3)

2279
OpenCL/inc_hash_ripemd320.cl Normal file

File diff suppressed because it is too large Load Diff

147
OpenCL/inc_hash_ripemd320.h Normal file
View File

@ -0,0 +1,147 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#ifndef INC_HASH_RIPEMD320_H
#define INC_HASH_RIPEMD320_H
#define RIPEMD320_F(x,y,z) ((x) ^ (y) ^ (z))
#define RIPEMD320_G(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) /* x ? y : z */
#define RIPEMD320_H(x,y,z) (((x) | ~(y)) ^ (z))
#define RIPEMD320_I(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) /* z ? x : y */
#define RIPEMD320_J(x,y,z) ((x) ^ ((y) | ~(z)))
#ifdef USE_BITSELECT
#define RIPEMD320_Go(x,y,z) (bitselect ((z), (y), (x)))
#define RIPEMD320_Io(x,y,z) (bitselect ((y), (x), (z)))
#else
#define RIPEMD320_Go(x,y,z) (RIPEMD320_G ((x), (y), (z)))
#define RIPEMD320_Io(x,y,z) (RIPEMD320_I ((x), (y), (z)))
#endif
#define RIPEMD320_STEP_S(f,a,b,c,d,e,x,K,s) \
{ \
a += K; \
a += x; \
a += f (b, c, d); \
a = hc_rotl32_S (a, s); \
a += e; \
c = hc_rotl32_S (c, 10u); \
}
#define RIPEMD320_STEP(f,a,b,c,d,e,x,K,s) \
{ \
a += make_u32x (K); \
a += x; \
a += f (b, c, d); \
a = hc_rotl32 (a, s); \
a += e; \
c = hc_rotl32 (c, 10u); \
}
#define ROTATE_LEFT_WORKAROUND_BUG(a,n) ((a << n) | (a >> (32 - n)))
#define RIPEMD320_STEP_S_WORKAROUND_BUG(f,a,b,c,d,e,x,K,s) \
{ \
a += K; \
a += x; \
a += f (b, c, d); \
a = ROTATE_LEFT_WORKAROUND_BUG (a, s); \
a += e; \
c = hc_rotl32_S (c, 10u); \
}
#define RIPEMD320_STEP_WORKAROUND_BUG(f,a,b,c,d,e,x,K,s) \
{ \
a += make_u32x (K); \
a += x; \
a += f (b, c, d); \
a = ROTATE_LEFT_WORKAROUND_BUG (a, s); \
a += e; \
c = hc_rotl32 (c, 10u); \
}
typedef struct ripemd320_ctx
{
u32 h[10];
u32 w0[4];
u32 w1[4];
u32 w2[4];
u32 w3[4];
int len;
} ripemd320_ctx_t;
typedef struct ripemd320_hmac_ctx
{
ripemd320_ctx_t ipad;
ripemd320_ctx_t opad;
} ripemd320_hmac_ctx_t;
typedef struct ripemd320_ctx_vector
{
u32x h[10];
u32x w0[4];
u32x w1[4];
u32x w2[4];
u32x w3[4];
int len;
} ripemd320_ctx_vector_t;
typedef struct ripemd320_hmac_ctx_vector
{
ripemd320_ctx_vector_t ipad;
ripemd320_ctx_vector_t opad;
} ripemd320_hmac_ctx_vector_t;
DECLSPEC void ripemd320_transform (PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3, PRIVATE_AS u32 *digest);
DECLSPEC void ripemd320_init (PRIVATE_AS ripemd320_ctx_t *ctx);
DECLSPEC void ripemd320_update_64 (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len);
DECLSPEC void ripemd320_update (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_global (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_global_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_global_utf16le (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_update_global_utf16le_swap (PRIVATE_AS ripemd320_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_final (PRIVATE_AS ripemd320_ctx_t *ctx);
DECLSPEC void ripemd320_hmac_init_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w0, PRIVATE_AS const u32 *w1, PRIVATE_AS const u32 *w2, PRIVATE_AS const u32 *w3);
DECLSPEC void ripemd320_hmac_init (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_init_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_init_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_init_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_64 (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const int len);
DECLSPEC void ripemd320_hmac_update (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, PRIVATE_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_global (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_global_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_global_utf16le (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_update_global_utf16le_swap (PRIVATE_AS ripemd320_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len);
DECLSPEC void ripemd320_hmac_final (PRIVATE_AS ripemd320_hmac_ctx_t *ctx);
DECLSPEC void ripemd320_transform_vector (PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3, PRIVATE_AS u32x *digest);
DECLSPEC void ripemd320_init_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx);
DECLSPEC void ripemd320_init_vector_from_scalar (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS ripemd320_ctx_t *ctx0);
DECLSPEC void ripemd320_update_vector_64 (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len);
DECLSPEC void ripemd320_update_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_update_vector_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_update_vector_utf16le (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_update_vector_utf16le_swap (PRIVATE_AS ripemd320_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_final_vector (PRIVATE_AS ripemd320_ctx_vector_t *ctx);
DECLSPEC void ripemd320_hmac_init_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w0, PRIVATE_AS const u32x *w1, PRIVATE_AS const u32x *w2, PRIVATE_AS const u32x *w3);
DECLSPEC void ripemd320_hmac_init_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_hmac_update_vector_64 (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const int len);
DECLSPEC void ripemd320_hmac_update_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx, PRIVATE_AS const u32x *w, const int len);
DECLSPEC void ripemd320_hmac_final_vector (PRIVATE_AS ripemd320_hmac_ctx_vector_t *ctx);
#endif // INC_HASH_RIPEMD320_H

View File

@ -19,7 +19,7 @@
CONSTANT_AS const u32 &bits14, \
CONSTANT_AS const u32 &bits15, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_R_MARKOV \
GLOBAL_AS bf_t *pws_buf_r, \
@ -31,7 +31,7 @@
CONSTANT_AS const u32 &bits14, \
CONSTANT_AS const u32 &bits15, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_C_MARKOV \
GLOBAL_AS pw_t *pws_buf, \
@ -43,7 +43,7 @@
CONSTANT_AS const u32 &bits14, \
CONSTANT_AS const u32 &bits15, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#else // CUDA, HIP, OpenCL

View File

@ -104,9 +104,19 @@ DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
return atomicOr (p, val);
}
DECLSPEC size_t get_group_id (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_group_id (const u32 dimindx)
{
return blockIdx.x;
switch (dimindx)
{
case 0:
return blockIdx.x;
case 1:
return blockIdx.y;
case 2:
return blockIdx.z;
}
return (size_t) -1;
}
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
@ -114,15 +124,34 @@ DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
return (blockIdx.x * blockDim.x) + threadIdx.x;
}
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_local_id (const u32 dimindx)
{
return threadIdx.x;
switch (dimindx)
{
case 0:
return threadIdx.x;
case 1:
return threadIdx.y;
case 2:
return threadIdx.z;
}
return (size_t) -1;
}
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_local_size (const u32 dimindx)
{
// verify
return blockDim.x;
switch (dimindx)
{
case 0:
return blockDim.x;
case 1:
return blockDim.y;
case 2:
return blockDim.z;
}
return (size_t) -1;
}
DECLSPEC u32x rotl32 (const u32x a, const int n)
@ -305,9 +334,19 @@ DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
return atomicOr (p, val);
}
DECLSPEC size_t get_group_id (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_group_id (const u32 dimindx)
{
return blockIdx.x;
switch (dimindx)
{
case 0:
return blockIdx.x;
case 1:
return blockIdx.y;
case 2:
return blockIdx.z;
}
return (size_t) -1;
}
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
@ -315,15 +354,34 @@ DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
return (blockIdx.x * blockDim.x) + threadIdx.x;
}
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_local_id (const u32 dimindx)
{
return threadIdx.x;
switch (dimindx)
{
case 0:
return threadIdx.x;
case 1:
return threadIdx.y;
case 2:
return threadIdx.z;
}
return (size_t) -1;
}
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
DECLSPEC size_t get_local_size (const u32 dimindx)
{
// verify
return blockDim.x;
switch (dimindx)
{
case 0:
return blockDim.x;
case 1:
return blockDim.y;
case 2:
return blockDim.z;
}
return (size_t) -1;
}
DECLSPEC u32x rotl32 (const u32x a, const int n)

View File

@ -27,8 +27,9 @@ DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val);
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)));
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)));
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)));
DECLSPEC size_t get_group_id (const u32 dimindx);
DECLSPEC size_t get_local_id (const u32 dimindx);
DECLSPEC size_t get_local_size (const u32 dimindx);
DECLSPEC u32x rotl32 (const u32x a, const int n);
DECLSPEC u32x rotr32 (const u32x a, const int n);
@ -48,7 +49,8 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val);
DECLSPEC size_t get_global_id (const u32 dimindx);
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)));
DECLSPEC size_t get_group_id (const u32 dimindx);
DECLSPEC size_t get_local_id (const u32 dimindx);
DECLSPEC size_t get_local_size (const u32 dimindx);
@ -71,10 +73,25 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val);
#define get_global_id(param) hc_gid
#define get_local_id(param) hc_lid
#define get_local_size(param) hc_lsz
#define get_group_id(param) hc_bid
#define get_global_id(dimindx) \
((dimindx) == 0 ? hc_gid.x : \
(dimindx) == 1 ? hc_gid.y : \
(dimindx) == 2 ? hc_gid.z : -1)
#define get_group_id(dimindx) \
((dimindx) == 0 ? hc_bid.x : \
(dimindx) == 1 ? hc_bid.y : \
(dimindx) == 2 ? hc_bid.z : -1)
#define get_local_id(dimindx) \
((dimindx) == 0 ? hc_lid.x : \
(dimindx) == 1 ? hc_lid.y : \
(dimindx) == 2 ? hc_lid.z : -1)
#define get_local_size(dimindx) \
((dimindx) == 0 ? hc_lsz.x : \
(dimindx) == 1 ? hc_lsz.y : \
(dimindx) == 2 ? hc_lsz.z : -1)
DECLSPEC u32x rotl32 (const u32x a, const int n);
DECLSPEC u32x rotr32 (const u32x a, const int n);

View File

@ -13,28 +13,28 @@
GLOBAL_AS u32 *pws_comp, \
GLOBAL_AS pw_t *pws_buf, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_GPU_MEMSET \
GLOBAL_AS uint4 *buf, \
CONSTANT_AS const u32 &value, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_GPU_BZERO \
GLOBAL_AS uint4 *buf, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_GPU_ATINIT \
GLOBAL_AS pw_t *buf, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#define KERN_ATTR_GPU_UTF8_TO_UTF16 \
GLOBAL_AS pw_t *pws_buf, \
CONSTANT_AS const u64 &gid_max, \
uint hc_gid [[ thread_position_in_grid ]]
uint3 hc_gid [[ thread_position_in_grid ]]
#else // CUDA, HIP, OpenCL

View File

@ -11,11 +11,13 @@
#define BITMAP_SHIFT1 kernel_param->bitmap_shift1
#define BITMAP_SHIFT2 kernel_param->bitmap_shift2
#define SALT_POS_HOST (kernel_param->pws_pos + gid)
#define SALT_POS_HOST_BID (kernel_param->pws_pos + bid)
#define LOOP_POS kernel_param->loop_pos
#define LOOP_CNT kernel_param->loop_cnt
#define IL_CNT kernel_param->il_cnt
#define DIGESTS_CNT 1
#define DIGESTS_OFFSET_HOST (kernel_param->pws_pos + gid)
#define DIGESTS_OFFSET_HOST (kernel_param->pws_pos + gid)
#define DIGESTS_OFFSET_HOST_BID (kernel_param->pws_pos + bid)
#define COMBS_MODE kernel_param->combs_mode
#define SALT_REPEAT kernel_param->salt_repeat
#define PWS_POS kernel_param->pws_pos
@ -25,11 +27,13 @@
#define BITMAP_SHIFT1 kernel_param->bitmap_shift1
#define BITMAP_SHIFT2 kernel_param->bitmap_shift2
#define SALT_POS_HOST kernel_param->salt_pos_host
#define SALT_POS_HOST_BID SALT_POS_HOST
#define LOOP_POS kernel_param->loop_pos
#define LOOP_CNT kernel_param->loop_cnt
#define IL_CNT kernel_param->il_cnt
#define DIGESTS_CNT kernel_param->digests_cnt
#define DIGESTS_OFFSET_HOST kernel_param->digests_offset_host
#define DIGESTS_OFFSET_HOST_BID DIGESTS_OFFSET_HOST
#define COMBS_MODE kernel_param->combs_mode
#define SALT_REPEAT kernel_param->salt_repeat
#define PWS_POS kernel_param->pws_pos
@ -1565,6 +1569,202 @@ typedef enum ripemd160_constants
} ripemd160_constants_t;
typedef enum ripemd320_constants
{
RIPEMD320M_A=0x67452301U,
RIPEMD320M_B=0xefcdab89U,
RIPEMD320M_C=0x98badcfeU,
RIPEMD320M_D=0x10325476U,
RIPEMD320M_E=0xc3d2e1f0U,
RIPEMD320M_F=0x76543210U,
RIPEMD320M_G=0xfedcba98U,
RIPEMD320M_H=0x89abcdefU,
RIPEMD320M_I=0x01234567U,
RIPEMD320M_L=0x3c2d1e0fU,
RIPEMD320C00=0x00000000U,
RIPEMD320C10=0x5a827999U,
RIPEMD320C20=0x6ed9eba1U,
RIPEMD320C30=0x8f1bbcdcU,
RIPEMD320C40=0xa953fd4eU,
RIPEMD320C50=0x50a28be6U,
RIPEMD320C60=0x5c4dd124U,
RIPEMD320C70=0x6d703ef3U,
RIPEMD320C80=0x7a6d76e9U,
RIPEMD320C90=0x00000000U,
RIPEMD320S00=11,
RIPEMD320S01=14,
RIPEMD320S02=15,
RIPEMD320S03=12,
RIPEMD320S04=5,
RIPEMD320S05=8,
RIPEMD320S06=7,
RIPEMD320S07=9,
RIPEMD320S08=11,
RIPEMD320S09=13,
RIPEMD320S0A=14,
RIPEMD320S0B=15,
RIPEMD320S0C=6,
RIPEMD320S0D=7,
RIPEMD320S0E=9,
RIPEMD320S0F=8,
RIPEMD320S10=7,
RIPEMD320S11=6,
RIPEMD320S12=8,
RIPEMD320S13=13,
RIPEMD320S14=11,
RIPEMD320S15=9,
RIPEMD320S16=7,
RIPEMD320S17=15,
RIPEMD320S18=7,
RIPEMD320S19=12,
RIPEMD320S1A=15,
RIPEMD320S1B=9,
RIPEMD320S1C=11,
RIPEMD320S1D=7,
RIPEMD320S1E=13,
RIPEMD320S1F=12,
RIPEMD320S20=11,
RIPEMD320S21=13,
RIPEMD320S22=6,
RIPEMD320S23=7,
RIPEMD320S24=14,
RIPEMD320S25=9,
RIPEMD320S26=13,
RIPEMD320S27=15,
RIPEMD320S28=14,
RIPEMD320S29=8,
RIPEMD320S2A=13,
RIPEMD320S2B=6,
RIPEMD320S2C=5,
RIPEMD320S2D=12,
RIPEMD320S2E=7,
RIPEMD320S2F=5,
RIPEMD320S30=11,
RIPEMD320S31=12,
RIPEMD320S32=14,
RIPEMD320S33=15,
RIPEMD320S34=14,
RIPEMD320S35=15,
RIPEMD320S36=9,
RIPEMD320S37=8,
RIPEMD320S38=9,
RIPEMD320S39=14,
RIPEMD320S3A=5,
RIPEMD320S3B=6,
RIPEMD320S3C=8,
RIPEMD320S3D=6,
RIPEMD320S3E=5,
RIPEMD320S3F=12,
RIPEMD320S40=9,
RIPEMD320S41=15,
RIPEMD320S42=5,
RIPEMD320S43=11,
RIPEMD320S44=6,
RIPEMD320S45=8,
RIPEMD320S46=13,
RIPEMD320S47=12,
RIPEMD320S48=5,
RIPEMD320S49=12,
RIPEMD320S4A=13,
RIPEMD320S4B=14,
RIPEMD320S4C=11,
RIPEMD320S4D=8,
RIPEMD320S4E=5,
RIPEMD320S4F=6,
RIPEMD320S50=8,
RIPEMD320S51=9,
RIPEMD320S52=9,
RIPEMD320S53=11,
RIPEMD320S54=13,
RIPEMD320S55=15,
RIPEMD320S56=15,
RIPEMD320S57=5,
RIPEMD320S58=7,
RIPEMD320S59=7,
RIPEMD320S5A=8,
RIPEMD320S5B=11,
RIPEMD320S5C=14,
RIPEMD320S5D=14,
RIPEMD320S5E=12,
RIPEMD320S5F=6,
RIPEMD320S60=9,
RIPEMD320S61=13,
RIPEMD320S62=15,
RIPEMD320S63=7,
RIPEMD320S64=12,
RIPEMD320S65=8,
RIPEMD320S66=9,
RIPEMD320S67=11,
RIPEMD320S68=7,
RIPEMD320S69=7,
RIPEMD320S6A=12,
RIPEMD320S6B=7,
RIPEMD320S6C=6,
RIPEMD320S6D=15,
RIPEMD320S6E=13,
RIPEMD320S6F=11,
RIPEMD320S70=9,
RIPEMD320S71=7,
RIPEMD320S72=15,
RIPEMD320S73=11,
RIPEMD320S74=8,
RIPEMD320S75=6,
RIPEMD320S76=6,
RIPEMD320S77=14,
RIPEMD320S78=12,
RIPEMD320S79=13,
RIPEMD320S7A=5,
RIPEMD320S7B=14,
RIPEMD320S7C=13,
RIPEMD320S7D=13,
RIPEMD320S7E=7,
RIPEMD320S7F=5,
RIPEMD320S80=15,
RIPEMD320S81=5,
RIPEMD320S82=8,
RIPEMD320S83=11,
RIPEMD320S84=14,
RIPEMD320S85=14,
RIPEMD320S86=6,
RIPEMD320S87=14,
RIPEMD320S88=6,
RIPEMD320S89=9,
RIPEMD320S8A=12,
RIPEMD320S8B=9,
RIPEMD320S8C=12,
RIPEMD320S8D=5,
RIPEMD320S8E=15,
RIPEMD320S8F=8,
RIPEMD320S90=8,
RIPEMD320S91=5,
RIPEMD320S92=12,
RIPEMD320S93=9,
RIPEMD320S94=12,
RIPEMD320S95=5,
RIPEMD320S96=14,
RIPEMD320S97=6,
RIPEMD320S98=8,
RIPEMD320S99=13,
RIPEMD320S9A=6,
RIPEMD320S9B=5,
RIPEMD320S9C=15,
RIPEMD320S9D=13,
RIPEMD320S9E=11,
RIPEMD320S9F=11
} ripemd320_constants_t;
typedef enum keccak_constants
{
KECCAK_RNDC_00=0x0000000000000001UL,
@ -1812,6 +2012,7 @@ typedef struct salt
u32 salt_len_pc;
u32 salt_iter;
u32 salt_iter2;
u32 salt_dimy;
u32 salt_sign[2];
u32 salt_repeats;

View File

@ -183,13 +183,11 @@ using namespace metal;
#ifdef IS_CUDA
#define USE_BITSELECT
#define USE_ROTATE
#define USE_FUNNELSHIFT
#endif
#ifdef IS_HIP
#define USE_BITSELECT
#define USE_ROTATE
#define USE_FUNNELSHIFT
#endif
#ifdef IS_ROCM
@ -220,7 +218,12 @@ using namespace metal;
#define s3 w
#endif
#if HAS_SHFW == 1
#define USE_FUNNELSHIFT
#endif
// some algorithms do not like this, eg 150, 1100, ...
#ifdef NO_FUNNELSHIFT
#undef USE_FUNNELSHIFT
#endif

View File

@ -238,12 +238,162 @@ DECLSPEC void chacha20_transform (PRIVATE_AS const u32x *w0, PRIVATE_AS const u3
KERNEL_FQ KERNEL_FA void m15400_m04 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
{
// fixed size 32
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
if (gid >= GID_CNT) return;
u32 w0[4];
u32 w1[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
/**
* Salt prep
*/
u32 iv[2];
iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
u32 plain[2];
plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
u32 position[2];
position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
/**
* loop
*/
u32 w0l = pws[gid].i[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];
const u32x r2 = digest[2];
const u32x r3 = digest[3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ KERNEL_FA void m15400_m08 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
{
// fixed size 32
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
if (gid >= GID_CNT) return;
u32 w0[4];
u32 w1[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
/**
* Salt prep
*/
u32 iv[2];
iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
u32 plain[2];
plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
u32 position[2];
position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
/**
* loop
*/
u32 w0l = pws[gid].i[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];
const u32x r2 = digest[2];
const u32x r3 = digest[3];
COMPARE_M_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ KERNEL_FA void m15400_m16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
@ -328,12 +478,187 @@ KERNEL_FQ KERNEL_FA void m15400_m16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
KERNEL_FQ KERNEL_FA void m15400_s04 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
{
// fixed size 32
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
if (gid >= GID_CNT) return;
u32 w0[4];
u32 w1[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
/**
* Salt prep
*/
u32 iv[2];
iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
u32 plain[2];
plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
u32 position[2];
position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = pws[gid].i[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];
const u32x r2 = digest[2];
const u32x r3 = digest[3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ KERNEL_FA void m15400_s08 (KERN_ATTR_VECTOR_ESALT (chacha20_t))
{
// fixed size 32
/**
* modifier
*/
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);
if (gid >= GID_CNT) return;
u32 w0[4];
u32 w1[4];
w0[0] = pws[gid].i[0];
w0[1] = pws[gid].i[1];
w0[2] = pws[gid].i[2];
w0[3] = pws[gid].i[3];
w1[0] = pws[gid].i[4];
w1[1] = pws[gid].i[5];
w1[2] = pws[gid].i[6];
w1[3] = pws[gid].i[7];
/**
* Salt prep
*/
u32 iv[2];
iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[0];
iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv[1];
u32 plain[2];
plain[0] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[0];
plain[1] = esalt_bufs[DIGESTS_OFFSET_HOST].plain[1];
u32 position[2];
position[0] = esalt_bufs[DIGESTS_OFFSET_HOST].position[0];
position[1] = esalt_bufs[DIGESTS_OFFSET_HOST].position[1];
u32 offset = esalt_bufs[DIGESTS_OFFSET_HOST].offset;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* loop
*/
u32 w0l = pws[gid].i[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0x = w0l | w0r;
u32x w0_t[4];
u32x w1_t[4];
w0_t[0] = w0x;
w0_t[1] = w0[1];
w0_t[2] = w0[2];
w0_t[3] = w0[3];
w1_t[0] = w1[0];
w1_t[1] = w1[1];
w1_t[2] = w1[2];
w1_t[3] = w1[3];
u32x digest[4] = { 0 };
chacha20_transform (w0_t, w1_t, position, offset, iv, plain, digest);
const u32x r0 = digest[0];
const u32x r1 = digest[1];
const u32x r2 = digest[2];
const u32x r3 = digest[3];
COMPARE_S_SIMD (r0, r1, r2, r3);
}
}
KERNEL_FQ KERNEL_FA void m15400_s16 (KERN_ATTR_VECTOR_ESALT (chacha20_t))

View File

@ -17,6 +17,47 @@
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
DECLSPEC void _totp_calculate (PRIVATE_AS u32 *code, PRIVATE_AS const u32 *w, const u32 pw_len, PRIVATE_AS const u32 *s, const u32 salt_len)
{
sha1_hmac_ctx_t ctx;
sha1_hmac_init_swap (&ctx, w, pw_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
*code = otp_code % 1000000;
}
KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_RULES ())
{
/**
@ -34,63 +75,85 @@ KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_RULES ())
COPY_PW (pws[gid]);
const u32 salt_len = 8;
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
for (u32 i = 0; i < count; i += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
if (count == 1)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_hmac_ctx_t ctx;
sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
COMPARE_M_SCALAR (otp_code0, 0, 0, 0);
}
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
COMPARE_M_SCALAR (otp_code, 0, 0, 0);
u32 otp_code0, otp_code1;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s + 0, 8);
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, 0, 0);
}
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s + 0, 8);
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
_totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, 0);
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s + 0, 8);
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
_totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
_totp_calculate (&otp_code3, tmp.i, tmp.pw_len, s + 48, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
@ -123,62 +186,108 @@ KERNEL_FQ KERNEL_FA void m18100_sxx (KERN_ATTR_RULES ())
COPY_PW (pws[gid]);
const u32 salt_len = 8;
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
for (u32 i = 0; i < count; i += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
if (count == 1)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
sha1_hmac_ctx_t ctx;
sha1_hmac_init_swap (&ctx, tmp.i, tmp.pw_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
COMPARE_S_SCALAR (otp_code0, 0, 0, 0);
}
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
COMPARE_S_SCALAR (otp_code, 0, 0, 0);
u32 otp_code0, otp_code1;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, 0, 0);
}
}
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
if (otp_code1 == search[1])
{
_totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, 0);
}
}
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
pw_t tmp = PASTE_PW;
tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
u32 otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, tmp.i, tmp.pw_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, tmp.i, tmp.pw_len, s + 16, 8);
if (otp_code1 == search[1])
{
_totp_calculate (&otp_code2, tmp.i, tmp.pw_len, s + 32, 8);
if (otp_code2 == search[2])
{
_totp_calculate (&otp_code3, tmp.i, tmp.pw_len, s + 48, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
}
}
}

View File

@ -14,6 +14,47 @@
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
DECLSPEC void _totp_calculate (PRIVATE_AS u32 *code, PRIVATE_AS const u32 *w, const u32 pw_len, PRIVATE_AS const u32 *s, const u32 salt_len)
{
sha1_hmac_ctx_t ctx;
sha1_hmac_init (&ctx, w, pw_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
*code = otp_code % 1000000;
}
KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_BASIC ())
{
/**
@ -38,81 +79,157 @@ KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_BASIC ())
w[idx] = hc_swap32_S (pws[gid].i[idx]);
}
const u32 salt_len = 8;
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
for (u32 i = 0; i < count; i += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
if (count == 1)
{
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
COMPARE_M_SCALAR (otp_code0, 0, 0, 0);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
c[i] |= w[i];
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s + 0, 8);
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, 0, 0);
}
sha1_hmac_ctx_t ctx;
sha1_hmac_init (&ctx, c, pw_len + comb_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s + 0, 8);
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
_totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, 0);
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
const u32 comb_len = combs_buf[il_pos].pw_len;
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
u32 c[64];
COMPARE_M_SCALAR (otp_code, 0, 0, 0);
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s + 0, 8);
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
_totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
_totp_calculate (&otp_code3, c, pw_len + comb_len, s + 48, 8);
COMPARE_M_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
@ -152,80 +269,180 @@ KERNEL_FQ KERNEL_FA void m18100_sxx (KERN_ATTR_BASIC ())
w[idx] = hc_swap32_S (pws[gid].i[idx]);
}
const u32 salt_len = 8;
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32 s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
for (u32 i = 0; i < count; i += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
if (count == 1)
{
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
COMPARE_S_SCALAR (otp_code0, 0, 0, 0);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
c[i] |= w[i];
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, 0, 0);
}
}
sha1_hmac_ctx_t ctx;
sha1_hmac_init (&ctx, c, pw_len + comb_len);
sha1_hmac_update (&ctx, s, salt_len);
sha1_hmac_final (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
const u32 comb_len = combs_buf[il_pos].pw_len;
u32 c[64];
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
if (otp_code1 == search[1])
{
_totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, 0);
}
}
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
{
const u32 comb_len = combs_buf[il_pos].pw_len;
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
u32 c[64];
COMPARE_S_SCALAR (otp_code, 0, 0, 0);
#ifdef _unroll
#pragma unroll
#endif
for (int idx = 0; idx < 64; idx++)
{
c[idx] = hc_swap32_S (combs_buf[il_pos].i[idx]);
}
switch_buffer_by_offset_1x64_be_S (c, pw_len);
#ifdef _unroll
#pragma unroll
#endif
for (int i = 0; i < 64; i++)
{
c[i] |= w[i];
}
u32 otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, c, pw_len + comb_len, s, 8);
if (otp_code0 == search[0])
{
_totp_calculate (&otp_code1, c, pw_len + comb_len, s + 16, 8);
if (otp_code1 == search[1])
{
_totp_calculate (&otp_code2, c, pw_len + comb_len, s + 32, 8);
if (otp_code2 == search[2])
{
_totp_calculate (&otp_code3, c, pw_len + comb_len, s + 48, 8);
COMPARE_S_SCALAR (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
}
}
}

View File

@ -1,205 +1,324 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= GID_CNT) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = 8;
u32x s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_hmac_ctx_vector_t ctx;
sha1_hmac_init_vector (&ctx, w, pw_len);
sha1_hmac_update_vector (&ctx, s, salt_len);
sha1_hmac_final_vector (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
COMPARE_M_SIMD (otp_code, 0, 0, 0);
}
}
KERNEL_FQ KERNEL_FA void m18100_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= GID_CNT) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 salt_len = 8;
u32x s[64] = { 0 };
for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
{
s[idx] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[idx]);
}
/**
* loop
*/
u32x w0l = w[0];
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
sha1_hmac_ctx_vector_t ctx;
sha1_hmac_init_vector (&ctx, w, pw_len);
sha1_hmac_update_vector (&ctx, s, salt_len);
sha1_hmac_final_vector (&ctx);
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
COMPARE_S_SIMD (otp_code, 0, 0, 0);
}
}
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha1.cl)
#endif
DECLSPEC void _totp_calculate (PRIVATE_AS u32x *code, PRIVATE_AS const u32x *w, const u32 pw_len, PRIVATE_AS const u32x *s, const u32 salt_len)
{
sha1_hmac_ctx_vector_t ctx;
sha1_hmac_init_vector (&ctx, w, pw_len);
sha1_hmac_update_vector (&ctx, s, salt_len);
sha1_hmac_final_vector (&ctx);
// initialize a buffer for the otp code
u32x otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
switch (ctx.opad.h[4] & 15)
{
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
*code = otp_code % 1000000;
}
KERNEL_FQ KERNEL_FA void m18100_mxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= GID_CNT) return;
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32x s[64] = { 0 };
for (u32 i = 0; i < count; i += 1)
{
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
u32x w0l = w[0];
if (count == 1)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0;
_totp_calculate (&otp_code0, w, pw_len, s, 8);
COMPARE_M_SIMD (otp_code0, 0, 0, 0);
}
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1;
_totp_calculate (&otp_code0, w, pw_len, s + 0, 8);
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
COMPARE_M_SIMD (otp_code0, otp_code1, 0, 0);
}
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, w, pw_len, s + 0, 8);
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
_totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
COMPARE_M_SIMD (otp_code0, otp_code1, otp_code2, 0);
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, w, pw_len, s + 0, 8);
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
_totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
_totp_calculate (&otp_code3, w, pw_len, s + 48, 8);
COMPARE_M_SIMD (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
KERNEL_FQ KERNEL_FA void m18100_sxx (KERN_ATTR_VECTOR ())
{
/**
* modifier
*/
const u64 lid = get_local_id (0);
const u64 gid = get_global_id (0);
if (gid >= GID_CNT) return;
/**
* digest
*/
const u32 search[4] =
{
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
};
/**
* base
*/
const u32 pw_len = pws[gid].pw_len;
u32x w[64] = { 0 };
for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
{
w[idx] = pws[gid].i[idx];
}
const u32 count = salt_bufs[SALT_POS_HOST].salt_len / 16;
u32x s[64] = { 0 };
for (u32 i = 0; i < count; i += 1)
{
s[16 * i + 0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 0]);
s[16 * i + 1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4 * i + 1]);
}
/**
* loop
*/
u32x w0l = w[0];
if (count == 1)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0;
_totp_calculate (&otp_code0, w, pw_len, s, 8);
COMPARE_S_SIMD (otp_code0, 0, 0, 0);
}
}
else if (count == 2)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1;
_totp_calculate (&otp_code0, w, pw_len, s, 8);
if (MATCHES_ONE_VS(otp_code0, search[0]))
{
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
COMPARE_S_SIMD (otp_code0, otp_code1, 0, 0);
}
}
}
else if (count == 3)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1, otp_code2;
_totp_calculate (&otp_code0, w, pw_len, s, 8);
if (MATCHES_ONE_VS(otp_code0, search[0]))
{
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
if (MATCHES_ONE_VS(otp_code1, search[1]))
{
_totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
COMPARE_S_SIMD (otp_code0, otp_code1, otp_code2, 0);
}
}
}
}
else if (count == 4)
{
for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
{
const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
const u32x w0 = w0l | w0r;
w[0] = w0;
u32x otp_code0, otp_code1, otp_code2, otp_code3;
_totp_calculate (&otp_code0, w, pw_len, s, 8);
if (MATCHES_ONE_VS(otp_code0, search[0]))
{
_totp_calculate (&otp_code1, w, pw_len, s + 16, 8);
if (MATCHES_ONE_VS(otp_code1, search[1]))
{
_totp_calculate (&otp_code2, w, pw_len, s + 32, 8);
if (MATCHES_ONE_VS(otp_code2, search[2]))
{
_totp_calculate (&otp_code3, w, pw_len, s + 48, 8);
COMPARE_S_SIMD (otp_code0, otp_code1, otp_code2, otp_code3);
}
}
}
}
}
}

View File

@ -636,7 +636,7 @@ KERNEL_FQ KERNEL_FA void m18600_loop (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t
}
}
KERNEL_FQ KERNEL_FA void FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
KERNEL_FQ KERNEL_FA FIXED_THREAD_COUNT(FIXED_LOCAL_SIZE_COMP) void m18600_comp (KERN_ATTR_TMPS_ESALT (odf11_tmp_t, odf11_t))
{
const u64 gid = get_global_id (0);
const u64 lid = get_local_id (0);

View File

@ -368,7 +368,7 @@ KERNEL_FQ KERNEL_FA void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t,
AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4);
const int correct = is_valid_printable_32 (pt[0])
int correct = is_valid_printable_32 (pt[0])
+ is_valid_printable_32 (pt[1])
+ is_valid_printable_32 (pt[2])
+ is_valid_printable_32 (pt[3])
@ -379,6 +379,37 @@ KERNEL_FQ KERNEL_FA void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t,
if (correct != 8) return;
u32 ct2[8];
ct2[0] = pbkdf2_sha256_aes_gcm->ct_buf[8]; // third block of ciphertext
ct2[1] = pbkdf2_sha256_aes_gcm->ct_buf[9];
ct2[2] = pbkdf2_sha256_aes_gcm->ct_buf[10];
ct2[3] = pbkdf2_sha256_aes_gcm->ct_buf[11];
ct2[4] = pbkdf2_sha256_aes_gcm->ct_buf[12]; // fourth block of ciphertext
ct2[5] = pbkdf2_sha256_aes_gcm->ct_buf[13];
ct2[6] = pbkdf2_sha256_aes_gcm->ct_buf[14];
ct2[7] = pbkdf2_sha256_aes_gcm->ct_buf[15];
// Only a single increment as the previous AES_GCM_DECRYPT already does one for us
J0[3]++;
u32 pt2[8] = { 0 };
AES_GCM_decrypt (key, J0, ct2, 32, pt2, s_te0, s_te1, s_te2, s_te3, s_te4);
correct = is_valid_printable_32 (pt2[0])
+ is_valid_printable_32 (pt2[1])
+ is_valid_printable_32 (pt2[2])
+ is_valid_printable_32 (pt2[3])
+ is_valid_printable_32 (pt2[4])
+ is_valid_printable_32 (pt2[5])
+ is_valid_printable_32 (pt2[6])
+ is_valid_printable_32 (pt2[7]);
// We need to check a third and fourth block to avoid extremely rare false-positives. See:
// https://github.com/hashcat/hashcat/issues/4121
if (correct != 8) return;
/*
const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte
// only we can't guarantee it is not in the middle of a UTF8 byte stream at that point

157
OpenCL/m33000_a0-pure.cl Normal file
View File

@ -0,0 +1,157 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
typedef struct md5_double_salt
{
u32 salt1_buf[64];
int salt1_len;
u32 salt2_buf[64];
int salt2_len;
} md5_double_salt_t;
/**
 * m33000_mxx (-a 0, multi-hash): MD5 (salt1 . password . salt2).
 * One rule from rules_buf is applied to the base word per iteration,
 * then the digest is compared against the full set of loaded hashes.
 */
KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  COPY_PW (pws[gid]);

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  // copy salt2 into private memory word-wise; unused words stay zero
  u32 s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // hash salt1 once up front; this partial state is reused for every rule
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    md5_ctx_t ctx = ctx0;           // resume from the salt1-only state

    md5_update (&ctx, tmp.i, tmp.pw_len);

    md5_update (&ctx, s2, salt2_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m33000_sxx (-a 0, single-hash): MD5 (salt1 . password . salt2).
 * Same scheme as m33000_mxx, but compares against one target digest
 * preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  COPY_PW (pws[gid]);

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  // copy salt2 into private memory word-wise; unused words stay zero
  u32 s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // hash salt1 once up front; this partial state is reused for every rule
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    md5_ctx_t ctx = ctx0;           // resume from the salt1-only state

    md5_update (&ctx, tmp.i, tmp.pw_len);

    md5_update (&ctx, s2, salt2_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

147
OpenCL/m33000_a1-pure.cl Normal file
View File

@ -0,0 +1,147 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
typedef struct md5_double_salt
{
u32 salt1_buf[64];
int salt1_len;
u32 salt2_buf[64];
int salt2_len;
} md5_double_salt_t;
/**
 * m33000_mxx (-a 1 combinator, multi-hash): MD5 (salt1 . left . right . salt2).
 * The left word (pws) is folded into the partial MD5 state once; each
 * iteration appends one right word from combs_buf plus salt2.
 */
KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  // copy salt2 into private memory word-wise; unused words stay zero
  u32 s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // precompute MD5 (salt1 . left) once; reused for every right word
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    md5_ctx_t ctx = ctx0;

    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_update (&ctx, s2, salt2_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m33000_sxx (-a 1 combinator, single-hash): MD5 (salt1 . left . right . salt2).
 * Same scheme as the combinator mxx kernel, compared against one target
 * digest preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  // copy salt2 into private memory word-wise; unused words stay zero
  u32 s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // precompute MD5 (salt1 . left) once; reused for every right word
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    md5_ctx_t ctx = ctx0;

    md5_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_update (&ctx, s2, salt2_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

181
OpenCL/m33000_a3-pure.cl Normal file
View File

@ -0,0 +1,181 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
typedef struct md5_double_salt
{
u32 salt1_buf[64];
int salt1_len;
u32 salt2_buf[64];
int salt2_len;
} md5_double_salt_t;
/**
 * m33000_mxx (-a 3 brute-force, vectorized, multi-hash):
 * MD5 (salt1 . password . salt2). The first password word is OR-combined
 * with per-lane candidate bits from words_buf_r each iteration.
 */
KERNEL_FQ KERNEL_FA void m33000_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // scalar base words broadcast into all vector lanes
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  u32x s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // scalar partial state over salt1, expanded to vector lanes per iteration
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;   // merge fixed left bits with per-lane right bits

    w[0] = w0;

    md5_ctx_vector_t ctx;

    md5_init_vector_from_scalar (&ctx, &ctx0);

    md5_update_vector (&ctx, w, pw_len);

    md5_update_vector (&ctx, s2, salt2_len);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * m33000_sxx (-a 3 brute-force, vectorized, single-hash):
 * MD5 (salt1 . password . salt2), compared against one target digest
 * preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33000_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t))
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // scalar base words broadcast into all vector lanes
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const int salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len;

  u32x s2[64] = { 0 };

  for (int i = 0, idx = 0; i < salt2_len; i += 4, idx += 1)
  {
    s2[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx];
  }

  // scalar partial state over salt1, expanded to vector lanes per iteration
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len);

  /**
   * loop
   */

  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0 = w0l | w0r;   // merge fixed left bits with per-lane right bits

    w[0] = w0;

    md5_ctx_vector_t ctx;

    md5_init_vector_from_scalar (&ctx, &ctx0);

    md5_update_vector (&ctx, w, pw_len);

    md5_update_vector (&ctx, s2, salt2_len);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

277
OpenCL/m33100_a0-pure.cl Normal file
View File

@ -0,0 +1,277 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m33100_mxx (-a 0, multi-hash): md5 (salt . md5_hex (password) . salt).
 * The inner MD5 of the rule-mutated password is hex-encoded to 32
 * lowercase ASCII chars before being wrapped between two salt copies.
 */
KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  COPY_PW (pws[gid]);

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    // inner hash: MD5 (rule (password))
    md5_ctx_t ctx0;

    md5_init (&ctx0);

    md5_update (&ctx0, tmp.i, tmp.pw_len);

    md5_final (&ctx0);

    const u32 a = ctx0.h[0];
    const u32 b = ctx0.h[1];
    const u32 c = ctx0.h[2];
    const u32 d = ctx0.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_t ctx;

    md5_init (&ctx);

    md5_update (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update (&ctx, s, salt_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m33100_sxx (-a 0, single-hash): md5 (salt . md5_hex (password) . salt).
 * Same scheme as m33100_mxx, but compares against one target digest
 * preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  COPY_PW (pws[gid]);

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;

    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);

    // inner hash: MD5 (rule (password))
    md5_ctx_t ctx0;

    md5_init (&ctx0);

    md5_update (&ctx0, tmp.i, tmp.pw_len);

    md5_final (&ctx0);

    const u32 a = ctx0.h[0];
    const u32 b = ctx0.h[1];
    const u32 c = ctx0.h[2];
    const u32 d = ctx0.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_t ctx;

    md5_init (&ctx);

    md5_update (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update (&ctx, s, salt_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

271
OpenCL/m33100_a1-pure.cl Normal file
View File

@ -0,0 +1,271 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m33100_mxx (-a 1 combinator, multi-hash):
 * md5 (salt . md5_hex (left . right) . salt). The inner MD5 state over
 * the left word is precomputed once and reused for every right word.
 */
KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  // partial inner MD5 over the left word only
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // inner hash: MD5 (left . right)
    md5_ctx_t ctx1 = ctx0;

    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_final (&ctx1);

    const u32 a = ctx1.h[0];
    const u32 b = ctx1.h[1];
    const u32 c = ctx1.h[2];
    const u32 d = ctx1.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_t ctx;

    md5_init (&ctx);

    md5_update (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update (&ctx, s, salt_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * m33100_sxx (-a 1 combinator, single-hash):
 * md5 (salt . md5_hex (left . right) . salt), compared against one
 * target digest preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32 s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  // partial inner MD5 over the left word only
  md5_ctx_t ctx0;

  md5_init (&ctx0);

  md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // inner hash: MD5 (left . right)
    md5_ctx_t ctx1 = ctx0;

    md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);

    md5_final (&ctx1);

    const u32 a = ctx1.h[0];
    const u32 b = ctx1.h[1];
    const u32 c = ctx1.h[2];
    const u32 d = ctx1.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_t ctx;

    md5_init (&ctx);

    md5_update (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update (&ctx, s, salt_len);

    md5_final (&ctx);

    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

297
OpenCL/m33100_a3-pure.cl Normal file
View File

@ -0,0 +1,297 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_md5.cl)
#endif
#if VECT_SIZE == 1
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)])
#elif VECT_SIZE == 2
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1])
#elif VECT_SIZE == 4
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3])
#elif VECT_SIZE == 8
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7])
#elif VECT_SIZE == 16
#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf])
#endif
/**
 * m33100_mxx (-a 3 brute-force, vectorized, multi-hash):
 * md5 (salt . md5_hex (password) . salt). Candidate bits from
 * words_buf_r are OR-merged into the first password word per lane.
 */
KERNEL_FQ KERNEL_FA void m33100_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // scalar base words broadcast into all vector lanes
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32x s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  /**
   * loop
   */

  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0lr = w0l | w0r;   // merge fixed left bits with per-lane right bits

    w[0] = w0lr;

    // inner hash: MD5 (password)
    md5_ctx_vector_t ctx0;

    md5_init_vector (&ctx0);

    md5_update_vector (&ctx0, w, pw_len);

    md5_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_vector_t ctx;

    md5_init_vector (&ctx);

    md5_update_vector (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_vector_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update_vector (&ctx, s, salt_len);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * m33100_sxx (-a 3 brute-force, vectorized, single-hash):
 * md5 (salt . md5_hex (password) . salt), compared against one target
 * digest preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33100_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);
  const u64 lsz = get_local_size (0);

  /**
   * bin2asc table: byte -> two lowercase hex ASCII chars, built
   * cooperatively by the work-group in local memory
   */

  LOCAL_VK u32 l_bin2asc[256];

  for (u32 i = lid; i < 256; i += lsz)
  {
    const u32 i0 = (i >> 0) & 15;
    const u32 i1 = (i >> 4) & 15;

    l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8
                 | ((i1 < 10) ? '0' + i1 : 'a' - 10 + i1) << 0;
  }

  SYNC_THREADS ();

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  const u32 pw_len = pws[gid].pw_len;

  // scalar base words broadcast into all vector lanes
  u32x w[64] = { 0 };

  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  u32x s[64] = { 0 };

  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }

  /**
   * loop
   */

  u32x w0l = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];

    const u32x w0lr = w0l | w0r;   // merge fixed left bits with per-lane right bits

    w[0] = w0lr;

    // inner hash: MD5 (password)
    md5_ctx_vector_t ctx0;

    md5_init_vector (&ctx0);

    md5_update_vector (&ctx0, w, pw_len);

    md5_final_vector (&ctx0);

    const u32x a = ctx0.h[0];
    const u32x b = ctx0.h[1];
    const u32x c = ctx0.h[2];
    const u32x d = ctx0.h[3];

    // outer hash: md5 (salt . hex (inner) . salt)
    md5_ctx_vector_t ctx;

    md5_init_vector (&ctx);

    md5_update_vector (&ctx, s, salt_len);

    // hex-encode the 16-byte inner digest into 32 ASCII chars (8 words)
    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];

    w0[0] = uint_to_hex_lower8 ((a >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((a >>  8) & 255) << 16;
    w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((a >> 24) & 255) << 16;
    w0[2] = uint_to_hex_lower8 ((b >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((b >>  8) & 255) << 16;
    w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((b >> 24) & 255) << 16;
    w1[0] = uint_to_hex_lower8 ((c >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((c >>  8) & 255) << 16;
    w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((c >> 24) & 255) << 16;
    w1[2] = uint_to_hex_lower8 ((d >>  0) & 255) <<  0
          | uint_to_hex_lower8 ((d >>  8) & 255) << 16;
    w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) <<  0
          | uint_to_hex_lower8 ((d >> 24) & 255) << 16;

    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    md5_update_vector_64 (&ctx, w0, w1, w2, w3, 32);

    md5_update_vector (&ctx, s, salt_len);

    md5_final_vector (&ctx);

    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

View File

@ -0,0 +1,225 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp_optimized.h)
#include M2S(INCLUDE_PATH/inc_rp_optimized.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * Feed one 64-byte message block, supplied as 16 contiguous u32x words,
 * into the RIPEMD-320 vector transform (which expects four 4-word chunks).
 */
DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
{
  ripemd320_transform_vector (&w[ 0], &w[ 4], &w[ 8], &w[12], dgst);
}
/**
 * m33600_m04 (-a 0, optimized, multi-hash): raw RIPEMD-320 of the
 * rule-mutated password. Optimized single-block path: the password
 * length is masked to 0..63 so the whole message fits one transform.
 */
KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };   // NOTE(review): w2/w3 are zero-filled and never written below
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    // place the 0x80 terminator byte directly after the candidate
    append_0x80_2x4_VV (w0, w1, out_len);

    u32x w[16];

    w[ 0] = w0[0];
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = 0;
    w[ 9] = 0;
    w[10] = 0;
    w[11] = 0;
    w[12] = 0;
    w[13] = 0;
    w[14] = out_len * 8;   // message length in bits (low word)
    w[15] = 0;

    /**
     * RipeMD320
     */

    u32x dgst[10];

    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;

    ripemd320_transform_transport_vector (w, dgst);

    // only digest words 0..3 are compared on-device; assumes the host
    // maps DGST_R0..R3 to those words for this mode -- TODO confirm
    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
// Intentionally empty stub: the optimized-kernel ABI expects _m08/_m16
// entry points, but this mode only implements the single-block _m04 path.
KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_RULES ())
{
}
// Intentionally empty stub: see the note on m33600_m08.
KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_RULES ())
{
}
/**
 * m33600_s04 (-a 0, optimized, single-hash): raw RIPEMD-320 of the
 * rule-mutated password, compared against one target digest preloaded
 * into `search`. Single-block path (password length masked to 0..63).
 */
KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);

  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  u32 pw_buf0[4];
  u32 pw_buf1[4];

  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];

  const u32 pw_len = pws[gid].pw_len & 63;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    u32x w0[4] = { 0 };
    u32x w1[4] = { 0 };
    u32x w2[4] = { 0 };   // NOTE(review): w2/w3 are zero-filled and never written below
    u32x w3[4] = { 0 };

    const u32x out_len = apply_rules_vect_optimized (pw_buf0, pw_buf1, pw_len, rules_buf, il_pos, w0, w1);

    // place the 0x80 terminator byte directly after the candidate
    append_0x80_2x4_VV (w0, w1, out_len);

    u32x w[16];

    w[ 0] = w0[0];
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = 0;
    w[ 9] = 0;
    w[10] = 0;
    w[11] = 0;
    w[12] = 0;
    w[13] = 0;
    w[14] = out_len * 8;   // message length in bits (low word)
    w[15] = 0;

    /**
     * RipeMD320
     */

    u32x dgst[10];

    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;

    ripemd320_transform_transport_vector (w, dgst);

    // only digest words 0..3 are compared on-device; assumes the host
    // maps DGST_R0..R3 to those words for this mode -- TODO confirm
    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
// Intentionally empty stub: the optimized-kernel ABI expects _s08/_s16
// entry points, but this mode only implements the single-block _s04 path.
KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_RULES ())
{
}
// Intentionally empty stub: see the note on m33600_s08.
KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_RULES ())
{
}

118
OpenCL/m33600_a0-pure.cl Normal file
View File

@ -0,0 +1,118 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * m33600_mxx (-a 0, pure, multi-hash): raw RIPEMD-320 of the
 * rule-mutated password, compared against all loaded digests.
 */
KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  /**
   * base
   */

  COPY_PW (pws[gid]);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // mutate the base word with the current rule
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    ripemd320_ctx_t ctx;

    ripemd320_init   (&ctx);
    ripemd320_update (&ctx, cand.i, cand.pw_len);
    ripemd320_final  (&ctx);

    const u32 d0 = ctx.h[DGST_R0];
    const u32 d1 = ctx.h[DGST_R1];
    const u32 d2 = ctx.h[DGST_R2];
    const u32 d3 = ctx.h[DGST_R3];

    COMPARE_M_SCALAR (d0, d1, d2, d3);
  }
}
/**
 * m33600_sxx (-a 0, pure, single-hash): raw RIPEMD-320 of the
 * rule-mutated password, compared against the single target digest
 * preloaded into `search`.
 */
KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base
   */

  COPY_PW (pws[gid]);

  /**
   * loop
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    // mutate the base word with the current rule
    pw_t cand = PASTE_PW;

    cand.pw_len = apply_rules (rules_buf[il_pos].cmds, cand.i, cand.pw_len);

    ripemd320_ctx_t ctx;

    ripemd320_init   (&ctx);
    ripemd320_update (&ctx, cand.i, cand.pw_len);
    ripemd320_final  (&ctx);

    const u32 d0 = ctx.h[DGST_R0];
    const u32 d1 = ctx.h[DGST_R1];
    const u32 d2 = ctx.h[DGST_R2];
    const u32 d3 = ctx.h[DGST_R3];

    COMPARE_S_SCALAR (d0, d1, d2, d3);
  }
}

View File

@ -0,0 +1,339 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * Adapter: feeds a flat 16-word message block to the RIPEMD-320 vector
 * transform, which expects the block as four pointers of 4 words each.
 */
DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
{
  ripemd320_transform_vector (w + 0, w + 4, w + 8, w + 12, dgst);
}
/**
 * RIPEMD-320 (-m 33600), combinator attack (-a 1), optimized kernel,
 * multi-hash path, base passwords of up to 4 u32 words. Concatenates the
 * left (base) and right (combinator) words into one 64-byte message block,
 * runs a single RIPEMD-320 transform and checks via COMPARE_M_SIMD.
 */
KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  /**
   * base
   */
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  u32 pw_buf0[4];
  u32 pw_buf1[4];
  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];
  // optimized kernels cap lengths at 63 bytes (single message block)
  const u32 pw_l_len = pws[gid].pw_len & 63;
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
    const u32x pw_len = (pw_l_len + pw_r_len) & 63;
    /**
     * concat password candidate
     */
    // wordl2/wordl3 (and wordr2/wordr3) start empty but can receive data
    // when switch_buffer_by_offset_le_VV shifts words past 32 bytes
    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };
    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];
    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };
    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
    // shift whichever side is appended so the two halves can be OR-merged
    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }
    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];
    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];
    /**
     * RipeMD320
     */
    u32x w[16];
    w[ 0] = w0[0];
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = w2[0];
    w[ 9] = w2[1];
    w[10] = w2[2];
    w[11] = w2[3];
    w[12] = w3[0];
    w[13] = w3[1];
    w[14] = pw_len * 8; // message length in bits (MD4-style length word)
    w[15] = 0;
    u32x dgst[10]; // 320-bit state = 10 words
    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;
    ripemd320_transform_transport_vector (w, dgst);
    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
/**
 * Intentionally empty stub for the 5-8 word length class (multi-hash,
 * combinator attack); symbol required by the loader, class unused here.
 */
KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_BASIC ())
{
}
/**
 * Intentionally empty stub for the 9-16 word length class (multi-hash,
 * combinator attack); symbol required by the loader, class unused here.
 */
KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_BASIC ())
{
}
/**
 * RIPEMD-320 (-m 33600), combinator attack (-a 1), optimized kernel,
 * single-hash path. Identical candidate construction to m33600_m04; the
 * single target digest is loaded into `search` and matched with
 * COMPARE_S_SIMD.
 */
KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  /**
   * base
   */
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  u32 pw_buf0[4];
  u32 pw_buf1[4];
  pw_buf0[0] = pws[gid].i[0];
  pw_buf0[1] = pws[gid].i[1];
  pw_buf0[2] = pws[gid].i[2];
  pw_buf0[3] = pws[gid].i[3];
  pw_buf1[0] = pws[gid].i[4];
  pw_buf1[1] = pws[gid].i[5];
  pw_buf1[2] = pws[gid].i[6];
  pw_buf1[3] = pws[gid].i[7];
  const u32 pw_l_len = pws[gid].pw_len & 63; // single-block limit
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x pw_r_len = pwlenx_create_combt (combs_buf, il_pos) & 63;
    const u32x pw_len = (pw_l_len + pw_r_len) & 63;
    /**
     * concat password candidate
     */
    u32x wordl0[4] = { 0 };
    u32x wordl1[4] = { 0 };
    u32x wordl2[4] = { 0 };
    u32x wordl3[4] = { 0 };
    wordl0[0] = pw_buf0[0];
    wordl0[1] = pw_buf0[1];
    wordl0[2] = pw_buf0[2];
    wordl0[3] = pw_buf0[3];
    wordl1[0] = pw_buf1[0];
    wordl1[1] = pw_buf1[1];
    wordl1[2] = pw_buf1[2];
    wordl1[3] = pw_buf1[3];
    u32x wordr0[4] = { 0 };
    u32x wordr1[4] = { 0 };
    u32x wordr2[4] = { 0 };
    u32x wordr3[4] = { 0 };
    wordr0[0] = ix_create_combt (combs_buf, il_pos, 0);
    wordr0[1] = ix_create_combt (combs_buf, il_pos, 1);
    wordr0[2] = ix_create_combt (combs_buf, il_pos, 2);
    wordr0[3] = ix_create_combt (combs_buf, il_pos, 3);
    wordr1[0] = ix_create_combt (combs_buf, il_pos, 4);
    wordr1[1] = ix_create_combt (combs_buf, il_pos, 5);
    wordr1[2] = ix_create_combt (combs_buf, il_pos, 6);
    wordr1[3] = ix_create_combt (combs_buf, il_pos, 7);
    // shift the appended side so both halves can be OR-merged
    if (COMBS_MODE == COMBINATOR_MODE_BASE_LEFT)
    {
      switch_buffer_by_offset_le_VV (wordr0, wordr1, wordr2, wordr3, pw_l_len);
    }
    else
    {
      switch_buffer_by_offset_le_VV (wordl0, wordl1, wordl2, wordl3, pw_r_len);
    }
    u32x w0[4];
    u32x w1[4];
    u32x w2[4];
    u32x w3[4];
    w0[0] = wordl0[0] | wordr0[0];
    w0[1] = wordl0[1] | wordr0[1];
    w0[2] = wordl0[2] | wordr0[2];
    w0[3] = wordl0[3] | wordr0[3];
    w1[0] = wordl1[0] | wordr1[0];
    w1[1] = wordl1[1] | wordr1[1];
    w1[2] = wordl1[2] | wordr1[2];
    w1[3] = wordl1[3] | wordr1[3];
    w2[0] = wordl2[0] | wordr2[0];
    w2[1] = wordl2[1] | wordr2[1];
    w2[2] = wordl2[2] | wordr2[2];
    w2[3] = wordl2[3] | wordr2[3];
    w3[0] = wordl3[0] | wordr3[0];
    w3[1] = wordl3[1] | wordr3[1];
    w3[2] = wordl3[2] | wordr3[2];
    w3[3] = wordl3[3] | wordr3[3];
    /**
     * RipeMD320
     */
    u32x w[16];
    w[ 0] = w0[0];
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = w2[0];
    w[ 9] = w2[1];
    w[10] = w2[2];
    w[11] = w2[3];
    w[12] = w3[0];
    w[13] = w3[1];
    w[14] = pw_len * 8; // message length in bits
    w[15] = 0;
    u32x dgst[10];
    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;
    ripemd320_transform_transport_vector (w, dgst);
    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
/**
 * Intentionally empty stub for the 5-8 word length class (single-hash,
 * combinator attack); symbol required by the loader, class unused here.
 */
KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_BASIC ())
{
}
/**
 * Intentionally empty stub for the 9-16 word length class (single-hash,
 * combinator attack); symbol required by the loader, class unused here.
 */
KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_BASIC ())
{
}

112
OpenCL/m33600_a1-pure.cl Normal file
View File

@ -0,0 +1,112 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * RIPEMD-320 (-m 33600), combinator attack (-a 1), pure kernel,
 * multi-hash path. The base password is absorbed into ctx0 once; each
 * loop iteration copies ctx0, appends one combinator word, finalizes and
 * compares via COMPARE_M_SCALAR.
 */
KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  ripemd320_ctx_t ctx0;
  ripemd320_init (&ctx0);
  // absorb the left/base word once; loop only hashes the appended part
  ripemd320_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    ripemd320_ctx_t ctx = ctx0; // restart from the precomputed midstate
    ripemd320_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
    ripemd320_final (&ctx);
    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];
    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * RIPEMD-320 (-m 33600), combinator attack (-a 1), pure kernel,
 * single-hash path. Same midstate-reuse scheme as m33600_mxx, matched
 * against the single target digest via COMPARE_S_SCALAR.
 */
KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  ripemd320_ctx_t ctx0;
  ripemd320_init (&ctx0);
  ripemd320_update_global (&ctx0, pws[gid].i, pws[gid].pw_len);
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    ripemd320_ctx_t ctx = ctx0;
    ripemd320_update_global (&ctx, combs_buf[il_pos].i, combs_buf[il_pos].pw_len);
    ripemd320_final (&ctx);
    const u32 r0 = ctx.h[DGST_R0];
    const u32 r1 = ctx.h[DGST_R1];
    const u32 r2 = ctx.h[DGST_R2];
    const u32 r3 = ctx.h[DGST_R3];
    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

View File

@ -0,0 +1,447 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * Adapter: feeds a flat 16-word message block to the RIPEMD-320 vector
 * transform, which expects the block as four pointers of 4 words each.
 */
DECLSPEC void ripemd320_transform_transport_vector (PRIVATE_AS const u32x *w, PRIVATE_AS u32x *dgst)
{
  ripemd320_transform_vector (w + 0, w + 4, w + 8, w + 12, dgst);
}
/**
 * Worker for the brute-force (-a 3) optimized multi-hash kernels.
 * The first word of the candidate is varied per iteration from bfs_buf;
 * the rest of the (single) message block is fixed. Each candidate runs one
 * RIPEMD-320 transform and is checked via COMPARE_M_SIMD.
 */
DECLSPEC void m33600m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ())
{
  /**
   * modifiers are taken from args
   */
  /**
   * loop
   */
  u32 w0l = w0[0]; // fixed (left) part of the first word
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos); // varying (right) part
    const u32x w0lr = w0l | w0r;
    u32x w[16];
    w[ 0] = w0lr;
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = w2[0];
    w[ 9] = w2[1];
    w[10] = w2[2];
    w[11] = w2[3];
    w[12] = w3[0];
    w[13] = w3[1];
    w[14] = pw_len * 8; // message length in bits
    w[15] = 0;
    /**
     * RipeMD320
     */
    u32x dgst[10]; // 320-bit state
    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;
    ripemd320_transform_transport_vector (w, dgst);
    COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
/**
 * Worker for the brute-force (-a 3) optimized single-hash kernels.
 * Same candidate scheme as m33600m; the single target digest is loaded
 * into `search` and matched via COMPARE_S_SIMD.
 */
DECLSPEC void m33600s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 pw_len, KERN_ATTR_FUNC_BASIC ())
{
  /**
   * modifiers are taken from args
   */
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * loop
   */
  u32 w0l = w0[0];
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = ix_create_bft (bfs_buf, il_pos);
    const u32x w0lr = w0l | w0r;
    u32x w[16];
    w[ 0] = w0lr;
    w[ 1] = w0[1];
    w[ 2] = w0[2];
    w[ 3] = w0[3];
    w[ 4] = w1[0];
    w[ 5] = w1[1];
    w[ 6] = w1[2];
    w[ 7] = w1[3];
    w[ 8] = w2[0];
    w[ 9] = w2[1];
    w[10] = w2[2];
    w[11] = w2[3];
    w[12] = w3[0];
    w[13] = w3[1];
    w[14] = pw_len * 8; // message length in bits
    w[15] = 0;
    /**
     * RipeMD320
     */
    u32x dgst[10];
    dgst[0] = RIPEMD320M_A;
    dgst[1] = RIPEMD320M_B;
    dgst[2] = RIPEMD320M_C;
    dgst[3] = RIPEMD320M_D;
    dgst[4] = RIPEMD320M_E;
    dgst[5] = RIPEMD320M_F;
    dgst[6] = RIPEMD320M_G;
    dgst[7] = RIPEMD320M_H;
    dgst[8] = RIPEMD320M_I;
    dgst[9] = RIPEMD320M_L;
    ripemd320_transform_transport_vector (w, dgst);
    COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]);
  }
}
/**
 * Entry point: brute force (-a 3), multi-hash, passwords up to 4 u32
 * words (w1..w3 zeroed). Loads the fixed words and delegates to m33600m.
 */
KERNEL_FQ KERNEL_FA void m33600_m04 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;
  u32 w2[4];
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  u32 w3[4];
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63; // single-block limit
  /**
   * main
   */
  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}
/**
 * Entry point: brute force (-a 3), multi-hash, passwords of 5-8 u32
 * words (w2/w3 zeroed). Loads the fixed words and delegates to m33600m.
 */
KERNEL_FQ KERNEL_FA void m33600_m08 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];
  u32 w2[4];
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  u32 w3[4];
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63;
  /**
   * main
   */
  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}
/**
 * Entry point: brute force (-a 3), multi-hash, passwords of 9-14 u32
 * words (w3[2]/w3[3] reserved for length/padding words). Delegates to
 * m33600m.
 */
KERNEL_FQ KERNEL_FA void m33600_m16 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];
  u32 w2[4];
  w2[0] = pws[gid].i[ 8];
  w2[1] = pws[gid].i[ 9];
  w2[2] = pws[gid].i[10];
  w2[3] = pws[gid].i[11];
  u32 w3[4];
  w3[0] = pws[gid].i[12];
  w3[1] = pws[gid].i[13];
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63;
  /**
   * main
   */
  m33600m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}
/**
 * Entry point: brute force (-a 3), single-hash, passwords up to 4 u32
 * words. Loads the fixed words and delegates to m33600s.
 */
KERNEL_FQ KERNEL_FA void m33600_s04 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;
  u32 w2[4];
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  u32 w3[4];
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63;
  /**
   * main
   */
  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}
/**
 * Entry point: brute force (-a 3), single-hash, passwords of 5-8 u32
 * words. Loads the fixed words and delegates to m33600s.
 */
KERNEL_FQ KERNEL_FA void m33600_s08 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];
  u32 w2[4];
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  u32 w3[4];
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63;
  /**
   * main
   */
  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}
/**
 * Entry point: brute force (-a 3), single-hash, passwords of 9-14 u32
 * words (w3[2]/w3[3] reserved for length/padding words). Delegates to
 * m33600s.
 */
KERNEL_FQ KERNEL_FA void m33600_s16 (KERN_ATTR_BASIC ())
{
  /**
   * base
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  const u64 lsz = get_local_size (0);
  if (gid >= GID_CNT) return;
  u32 w0[4];
  w0[0] = pws[gid].i[ 0];
  w0[1] = pws[gid].i[ 1];
  w0[2] = pws[gid].i[ 2];
  w0[3] = pws[gid].i[ 3];
  u32 w1[4];
  w1[0] = pws[gid].i[ 4];
  w1[1] = pws[gid].i[ 5];
  w1[2] = pws[gid].i[ 6];
  w1[3] = pws[gid].i[ 7];
  u32 w2[4];
  w2[0] = pws[gid].i[ 8];
  w2[1] = pws[gid].i[ 9];
  w2[2] = pws[gid].i[10];
  w2[3] = pws[gid].i[11];
  u32 w3[4];
  w3[0] = pws[gid].i[12];
  w3[1] = pws[gid].i[13];
  w3[2] = 0;
  w3[3] = 0;
  const u32 pw_len = pws[gid].pw_len & 63;
  /**
   * main
   */
  m33600s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz);
}

138
OpenCL/m33600_a3-pure.cl Normal file
View File

@ -0,0 +1,138 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * RIPEMD-320 (-m 33600), brute-force attack (-a 3), pure vector kernel,
 * multi-hash path. Only word 0 of the candidate varies per iteration
 * (from words_buf_r); the rest of the password buffer is fixed.
 */
KERNEL_FQ KERNEL_FA void m33600_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32x w[64] = { 0 }; // up to 256 bytes of password material
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  /**
   * loop
   */
  u32x w0l = w[0]; // fixed part of word 0
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; // varying part
    const u32x w0 = w0l | w0r;
    w[0] = w0;
    ripemd320_ctx_vector_t ctx;
    ripemd320_init_vector (&ctx);
    ripemd320_update_vector (&ctx, w, pw_len);
    ripemd320_final_vector (&ctx);
    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];
    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * RIPEMD-320 (-m 33600), brute-force attack (-a 3), pure vector kernel,
 * single-hash path. Same scheme as m33600_mxx, matched against the one
 * target digest via COMPARE_S_SIMD.
 */
KERNEL_FQ KERNEL_FA void m33600_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32x w[64] = { 0 };
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  /**
   * loop
   */
  u32x w0l = w[0];
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
    const u32x w0 = w0l | w0r;
    w[0] = w0;
    ripemd320_ctx_vector_t ctx;
    ripemd320_init_vector (&ctx);
    ripemd320_update_vector (&ctx, w, pw_len);
    ripemd320_final_vector (&ctx);
    const u32x r0 = ctx.h[DGST_R0];
    const u32x r1 = ctx.h[DGST_R1];
    const u32x r2 = ctx.h[DGST_R2];
    const u32x r3 = ctx.h[DGST_R3];
    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

135
OpenCL/m33650_a0-pure.cl Normal file
View File

@ -0,0 +1,135 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), rules attack (-a 0), pure
 * kernel, multi-hash path. For each rule: HMAC is keyed with the mutated
 * password, the salt is the message.
 */
KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  COPY_PW (pws[gid]);
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 }; // salt copied to private memory once
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
    ripemd320_hmac_ctx_t ctx;
    ripemd320_hmac_init (&ctx, tmp.i, tmp.pw_len); // key = candidate
    ripemd320_hmac_update (&ctx, s, salt_len);     // message = salt
    ripemd320_hmac_final (&ctx);
    // final HMAC digest lives in the opad context
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), rules attack (-a 0), pure
 * kernel, single-hash path. Same scheme as m33650_mxx with COMPARE_S.
 */
KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  COPY_PW (pws[gid]);
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
    ripemd320_hmac_ctx_t ctx;
    ripemd320_hmac_init (&ctx, tmp.i, tmp.pw_len); // key = candidate
    ripemd320_hmac_update (&ctx, s, salt_len);     // message = salt
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

183
OpenCL/m33650_a1-pure.cl Normal file
View File

@ -0,0 +1,183 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), combinator attack (-a 1),
 * pure kernel, multi-hash path. The combinator word is shifted to the
 * base password's length and OR-merged; the concatenation keys the HMAC,
 * the salt is the message.
 */
KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32 w[64] = { 0 }; // left/base password words
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    const u32 comb_len = combs_buf[il_pos].pw_len;
    u32 c[64];
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int idx = 0; idx < 64; idx++)
    {
      c[idx] = combs_buf[il_pos].i[idx];
    }
    switch_buffer_by_offset_1x64_le_S (c, pw_len); // align right part after base
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 64; i++)
    {
      c[i] |= w[i]; // merge base password into the shifted buffer
    }
    ripemd320_hmac_ctx_t ctx;
    ripemd320_hmac_init (&ctx, c, pw_len + comb_len); // key = concatenation
    ripemd320_hmac_update (&ctx, s, salt_len);        // message = salt
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), combinator attack (-a 1),
 * pure kernel, single-hash path. Same candidate construction as the
 * multi-hash variant, matched via COMPARE_S_SCALAR.
 */
KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32 w[64] = { 0 };
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    const u32 comb_len = combs_buf[il_pos].pw_len;
    u32 c[64];
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int idx = 0; idx < 64; idx++)
    {
      c[idx] = combs_buf[il_pos].i[idx];
    }
    switch_buffer_by_offset_1x64_le_S (c, pw_len); // shift right part into place
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 64; i++)
    {
      c[i] |= w[i];
    }
    ripemd320_hmac_ctx_t ctx;
    ripemd320_hmac_init (&ctx, c, pw_len + comb_len);
    ripemd320_hmac_update (&ctx, s, salt_len);
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

155
OpenCL/m33650_a3-pure.cl Normal file
View File

@ -0,0 +1,155 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), brute-force attack (-a 3),
 * pure vector kernel, multi-hash path. Word 0 of the key varies per
 * iteration; the HMAC must be re-keyed each time since the key changes.
 */
KERNEL_FQ KERNEL_FA void m33650_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32x w[64] = { 0 };
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32x s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  u32x w0l = w[0];
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
    const u32x w0 = w0l | w0r;
    w[0] = w0;
    ripemd320_hmac_ctx_vector_t ctx;
    ripemd320_hmac_init_vector (&ctx, w, pw_len); // key = candidate
    ripemd320_hmac_update_vector (&ctx, s, salt_len);
    ripemd320_hmac_final_vector (&ctx);
    const u32x r0 = ctx.opad.h[DGST_R0];
    const u32x r1 = ctx.opad.h[DGST_R1];
    const u32x r2 = ctx.opad.h[DGST_R2];
    const u32x r3 = ctx.opad.h[DGST_R3];
    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
/**
 * HMAC-RIPEMD-320, key = password (-m 33650), brute-force attack (-a 3),
 * pure vector kernel, single-hash path. Same scheme as the multi-hash
 * variant, matched via COMPARE_S_SIMD.
 */
KERNEL_FQ KERNEL_FA void m33650_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32x w[64] = { 0 };
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32x s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  /**
   * loop
   */
  u32x w0l = w[0];
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    const u32x w0r = words_buf_r[il_pos / VECT_SIZE];
    const u32x w0 = w0l | w0r;
    w[0] = w0;
    ripemd320_hmac_ctx_vector_t ctx;
    ripemd320_hmac_init_vector (&ctx, w, pw_len);
    ripemd320_hmac_update_vector (&ctx, s, salt_len);
    ripemd320_hmac_final_vector (&ctx);
    const u32x r0 = ctx.opad.h[DGST_R0];
    const u32x r1 = ctx.opad.h[DGST_R1];
    const u32x r2 = ctx.opad.h[DGST_R2];
    const u32x r3 = ctx.opad.h[DGST_R3];
    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

139
OpenCL/m33660_a0-pure.cl Normal file
View File

@ -0,0 +1,139 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_rp.h)
#include M2S(INCLUDE_PATH/inc_rp.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * HMAC-RIPEMD-320, key = salt (-m 33660), rules attack (-a 0), pure
 * kernel, multi-hash path. The HMAC is keyed with the salt ONCE (ctx0);
 * each rule-mutated password is hashed as the message from that midstate.
 */
KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  COPY_PW (pws[gid]);
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  // key the HMAC once with the salt; reused for every candidate
  ripemd320_hmac_ctx_t ctx0;
  ripemd320_hmac_init (&ctx0, s, salt_len);
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
    ripemd320_hmac_ctx_t ctx = ctx0; // restart from the keyed midstate
    ripemd320_hmac_update (&ctx, tmp.i, tmp.pw_len); // message = candidate
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
/**
 * HMAC-RIPEMD-320, key = salt (-m 33660), rules attack (-a 0), pure
 * kernel, single-hash path. Same salt-keyed midstate reuse as the
 * multi-hash variant, matched via COMPARE_S_SCALAR.
 */
KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_RULES ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * digest
   */
  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };
  /**
   * base
   */
  COPY_PW (pws[gid]);
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  ripemd320_hmac_ctx_t ctx0;
  ripemd320_hmac_init (&ctx0, s, salt_len); // key = salt, computed once
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    pw_t tmp = PASTE_PW;
    tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len);
    ripemd320_hmac_ctx_t ctx = ctx0;
    ripemd320_hmac_update (&ctx, tmp.i, tmp.pw_len);
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

187
OpenCL/m33660_a1-pure.cl Normal file
View File

@ -0,0 +1,187 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
//#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_scalar.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
/**
 * HMAC-RIPEMD-320, key = salt (-m 33660), combinator attack (-a 1), pure
 * kernel, multi-hash path. The HMAC is keyed with the salt once (ctx0);
 * each concatenated left+right candidate is the message.
 */
KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */
  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);
  if (gid >= GID_CNT) return;
  /**
   * base
   */
  const u32 pw_len = pws[gid].pw_len;
  u32 w[64] = { 0 }; // left/base password words
  for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1)
  {
    w[idx] = pws[gid].i[idx];
  }
  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;
  u32 s[64] = { 0 };
  for (u32 i = 0, idx = 0; i < salt_len; i += 4, idx += 1)
  {
    s[idx] = salt_bufs[SALT_POS_HOST].salt_buf[idx];
  }
  ripemd320_hmac_ctx_t ctx0;
  ripemd320_hmac_init (&ctx0, s, salt_len); // key = salt, computed once
  /**
   * loop
   */
  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    const u32 comb_len = combs_buf[il_pos].pw_len;
    u32 c[64];
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int idx = 0; idx < 64; idx++)
    {
      c[idx] = combs_buf[il_pos].i[idx];
    }
    switch_buffer_by_offset_1x64_le_S (c, pw_len); // align right part after base
    #ifdef _unroll
    #pragma unroll
    #endif
    for (int i = 0; i < 64; i++)
    {
      c[i] |= w[i]; // merge base password into the shifted buffer
    }
    ripemd320_hmac_ctx_t ctx = ctx0; // restart from the keyed midstate
    ripemd320_hmac_update (&ctx, c, pw_len + comb_len);
    ripemd320_hmac_final (&ctx);
    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];
    COMPARE_M_SCALAR (r0, r1, r2, r3);
  }
}
KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_BASIC ())
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest to search for (single-hash path)
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base: load the left (base) word and the salt into zero-padded word buffers
   */

  const u32 pw_len = pws[gid].pw_len;

  // number of whole 32-bit words the password occupies (rounded up)
  const u32 pw_lenv = (pw_len + 3) / 4;

  u32 w[64] = { 0 };

  for (u32 j = 0; j < pw_lenv; j++)
  {
    w[j] = pws[gid].i[j];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  const u32 salt_lenv = (salt_len + 3) / 4;

  u32 s[64] = { 0 };

  for (u32 j = 0; j < salt_lenv; j++)
  {
    s[j] = salt_bufs[SALT_POS_HOST].salt_buf[j];
  }

  // the salt is the HMAC key: derive ipad/opad once, reuse for all candidates
  ripemd320_hmac_ctx_t ctx0;

  ripemd320_hmac_init (&ctx0, s, salt_len);

  /**
   * loop over the right-hand (combinator) words
   */

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++)
  {
    const u32 comb_len = combs_buf[il_pos].pw_len;

    u32 c[64];

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int k = 0; k < 64; k++)
    {
      c[k] = combs_buf[il_pos].i[k];
    }

    // shift the right word past the base word, then merge both halves
    switch_buffer_by_offset_1x64_le_S (c, pw_len);

    #ifdef _unroll
    #pragma unroll
    #endif
    for (int k = 0; k < 64; k++)
    {
      c[k] |= w[k];
    }

    ripemd320_hmac_ctx_t ctx = ctx0;

    ripemd320_hmac_update (&ctx, c, pw_len + comb_len);

    ripemd320_hmac_final (&ctx);

    const u32 r0 = ctx.opad.h[DGST_R0];
    const u32 r1 = ctx.opad.h[DGST_R1];
    const u32 r2 = ctx.opad.h[DGST_R2];
    const u32 r3 = ctx.opad.h[DGST_R3];

    COMPARE_S_SCALAR (r0, r1, r2, r3);
  }
}

159
OpenCL/m33660_a3-pure.cl Normal file
View File

@ -0,0 +1,159 @@
/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#define NEW_SIMD_CODE
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_simd.cl)
#include M2S(INCLUDE_PATH/inc_hash_ripemd320.cl)
#endif
KERNEL_FQ KERNEL_FA void m33660_mxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * base: load password and salt into zero-padded word buffers
   */

  const u32 pw_len = pws[gid].pw_len;

  // number of whole 32-bit words the password occupies (rounded up)
  const u32 pw_lenv = (pw_len + 3) / 4;

  u32x w[64] = { 0 };

  for (u32 j = 0; j < pw_lenv; j++)
  {
    w[j] = pws[gid].i[j];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  const u32 salt_lenv = (salt_len + 3) / 4;

  u32x s[64] = { 0 };

  for (u32 j = 0; j < salt_lenv; j++)
  {
    s[j] = salt_bufs[SALT_POS_HOST].salt_buf[j];
  }

  // the salt is the HMAC key: derive ipad/opad once, reuse for all candidates
  ripemd320_hmac_ctx_vector_t ctx0;

  ripemd320_hmac_init_vector (&ctx0, s, salt_len);

  /**
   * loop: only the first word varies between candidates (brute-force attack)
   */

  const u32x w0_base = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    // patch the varying candidate bits into the first password word
    w[0] = w0_base | words_buf_r[il_pos / VECT_SIZE];

    ripemd320_hmac_ctx_vector_t ctx = ctx0;

    ripemd320_hmac_update_vector (&ctx, w, pw_len);

    ripemd320_hmac_final_vector (&ctx);

    const u32x r0 = ctx.opad.h[DGST_R0];
    const u32x r1 = ctx.opad.h[DGST_R1];
    const u32x r2 = ctx.opad.h[DGST_R2];
    const u32x r3 = ctx.opad.h[DGST_R3];

    COMPARE_M_SIMD (r0, r1, r2, r3);
  }
}
KERNEL_FQ KERNEL_FA void m33660_sxx (KERN_ATTR_VECTOR ())
{
  /**
   * modifier
   */

  const u64 lid = get_local_id (0);
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  /**
   * digest to search for (single-hash path)
   */

  const u32 search[4] =
  {
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2],
    digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3]
  };

  /**
   * base: load password and salt into zero-padded word buffers
   */

  const u32 pw_len = pws[gid].pw_len;

  // number of whole 32-bit words the password occupies (rounded up)
  const u32 pw_lenv = (pw_len + 3) / 4;

  u32x w[64] = { 0 };

  for (u32 j = 0; j < pw_lenv; j++)
  {
    w[j] = pws[gid].i[j];
  }

  const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len;

  const u32 salt_lenv = (salt_len + 3) / 4;

  u32x s[64] = { 0 };

  for (u32 j = 0; j < salt_lenv; j++)
  {
    s[j] = salt_bufs[SALT_POS_HOST].salt_buf[j];
  }

  // the salt is the HMAC key: derive ipad/opad once, reuse for all candidates
  ripemd320_hmac_ctx_vector_t ctx0;

  ripemd320_hmac_init_vector (&ctx0, s, salt_len);

  /**
   * loop: only the first word varies between candidates (brute-force attack)
   */

  const u32x w0_base = w[0];

  for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE)
  {
    // patch the varying candidate bits into the first password word
    w[0] = w0_base | words_buf_r[il_pos / VECT_SIZE];

    ripemd320_hmac_ctx_vector_t ctx = ctx0;

    ripemd320_hmac_update_vector (&ctx, w, pw_len);

    ripemd320_hmac_final_vector (&ctx);

    const u32x r0 = ctx.opad.h[DGST_R0];
    const u32x r1 = ctx.opad.h[DGST_R1];
    const u32x r2 = ctx.opad.h[DGST_R2];
    const u32x r3 = ctx.opad.h[DGST_R3];

    COMPARE_S_SIMD (r0, r1, r2, r3);
  }
}

151
OpenCL/m34000-pure.cl Normal file
View File

@ -0,0 +1,151 @@
/**
* Author......: Netherlands Forensic Institute
* License.....: MIT
*/
#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_hash_blake2b.cl)
#include M2S(INCLUDE_PATH/inc_hash_argon2.cl)
#endif
#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl)
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)
// Per-candidate tmps structure required by the TMPS kernel interface.
// NOTE(review): the Argon2 working memory lives in the d_extra*_buf global
// buffers, and none of the m34000 kernels visible here read or write this
// state — presumably a minimal placeholder; confirm against the module.
typedef struct argon2_tmp
{
  u32 state[4];
} argon2_tmp_t;
KERNEL_FQ KERNEL_FA void m34000_init (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  // work items are striped across the four scratch buffers: item gid uses
  // buffer (gid % 4) at slot (gid / 4) within that buffer
  const u32 gd4 = gid / 4;
  const u32 gm4 = gid % 4;

  GLOBAL_AS void *V = d_extra0_buf;

  if (gm4 == 1) V = d_extra1_buf;
  if (gm4 == 2) V = d_extra2_buf;
  if (gm4 == 3) V = d_extra3_buf;

  const argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];

  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, gd4);

  // prepare the initial Argon2 blocks for this password/salt pair
  argon2_init (&pws[gid], &salt_bufs[SALT_POS_HOST], &options, argon2_block);
}
// Argon2 memory-filling loop kernel. One work group processes one candidate
// (indexed by bid); the group's dimension-0 threads cooperate inside a
// segment, and dimension-1 threads are spread across Argon2 lanes.
KERNEL_FQ KERNEL_FA void m34000_loop (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
{
  const u64 gid = get_global_id (0);
  const u64 bid = get_group_id (0);
  const u64 lid = get_local_id (1);
  const u64 lsz = get_local_size (1);
  // candidates are indexed per work group here, not per work item
  if (bid >= GID_CNT) return;
  const u32 argon2_thread = get_local_id (0);
  const u32 argon2_lsz = get_local_size (0);
  // one 32-entry u64 shuffle scratch row per lane-slot, kept in local memory;
  // row count is fixed by ARGON2_PARALLELISM when that is compile-time known
  #ifdef ARGON2_PARALLELISM
  LOCAL_VK u64 shuffle_bufs[ARGON2_PARALLELISM][32];
  #else
  LOCAL_VK u64 shuffle_bufs[32][32];
  #endif
  LOCAL_AS u64 *shuffle_buf = shuffle_bufs[lid];
  SYNC_THREADS();
  // candidates are striped across the four scratch buffers: candidate bid
  // uses buffer (bid % 4) at slot (bid / 4) within that buffer
  const u32 bd4 = bid / 4;
  const u32 bm4 = bid % 4;
  GLOBAL_AS void *V;
  switch (bm4)
  {
    case 0: V = d_extra0_buf; break;
    case 1: V = d_extra1_buf; break;
    case 2: V = d_extra2_buf; break;
    case 3: V = d_extra3_buf; break;
  }
  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST_BID];
  // override with the compile-time lane count when it is baked into the kernel
  #ifdef ARGON2_PARALLELISM
  options.parallelism = ARGON2_PARALLELISM;
  #endif
  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, bd4);
  // resume at the pass/slice this loop chunk starts from: hashcat splits the
  // full iteration count into LOOP_CNT-sized chunks, LOOP_POS deep so far
  argon2_pos_t pos;
  pos.pass = (LOOP_POS / ARGON2_SYNC_POINTS);
  pos.slice = (LOOP_POS % ARGON2_SYNC_POINTS);
  for (u32 i = 0; i < LOOP_CNT; i++)
  {
    // each dimension-1 thread fills the current segment of one or more lanes
    for (pos.lane = lid; pos.lane < options.parallelism; pos.lane += lsz)
    {
      argon2_fill_segment (argon2_block, &options, &pos, shuffle_buf, argon2_thread, argon2_lsz);
    }
    // all lanes must finish a slice before any thread may start the next one
    SYNC_THREADS ();
    pos.slice++;
    if (pos.slice == ARGON2_SYNC_POINTS)
    {
      pos.slice = 0;
      pos.pass++;
    }
  }
}
KERNEL_FQ KERNEL_FA void m34000_comp (KERN_ATTR_TMPS_ESALT (argon2_tmp_t, argon2_options_t))
{
  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  // work items are striped across the four scratch buffers: item gid uses
  // buffer (gid % 4) at slot (gid / 4) within that buffer
  const u32 gd4 = gid / 4;
  const u32 gm4 = gid % 4;

  GLOBAL_AS void *V = d_extra0_buf;

  if (gm4 == 1) V = d_extra1_buf;
  if (gm4 == 2) V = d_extra2_buf;
  if (gm4 == 3) V = d_extra3_buf;

  argon2_options_t options = esalt_bufs[DIGESTS_OFFSET_HOST];

  GLOBAL_AS argon2_block_t *argon2_block = get_argon2_block (&options, V, gd4);

  // finalize: fold the filled memory into the output tag words
  u32 out[8];

  argon2_final (argon2_block, &options, out);

  const u32 r0 = out[0];
  const u32 r1 = out[1];
  const u32 r2 = out[2];
  const u32 r3 = out[3];

  #define il_pos 0

  #include COMPARE_M
}

View File

@ -6,13 +6,14 @@ import sys
def extract_salts(salts_buf) -> list:
salts=[]
for salt_buf, salt_buf_pc, salt_len, salt_len_pc, salt_iter, salt_iter2, salt_sign, salt_repeats, orig_pos, digests_cnt, digests_done, digests_offset, scrypt_N, scrypt_r, scrypt_p in struct.iter_unpack("256s 256s I I I I 8s I I I I I I I I", salts_buf):
for salt_buf, salt_buf_pc, salt_len, salt_len_pc, salt_iter, salt_iter2, salt_dimy, salt_sign, salt_repeats, orig_pos, digests_cnt, digests_done, digests_offset, scrypt_N, scrypt_r, scrypt_p in struct.iter_unpack("256s 256s I I I I I 8s I I I I I I I I", salts_buf):
salt_buf = salt_buf[0:salt_len]
salt_buf_pc = salt_buf_pc[0:salt_len_pc]
salts.append({ "salt_buf": salt_buf, \
"salt_buf_pc": salt_buf_pc, \
"salt_iter": salt_iter, \
"salt_iter2": salt_iter2, \
"salt_dimy": salt_dimy, \
"salt_sign": salt_sign, \
"salt_repeats": salt_repeats, \
"orig_pos": orig_pos, \

View File

@ -27,6 +27,8 @@
- Added hash-mode: GPG (AES-128/AES-256 (SHA-256($pass)))
- Added hash-mode: GPG (AES-128/AES-256 (SHA-512($pass)))
- Added hash-mode: GPG (CAST5 (SHA-1($pass)))
- Added hash-mode: HMAC-RIPEMD320 (key = $pass)
- Added hash-mode: HMAC-RIPEMD320 (key = $salt)
- Added hash-mode: Kerberos 5, etype 17, AS-REP
- Added hash-mode: Kerberos 5, etype 18, AS-REP
- Added hash-mode: MetaMask Mobile Wallet
@ -40,6 +42,7 @@
- Added hash-mode: NetIQ SSPR (SHA-1 with Salt)
- Added hash-mode: NetIQ SSPR (SHA-256 with Salt)
- Added hash-mode: NetIQ SSPR (SHA-512 with Salt)
- Added hash-mode: RIPEMD-320
- Added hash-mode: RC4 104-bit DropN
- Added hash-mode: RC4 40-bit DropN
- Added hash-mode: RC4 72-bit DropN
@ -49,6 +52,7 @@
- Added hash-mode: bcrypt(sha256($pass))
- Added hash-mode: HMAC-RIPEMD160 (key = $pass)
- Added hash-mode: HMAC-RIPEMD160 (key = $salt)
- Added hash-mode: md5($salt1.$pass.$salt2)
- Added hash-mode: md5($salt1.sha1($salt2.$pass))
- Added hash-mode: md5(md5($pass.$salt))
- Added hash-mode: md5(md5($salt).md5(md5($pass)))
@ -60,6 +64,7 @@
- Added hash-mode: md5(sha1($salt.$pass))
- Added hash-mode: sha512(sha512($pass).$salt)
- Added hash-mode: sha512(sha512_bin($pass).$salt)
- Added hash-mode: md5($salt.md5($pass).$salt)
##
## Features
@ -67,7 +72,9 @@
- Added new feature (-Y) that creates N virtual instances for each device in your system at the cost of N times the device memory consumption
- Added options --benchmark-min and --benchmark-max to set a hash-mode range to be used during the benchmark
- Added option --total-candidates to provide the total candidate count for an attack instead of the internal "--keyspace" value
- Added option --backend-devices-keepfree to configure X percentage of device memory available to keep free
- Added display of password length minimum and maximum in the Kernel.Feature status line
##
## Performance
@ -89,6 +96,9 @@
- Fixed bug in grep out-of-memory workaround on Unit Test
- Fixed bug in input_tokenizer when TOKEN_ATTR_FIXED_LENGTH is used and refactor modules
- Fixed bug in --stdout that caused certain rules to malfunction
- Fixed bug in --stdout when multiple computing devices are active
- Fixed bug in Hardware Monitor: prevent disable if ADL fail
- Fixed race condition in selftest_init on OpenCL with non-blocking write
- Fixed build failed for 10700 optimized with Apple Metal
- Fixed build failed for 13772 and 13773 with Apple Metal
- Fixed build failed for 18400 with Apple Metal
@ -123,8 +133,11 @@
- Alias Devices: Prevents hashcat, when started with x86_64 emulation on Apple Silicon, from showing the Apple M1 OpenCL CPU as an alias for the Apple M1 Metal GPU
- Apple Driver: Automatically enable GPU support on Apple OpenCL instead of CPU support
- Apple Driver: Updated requirements to use Apple OpenCL API to macOS 13.0 - use
- Backend: Added workaround to get rid of internal runtimes memory leaks
- Backend: Updated filename chksum format to prevent invalid cache on Apple Silicon when switching arch
- Backend: Splitting backend_ctx_devices_init into smaller runtime-specific functions
- Backend Checks: Describe workaround in error message when detecting more than 64 backend devices
- Backend Info: Added --machine-readable format
- Brain: Added sanity check and corresponding error message for invalid --brain-port values
- Dependencies: Added sse2neon v1.8.0 (commit 658eeac)
- Dependencies: Updated LZMA SDK to 24.09
@ -132,26 +145,38 @@
- Dependencies: Updated xxHash to 0.8.3 (commit 50f4226)
- Building: Support building windows binaries on macOS using MinGW
- Dependencies: Updated OpenCL-Headers to v2024.10.24 (commit 265df85)
- Documents: Renamed status_code.txt to exit_status_code.txt and added device_status_code.txt
- Documents: Updated BUILD.md and added BUILD_macOS.md (containing instructions for building windows binaries on macOS)
- Modules: Added module_unstable_warning for 22500, update module_unstable_warning for 10700
- HIP Backend: Avoid deprecated functions
- Modules: Added support for non-zero IVs for -m 6800 (Lastpass). Also added `tools/lastpass2hashcat.py`
- Modules: Updated module_unstable_warning
- Open Document Format: Added support for small documents with content length < 1024
- OpenCL Backend: added workaround to set device_available_memory from CUDA/HIP alias device
- Selftest: rename selftest function to process_selftest and splitting into 3 smaller functions
- Status Code: Add specific return code for self-test fail (-11)
- Scrypt: Increase buffer sizes in module for hash mode 8900 to allow longer scrypt digests
- Unicode: Update UTF-8 to UTF-16 conversion to match RFC 3629
- Unit tests: Updated install_modules.sh with Crypt::Argon2
- User Options: Added error message when mixing --username and --show to warn users of exponential delay
- MetaMask: update extraction tool to support MetaMask Mobile wallets
- SecureCRT MasterPassphrase v2: update module, pure kernels and test unit. Add optimized kernels.
- Metal Backend: added workaround to prevent 'Infinite Loop' bug when build kernels
- Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel
- Metal Backend: added support to 2D/3D Compute
- Metal Backend: allow use of devices with Metal if runtime version is >= 200
- Metal Backend: disable Metal devices only if at least one OpenCL device is active
- Metal Backend: improved compute workloads calculation
- Modules: Check UnpackSize to raise false positive with hc_decompress_rar
- User Options: added --metal-compiler-runtime option
- User Options: limit --bitmap-max value to 31
- User Options: assigned -H to --hash-info
- Hash-Info: show more details using -HH
- Hardware Monitor: avoid sprintf in src/ext_iokit.c
- Hardware Monitor: Splitting hwmon_ctx_init function into smaller library-specific functions
- Help: show supported hash-modes only with -hh
- Makefile: prevent make failure with Apple Silicon in case of partial rebuild
- Makefile: updated MACOSX_DEPLOYMENT_TARGET to 15.0
- Rules: Add support to character class rules
- Rules: Rename best64.rule to best66.rule and remove the unknown section from it

View File

@ -0,0 +1,20 @@
Device Status Codes:
====================
0 = "Initializing"
1 = "Autotuning"
2 = "Selftest"
3 = "Running"
4 = "Paused"
5 = "Exhausted"
6 = "Cracked"
7 = "Aborted"
8 = "Quit"
9 = "Bypass"
10 = "Aborted (Checkpoint)"
11 = "Aborted (Runtime)"
12 = "Running (Checkpoint Quit requested)"
13 = "Error"
14 = "Aborted (Finish)"
15 = "Running (Quit after attack requested)"
16 = "Autodetect"

View File

@ -23,7 +23,7 @@ static const u64 BRIDGE_TYPE = BRIDGE_TYPE_MATCH_TUNINGS
static const char *BRIDGE_NAME = "scrypt_jane";
```
* `BRIDGE_NAME` tells Hashcat which bridge to load (e.g., `bridge_scrypt_jane.so`).
* `BRIDGE_NAME` tells hashcat which bridge to load (e.g., `bridge_scrypt_jane.so`).
* `BRIDGE_TYPE` indicates which backend kernel functions the bridge will override:
* `BRIDGE_TYPE_LAUNCH_LOOP`: Entry point for all bridges that register to run after `RUN_LOOP`
@ -31,7 +31,7 @@ static const char *BRIDGE_NAME = "scrypt_jane";
* `BRIDGE_TYPE_REPLACE_LOOP`: Same as BRIDGE_TYPE_LAUNCH_LOOP, but deactivates `RUN_LOOP`
* `BRIDGE_TYPE_REPLACE_LOOP2`: Same as BRIDGE_TYPE_LAUNCH_LOOP2, but deactivates `RUN_LOOP2`
Hashcat loads the bridge dynamically and uses it for any declared invocation.
hashcat loads the bridge dynamically and uses it for any declared invocation.
Note that bridges only load for outside kernel, aka "slow hash" kernels. In "fast hash" kernels, such as MD5, they are ignored. In case you want to implement a "fast hash" + bridge hybrid, you can move the "fast hash" code into a new "slow hash" kernel.
@ -50,7 +50,7 @@ ATTACK_EXEC_OUTSIDE_KERNEL:
RUN_PREPARE
ITER_REPEATS:
RUN_LOOP
RUN_EXTENTED
RUN_EXTENDED
COPY_BRIDGE_MATERIAL_TO_HOST
BRIDGE_LAUNCH_LOOP
COPY_BRIDGE_MATERIAL_TO_DEVICE
@ -75,16 +75,16 @@ ATTACK_EXEC_OUTSIDE_KERNEL:
- COPY_* refers to host-to-device or device-to-host copies and typically involve PCIe data transfer.
- CALL_* are code functions executed on the host CPU. They are plugin-specific and defined in a module. They were the predecessor of bridges but are still usable.
- SALT_* typically are optional steps which allow certain algorithms specific optimizations. For instance in Scrypt with P > 1, the V and XY buffer can be reused and allow temporary storage of result values into B. This saves memory requirement, improving parallelization
- ITER_* is the main loop that chunks what typically is defined as "iterations" in a algorithm computation. For instance a PBKDF2 function is called with 10,000 iterations, which would take a while to compute. The time this takes could be longer than a GPU drivers watchdog allows (before it resets the compute engine.). Hashcat will divide the 10,000 into chunks of let's say 1,000 and call the same kernel 10 times
- ITER_* is the main loop that chunks what typically is defined as "iterations" in a algorithm computation. For instance a PBKDF2 function is called with 10,000 iterations, which would take a while to compute. The time this takes could be longer than a GPU drivers watchdog allows (before it resets the compute engine.). hashcat will divide the 10,000 into chunks of let's say 1,000 and call the same kernel 10 times
- BRIDGE_* existing bridge entry points. During the "lifetime" of a hash computation the tmps[] variable is used (algorithm specific, so defined in the specific plugin module and kernel). This variable is which we refer to as bridge material, but it's possible we add other types of variables to "material" in the future
- ITER2/LOOP2: Optional entry points in case the algorithm consists of two types of long running (high iterated) sub-components. For instance one iteration of 10k loops sha256 followed by 100k loops of sha512, or bcrypt followed by scrypt
* `BRIDGE_TYPE_LAUNCH_INIT`
* `BRIDGE_TYPE_LAUNCH_COMP`
Hashcat devs will add support on request.
hashcat devs will add support on request.
As mentioned in the BRIDGE_* entry points, it's the developer's responsibility to ensure compatibility. That typically means the handling of the `tmps` variable relevant in the `kernel_loop` and how it changes over algorithm computations lifetime. Hashcat will take care of copying the data from and to the compute backend buffers (bridge material).
As mentioned in the BRIDGE_* entry points, it's the developer's responsibility to ensure compatibility. That typically means the handling of the `tmps` variable relevant in the `kernel_loop` and how it changes over algorithm computations lifetime. hashcat will take care of copying the data from and to the compute backend buffers (bridge material).
But the bridge developer must ensure data transformation compatibility. For instance, if we replace the loop section in SCRYPT (8900), the long running part is the smix() activity. But SCRYPT implements the PBKDF2 handling in both init and comp kernels, preparing the values in B[] after the init kernel, and expecting modified values in B[] before running comp kernel. If you want to replace the smix() section with let's say FPGA code, the bridge needs to understand the structure of the tmps[] variable. In this case tmps[] just reflect SCRYPT B[], making this simple, but other algorithms may require more than just one large buffer array. That means the structure itself (datatypes), but also the amount of workitems, because there's almost always more than one workitem (to reduce overhead times).
@ -95,7 +95,7 @@ There's some more BRIDGE PARAMETERs that you should know:
## How Bridges Work
When Hashcat starts with a plugin that specifies a bridge, it loads the bridge and invokes its initialization function. The bridge must then discover its internal compute units, called *bridge units*. Handling the units must be implemented by the bridge developer, and typically involves loading some library, init it, and retrieve some resources available, for instances loading XRT, asking how many FPGA are available. If there's two FPGA, then the bridge unit count would be two. You also need to provide some detailed information on the unit itself, for instance the name of the device, or version or your software solution if it's not a hardware.
When hashcat starts with a plugin that specifies a bridge, it loads the bridge and invokes its initialization function. The bridge must then discover its internal compute units, called *bridge units*. Handling the units must be implemented by the bridge developer, and typically involves loading some library, init it, and retrieve some resources available, for instances loading XRT, asking how many FPGA are available. If there's two FPGA, then the bridge unit count would be two. You also need to provide some detailed information on the unit itself, for instance the name of the device, or version or your software solution if it's not a hardware.
Each of these bridge unit maps to one virtual backend device, which allows asynchronous and independent parallel execution, and this were virtual backend devices become relevant. Read section about virtual backend devices for a better understanding
@ -110,7 +110,7 @@ From the bridge_init() function you have access to the following generic paramet
## Virtual Backend Devices
This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. Hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
This feature is available also outside of bridges, eg in order to increase some workload on a compute device, but it was added in the first place to support bridges. The main problem is that it's possible that a bridge return 2 bridge units which may have different speeds (clocking), or an ideal batch size. The time it takes to compute a certain batch of passwords would be different, so there was a need for an asynchronous execution strategy. hashcat supports mixed speed device types, but that typically mean "backend" devices. To solve the issue, we partition (virtualize) one physical backend device into multiple virtual backend devices (done internally by hashcat), and "link" each of the virtual backend device to a bridge unit. Due to this binding we can support bridge units of different speed. There's two flags a user can control in regard to virtual device backend:
* Use `-Y` to define how many virtual backend devices to create.
* Use `-R` to bind these virtual devices to a physical backend host (new in v7).

View File

@ -70,4 +70,4 @@ Depending on interface compatibility, code from other password cracking tools (e
The Assimilation Bridge introduces a highly extensible mechanism to integrate custom compute resources and logic into Hashcat.
For hands-on examples and developer guidance, refer to the accompanying documentation in `docs/hashcat-assimiliation-bridge-development.md` (first draft).
For hands-on examples and developer guidance, refer to the accompanying documentation in `docs/hashcat-assimilation-bridge-development.md` (first draft).

View File

@ -210,13 +210,13 @@ Notes:
If you modify one of these plugin files, there's a trade-off: you wont be able to contribute that code directly to the upstream Hashcat repository, since those files are meant to remain clean for demonstration purposes.
To address this, the assimilation bridge provides a generic parameter that users can specify via the command line. In the case of the Python bridge, only the first parameter is used. You can override the Python script to be loaded using `--bridge-parameter1`:
To address this, the assimilation bridge provides a generic parameter that users can specify via the command line. In the case of the Python bridge, only the first parameter is used. Using `--bridge-parameter1` allows you to override the Python script to be loaded:
```
$ ./hashcat -m 73000 --bridge-parameter1 myimplementation.py hash.txt wordlist.txt ...
$ ./hashcat -m 73000 --bridge-parameter1 ./Python/myimplementation.py hash.txt wordlist.txt ...
```
This tells the Python bridge plugin to load `myimplementation.py` instead of the default `generic_hash_mp.py`. This approach is especially useful if you plan to contribute `myimplementation.py` to the upstream Hashcat repository. If you choose to stay within the generic mode, your Python code wont have a dedicated hash mode, and you'll need to instruct users to use the `--bridge-parameter1` flag to load your implementation.
This tells the Python bridge plugin to load `myimplementation.py` located in the local `Python` subdirectory instead of the default `generic_hash_mp.py`. This approach is especially useful if you plan to contribute `myimplementation.py` to the upstream Hashcat repository. If you choose to stay within the generic mode, your Python code wont have a dedicated hash mode, and you'll need to instruct users to use the `--bridge-parameter1` flag to load your implementation.
### Design Tradeoffs and Format Considerations

View File

@ -57,6 +57,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- SHA3-384
- SHA3-512
- RIPEMD-160
- RIPEMD-320
- BLAKE2b-512
- BLAKE2s-256
- SM3
@ -86,11 +87,13 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- md5($salt.md5($salt.$pass))
- md5($salt.sha1($salt.$pass))
- md5($salt.utf16le($pass))
- md5($salt1.$pass.$salt2)
- md5($salt1.sha1($salt2.$pass))
- md5($salt1.strtoupper(md5($salt2.$pass)))
- md5(md5($pass))
- md5(md5($pass).md5($salt))
- md5(md5($pass.$salt))
- md5($salt.md5($pass).$salt)
- md5(md5(md5($pass)))
- md5(md5(md5($pass)).$salt)
- md5(md5(md5($pass).$salt1).$salt2)
@ -143,6 +146,8 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or
- HMAC-MD5 (key = $salt)
- HMAC-RIPEMD160 (key = $pass)
- HMAC-RIPEMD160 (key = $salt)
- HMAC-RIPEMD320 (key = $pass)
- HMAC-RIPEMD320 (key = $salt)
- HMAC-SHA1 (key = $pass)
- HMAC-SHA1 (key = $salt)
- HMAC-SHA256 (key = $pass)

View File

@ -425,7 +425,7 @@ _hashcat ()
local HIDDEN_FILES_AGGRESSIVE="${HIDDEN_FILES}|hcmask|hcchr"
local BUILD_IN_CHARSETS='?l ?u ?d ?a ?b ?s ?h ?H'
local SHORT_OPTS="-m -a -V -h -b -t -T -o -p -c -d -D -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z -M -Y -R"
local SHORT_OPTS="-m -a -V -h -H -b -t -T -o -p -c -d -D -w -n -u -j -k -r -g -1 -2 -3 -4 -i -I -s -l -O -S -z -M -Y -R"
local LONG_OPTS="--hash-type --attack-mode --version --help --quiet --benchmark --benchmark-all --hex-salt --hex-wordlist --hex-charset --force --status --status-json --status-timer --stdin-timeout-abort --machine-readable --loopback --markov-hcstat2 --markov-disable --markov-inverse --markov-classic --markov-threshold --runtime --session --speed-only --progress-only --restore --restore-file-path --restore-disable --outfile --outfile-format --outfile-autohex-disable --outfile-json --outfile-check-timer --outfile-check-dir --wordlist-autohex-disable --separator --show --deprecated-check-disable --left --username --dynamic-x --remove --remove-timer --potfile-disable --potfile-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --example-hashes --hash-info --backend-ignore-cuda --backend-ignore-opencl --backend-ignore-hip --backend-ignore-metal --backend-info --backend-devices --backend-devices-virtmulti --backend-devices-virthost --backend-devices-keepfree --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-disable --hwmon-temp-abort --skip --limit --keyspace --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-func-sel --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --hook-threads --increment --increment-min --increment-max --logfile-disable --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --stdout --keep-guessing --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --optimized-kernel-enable --multiply-accel-disable --self-test-disable --slow-candidates --brain-server --brain-server-timer --brain-client --brain-client-features --brain-host --brain-port --brain-session --brain-session-whitelist --brain-password --identify 
--bridge-parameter1 --bridge-parameter2 --bridge-parameter3 --bridge-parameter4"
local OPTIONS="-m -a -t -o -p -c -d -w -n -u -j -k -r -g -1 -2 -3 -4 -s -l --hash-type --attack-mode --status-timer --stdin-timeout-abort --markov-hcstat2 --markov-threshold --runtime --session --outfile --outfile-format --outfile-check-timer --outfile-check-dir --separator --remove-timer --potfile-path --restore-file-path --debug-mode --debug-file --induction-dir --segment-size --bitmap-min --bitmap-max --cpu-affinity --backend-devices --backend-devices-virtmulti --backend-devices-virthost --backend-devices-keepfree --opencl-device-types --backend-vector-width --workload-profile --kernel-accel --kernel-loops --kernel-threads --spin-damp --hwmon-temp-abort --skip --limit --rule-left --rule-right --rules-file --generate-rules --generate-rules-func-min --generate-rules-func-max --generate-rules-func-sel --generate-rules-seed --custom-charset1 --custom-charset2 --custom-charset3 --custom-charset4 --hook-threads --increment-min --increment-max --scrypt-tmto --keyboard-layout-mapping --truecrypt-keyfiles --veracrypt-keyfiles --veracrypt-pim-start --veracrypt-pim-stop --hccapx-message-pair --nonce-error-corrections --encoding-from --encoding-to --brain-server-timer --brain-client-features --brain-host --brain-password --brain-port --brain-session --brain-session-whitelist --bridge-parameter1 --bridge-parameter2 --bridge-parameter3 --bridge-parameter4"
@ -729,11 +729,11 @@ _hashcat ()
while [ ${h} -le ${COMP_CWORD} ]; do
if [[ "${COMP_WORDS[h]}" == "-a" ]]; then
if [[ "${COMP_WORDS[h]}" == "-a" ]]; then
attack_mode=${COMP_WORDS[$((h + 1))]}
elif [[ "${COMP_WORDS[h]}" == -a* ]]; then
elif [[ "${COMP_WORDS[h]}" == -a* ]]; then
attack_mode=${COMP_WORDS[h]:2}

View File

@ -51,7 +51,7 @@ typedef cl_int (CL_API_CALL *OCL_CLENQUEUEFILLBUFFER) (cl_comman
typedef cl_int (CL_API_CALL *OCL_CLENQUEUECOPYBUFFER) (cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
typedef void * (CL_API_CALL *OCL_CLENQUEUEMAPBUFFER) (cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event *, cl_event *, cl_int *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUENDRANGEKERNEL) (cl_command_queue, cl_kernel, cl_uint, const size_t *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUEREADBUFFER) (cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUEREADBUFFER) (cl_command_queue, cl_mem, cl_bool, size_t, size_t, void *, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUEUNMAPMEMOBJECT) (cl_command_queue, cl_mem, void *, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLENQUEUEWRITEBUFFER) (cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void *, cl_uint, const cl_event *, cl_event *);
typedef cl_int (CL_API_CALL *OCL_CLFINISH) (cl_command_queue);

View File

@ -1154,14 +1154,19 @@ typedef CUresult (CUDA_API_CALL *CUDA_CUINIT) (unsigned int);
typedef CUresult (CUDA_API_CALL *CUDA_CULAUNCHKERNEL) (CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void **, void **);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOC) (CUdeviceptr *, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMALLOCHOST) (void **, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOD) (CUdeviceptr, CUdeviceptr, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOH) (void *, CUdeviceptr, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTOD) (CUdeviceptr, const void *, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32) (CUdeviceptr, unsigned int, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8) (CUdeviceptr, unsigned char, size_t);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTODASYNC) (CUdeviceptr, CUdeviceptr, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYDTOHASYNC) (void *, CUdeviceptr, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMCPYHTODASYNC) (CUdeviceptr, const void *, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC) (CUdeviceptr, unsigned int, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC) (CUdeviceptr, unsigned char, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREE) (CUdeviceptr);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMFREEHOST) (void *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMGETINFO) (size_t *, size_t *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD32ASYNC) (CUdeviceptr, unsigned int, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMEMSETD8ASYNC) (CUdeviceptr, unsigned char, size_t, CUstream);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETFUNCTION) (CUfunction *, CUmodule, const char *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULEGETGLOBAL) (CUdeviceptr *, size_t *, CUmodule, const char *);
typedef CUresult (CUDA_API_CALL *CUDA_CUMODULELOAD) (CUmodule *, const char *);
@ -1217,14 +1222,19 @@ typedef struct hc_cuda_lib
CUDA_CULAUNCHKERNEL cuLaunchKernel;
CUDA_CUMEMALLOC cuMemAlloc;
CUDA_CUMEMALLOCHOST cuMemAllocHost;
CUDA_CUMEMCPYDTOD cuMemcpyDtoD;
CUDA_CUMEMCPYDTOH cuMemcpyDtoH;
CUDA_CUMEMCPYHTOD cuMemcpyHtoD;
CUDA_CUMEMSETD32 cuMemsetD32;
CUDA_CUMEMSETD8 cuMemsetD8;
CUDA_CUMEMCPYDTODASYNC cuMemcpyDtoDAsync;
CUDA_CUMEMCPYDTOHASYNC cuMemcpyDtoHAsync;
CUDA_CUMEMCPYHTODASYNC cuMemcpyHtoDAsync;
CUDA_CUMEMSETD32ASYNC cuMemsetD32Async;
CUDA_CUMEMSETD8ASYNC cuMemsetD8Async;
CUDA_CUMEMFREE cuMemFree;
CUDA_CUMEMFREEHOST cuMemFreeHost;
CUDA_CUMEMGETINFO cuMemGetInfo;
CUDA_CUMEMSETD32ASYNC cuMemsetD32Async;
CUDA_CUMEMSETD8ASYNC cuMemsetD8Async;
CUDA_CUMODULEGETFUNCTION cuModuleGetFunction;
CUDA_CUMODULEGETGLOBAL cuModuleGetGlobal;
CUDA_CUMODULELOAD cuModuleLoad;
@ -1272,13 +1282,18 @@ int hc_cuFuncSetAttribute (void *hashcat_ctx, CUfunction hfunc, CUfunction_
int hc_cuInit (void *hashcat_ctx, unsigned int Flags);
int hc_cuLaunchKernel (void *hashcat_ctx, CUfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, void **kernelParams, void **extra);
int hc_cuMemAlloc (void *hashcat_ctx, CUdeviceptr *dptr, size_t bytesize);
int hc_cuMemcpyDtoD (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyDtoH (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
int hc_cuMemcpyHtoD (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
int hc_cuMemsetD32 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N);
int hc_cuMemsetD8 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N);
int hc_cuMemcpyDtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
int hc_cuMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
int hc_cuMemcpyHtoDAsync (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr);
int hc_cuMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total);
int hc_cuMemsetD32Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
int hc_cuMemsetD8Async (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr);
int hc_cuMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total);
int hc_cuModuleGetFunction (void *hashcat_ctx, CUfunction *hfunc, CUmodule hmod, const char *name);
int hc_cuModuleGetGlobal (void *hashcat_ctx, CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
int hc_cuModuleLoadDataEx (void *hashcat_ctx, CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);

File diff suppressed because it is too large Load Diff

View File

@ -111,7 +111,7 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_devi
int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size);
int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms);
int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
#endif // __APPLE__

View File

@ -33,6 +33,7 @@ bool overflow_check_u64_add (const u64 a, const u64 b);
bool overflow_check_u64_mul (const u64 a, const u64 b);
bool is_power_of_2 (const u32 v);
u32 smallest_repeat_double (const u32 v);
u32 get_random_num (const u32 min, const u32 max);

View File

@ -115,6 +115,7 @@ typedef enum event_identifier
EVENT_BRIDGES_SALT_POST = 0x00000122,
EVENT_BRIDGES_SALT_PRE = 0x00000123,
EVENT_CALCULATED_WORDS_BASE = 0x00000020,
EVENT_CALCULATED_WORDS_CNT = 0x00000021,
EVENT_CRACKER_FINISHED = 0x00000030,
EVENT_CRACKER_HASH_CRACKED = 0x00000031,
EVENT_CRACKER_STARTING = 0x00000032,
@ -424,6 +425,9 @@ typedef enum opti_type
OPTI_TYPE_REGISTER_LIMIT = (1 << 20), // We'll limit the register count to 128
OPTI_TYPE_SLOW_HASH_SIMD_INIT2 = (1 << 21),
OPTI_TYPE_SLOW_HASH_SIMD_LOOP2 = (1 << 22),
OPTI_TYPE_SLOW_HASH_DIMY_INIT = (1 << 23),
OPTI_TYPE_SLOW_HASH_DIMY_LOOP = (1 << 24),
OPTI_TYPE_SLOW_HASH_DIMY_COMP = (1 << 25),
} opti_type_t;
@ -488,14 +492,17 @@ typedef enum opts_type
OPTS_TYPE_DYNAMIC_SHARED = (1ULL << 53), // use dynamic shared memory (note: needs special kernel changes)
OPTS_TYPE_SELF_TEST_DISABLE = (1ULL << 54), // some algos use JiT in combinations with a salt or create too much startup time
OPTS_TYPE_MP_MULTI_DISABLE = (1ULL << 55), // do not multiply the kernel-accel with the multiprocessor count per device to allow more fine-tuned workload settings
OPTS_TYPE_NATIVE_THREADS = (1ULL << 56), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
OPTS_TYPE_MAXIMUM_THREADS = (1ULL << 57), // disable else branch in pre-compilation thread count optimization setting
OPTS_TYPE_POST_AMP_UTF16LE = (1ULL << 58), // run the utf8 to utf16le conversion kernel after they have been processed from amplifiers
OPTS_TYPE_THREAD_MULTI_DISABLE // do not multiply the kernel-power with the thread count per device for super slow algos
= (1ULL << 56),
OPTS_TYPE_NATIVE_THREADS = (1ULL << 57), // forces "native" thread count: CPU=1, GPU-Intel=8, GPU-AMD=64 (wavefront), GPU-NV=32 (warps)
OPTS_TYPE_MAXIMUM_THREADS = (1ULL << 58), // disable else branch in pre-compilation thread count optimization setting
OPTS_TYPE_POST_AMP_UTF16LE = (1ULL << 59), // run the utf8 to utf16le conversion kernel after they have been processed from amplifiers
OPTS_TYPE_AUTODETECT_DISABLE
= (1ULL << 59), // skip autodetect engine
OPTS_TYPE_STOCK_MODULE = (1ULL << 60), // module included with hashcat default distribution
= (1ULL << 60), // skip autodetect engine
OPTS_TYPE_STOCK_MODULE = (1ULL << 61), // module included with hashcat default distribution
OPTS_TYPE_MULTIHASH_DESPITE_ESALT
= (1ULL << 61), // overrule multihash cracking check same salt but not same esalt
= (1ULL << 62), // overrule multihash cracking check same salt but not same esalt
OPTS_TYPE_MAXIMUM_ACCEL = (1ULL << 63) // try to maximize kernel-accel during autotune
} opts_type_t;
@ -539,6 +546,7 @@ typedef enum dgst_size
DGST_SIZE_4_6 = (6 * sizeof (u32)), // 24
DGST_SIZE_4_7 = (7 * sizeof (u32)), // 28
DGST_SIZE_4_8 = (8 * sizeof (u32)), // 32
DGST_SIZE_4_10 = (10 * sizeof (u32)), // 40
DGST_SIZE_4_16 = (16 * sizeof (u32)), // 64 !!!
DGST_SIZE_4_32 = (32 * sizeof (u32)), // 128 !!!
DGST_SIZE_4_64 = (64 * sizeof (u32)), // 256
@ -710,7 +718,7 @@ typedef enum user_options_defaults
#else
HWMON_TEMP_ABORT = 90,
#endif
HASH_INFO = false,
HASH_INFO = 0,
HASH_MODE = 0,
HCCAPX_MESSAGE_PAIR = 0,
HEX_CHARSET = false,
@ -726,6 +734,7 @@ typedef enum user_options_defaults
KERNEL_LOOPS = 0,
KERNEL_THREADS = 0,
KEYSPACE = false,
TOTAL_CANDIDATES = false,
LEFT = false,
LIMIT = 0,
LOGFILE = true,
@ -827,13 +836,17 @@ typedef enum user_options_map
IDX_CUSTOM_CHARSET_2 = '2',
IDX_CUSTOM_CHARSET_3 = '3',
IDX_CUSTOM_CHARSET_4 = '4',
IDX_CUSTOM_CHARSET_5 = '5',
IDX_CUSTOM_CHARSET_6 = '6',
IDX_CUSTOM_CHARSET_7 = '7',
IDX_CUSTOM_CHARSET_8 = '8',
IDX_DEBUG_FILE = 0xff12,
IDX_DEBUG_MODE = 0xff13,
IDX_DEPRECATED_CHECK_DISABLE = 0xff14,
IDX_DYNAMIC_X = 0xff55,
IDX_ENCODING_FROM = 0xff15,
IDX_ENCODING_TO = 0xff16,
IDX_HASH_INFO = 0xff17,
IDX_HASH_INFO = 'H', // 0xff17
IDX_FORCE = 0xff18,
IDX_HWMON_DISABLE = 0xff19,
IDX_HWMON_TEMP_ABORT = 0xff1a,
@ -909,6 +922,7 @@ typedef enum user_options_map
IDX_STATUS_TIMER = 0xff4c,
IDX_STDOUT_FLAG = 0xff4d,
IDX_STDIN_TIMEOUT_ABORT = 0xff4e,
IDX_TOTAL_CANDIDATES = 0xff58,
IDX_TRUECRYPT_KEYFILES = 0xff4f,
IDX_USERNAME = 0xff50,
IDX_VERACRYPT_KEYFILES = 0xff51,
@ -1387,6 +1401,8 @@ typedef struct hc_device_param
u32 kernel_threads_min;
u32 kernel_threads_max;
bool overtune_unfriendly; // whatever sets this decide we operate in a mode that is not allowing to overtune threads_max or accel_max in autotuner
u64 kernel_power;
u64 hardware_power;
@ -1519,6 +1535,7 @@ typedef struct hc_device_param
bool has_lop3;
bool has_mov64;
bool has_prmt;
bool has_shfw;
double spin_damp;
@ -2209,6 +2226,8 @@ typedef struct outfile_ctx
char *filename;
hc_thread_mutex_t mux_outfile;
} outfile_ctx_t;
typedef struct pot
@ -2433,13 +2452,13 @@ typedef struct user_options
bool deprecated_check;
bool dynamic_x;
bool hwmon;
bool hash_info;
bool hex_charset;
bool hex_salt;
bool hex_wordlist;
bool increment;
bool keep_guessing;
bool keyspace;
bool total_candidates;
bool left;
bool logfile;
bool loopback;
@ -2484,7 +2503,6 @@ typedef struct user_options
char *bridge_parameter3;
char *bridge_parameter4;
char *cpu_affinity;
char *custom_charset_4;
char *debug_file;
char *induction_dir;
char *keyboard_layout_mapping;
@ -2503,6 +2521,11 @@ typedef struct user_options
const char *custom_charset_1;
const char *custom_charset_2;
const char *custom_charset_3;
const char *custom_charset_4;
const char *custom_charset_5;
const char *custom_charset_6;
const char *custom_charset_7;
const char *custom_charset_8;
const char *encoding_from;
const char *encoding_to;
const char *rule_buf_l;
@ -2526,6 +2549,7 @@ typedef struct user_options
#endif
u32 debug_mode;
u32 hwmon_temp_abort;
u32 hash_info;
int hash_mode;
u32 hccapx_message_pair;
u32 hook_threads;

View File

@ -358,7 +358,7 @@ LFLAGS_NATIVE += -lpthread
endif # NetBSD
ifeq ($(UNAME),Darwin)
export MACOSX_DEPLOYMENT_TARGET=10.15
export MACOSX_DEPLOYMENT_TARGET=15.0
CFLAGS_NATIVE := $(CFLAGS)
CFLAGS_NATIVE += -DWITH_HWMON

View File

@ -43,7 +43,8 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
device_param->kernel_param.loop_cnt = kernel_loops; // not a bug, both need to be set
device_param->kernel_param.il_cnt = kernel_loops; // because there's two variables for inner iters for slow and fast hashes
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads;
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors)
* ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads);
u32 kernel_power_try = hardware_power * kernel_accel;
@ -98,6 +99,7 @@ static double try_run_times (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi
static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
const hashes_t *hashes = hashcat_ctx->hashes;
const hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
const backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
const straight_ctx_t *straight_ctx = hashcat_ctx->straight_ctx;
@ -132,7 +134,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
device_param->kernel_accel = kernel_accel_min;
device_param->kernel_loops = kernel_loops_min;
device_param->kernel_threads = kernel_threads_min;
device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads_min;
device_param->hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors)
* ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads_min);
device_param->kernel_power = device_param->hardware_power * kernel_accel_min;
}
@ -211,7 +214,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
// from here it's clear we are allowed to autotune
// so let's init some fake words
const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * kernel_threads_max;
const u32 hardware_power_max = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors)
* ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : kernel_threads_max);
u32 kernel_power_max = hardware_power_max * kernel_accel_max;
@ -264,12 +268,12 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoDAsync (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->cuda_stream) == -1) return -1;
if (hc_cuMemcpyDtoD (hashcat_ctx, device_param->cuda_d_rules_c, device_param->cuda_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
}
if (device_param->is_hip == true)
{
if (hc_hipMemcpyDtoDAsync (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), device_param->hip_stream) == -1) return -1;
if (hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t)) == -1) return -1;
}
#if defined (__APPLE__)
@ -297,13 +301,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
const u32 kernel_threads_sav = device_param->kernel_threads;
device_param->kernel_threads = device_param->kernel_wgs1;
device_param->kernel_threads = MIN (device_param->kernel_wgs1, kernel_threads_max);
run_kernel (hashcat_ctx, device_param, KERN_RUN_1, 0, kernel_power_max, false, 0, true);
if (hashconfig->opts_type & OPTS_TYPE_LOOP_PREPARE)
{
device_param->kernel_threads = device_param->kernel_wgs2p;
device_param->kernel_threads = MIN (device_param->kernel_wgs2p, kernel_threads_max);
run_kernel (hashcat_ctx, device_param, KERN_RUN_2P, 0, kernel_power_max, false, 0, true);
}
@ -328,18 +332,122 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
}
// v7 autotuner is a lot more straight forward
// we start with some purely theoretical values as a base, then move on to some meassured tests
for (u32 kernel_loops_test = kernel_loops_min; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL)
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_test, kernel_threads_min, 2);
if (kernel_accel_min < kernel_accel_max)
{
// let's also do some minimal accel, this is only to improve early meassurements taken with try_run()
//printf ("loop %f %u %u %u\n", exec_msec, kernel_accel_min, kernel_loops_test, kernel_threads_min);
const u32 kernel_accel_start = previous_power_of_two (kernel_accel_max / 8);
if ((kernel_accel_start >= kernel_accel_min) && (kernel_accel_start <= kernel_accel_max))
{
kernel_accel = kernel_accel_start;
}
}
}
if (kernel_threads_min < kernel_threads_max)
{
// there could be a situation, like in 18600, where we have a thread_min which is not a multiple of
// kernel_preferred_wgs_multiple. As long as it's only a threads_min, but not a threads_max, we
// should stick to at least kernel_preferred_wgs_multiple
if (kernel_threads_min % device_param->kernel_preferred_wgs_multiple)
{
if ((device_param->kernel_preferred_wgs_multiple >= kernel_threads_min) && (device_param->kernel_preferred_wgs_multiple <= kernel_threads_max))
{
kernel_threads = device_param->kernel_preferred_wgs_multiple;
}
}
}
if (hashconfig->attack_exec == ATTACK_EXEC_OUTSIDE_KERNEL)
{
if (hashes && hashes->salts_buf)
{
u32 start = kernel_loops_max;
const u32 salt_iter = hashes->salts_buf->salt_iter; // we use the first salt as reference
if (salt_iter)
{
start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter));
start = MIN (start, smallest_repeat_double (hashes->salts_buf->salt_iter + 1));
if (((hashes->salts_buf->salt_iter + 0) % 125) == 0) start = MIN (start, 125);
if (((hashes->salts_buf->salt_iter + 1) % 125) == 0) start = MIN (start, 125);
if ((start >= kernel_loops_min) && (start <= kernel_loops_max))
{
kernel_loops = start;
}
}
else
{
// how can there be a slow hash with no iterations?
}
}
}
else
{
// let's also do some minimal loops, this is only to improve early meassurements taken with try_run()
const u32 kernel_loops_start = previous_power_of_two (kernel_loops_max / 4);
if ((kernel_loops_start >= kernel_loops_min) && (kernel_loops_start <= kernel_loops_max))
{
kernel_loops = kernel_loops_start;
}
}
if (1)
{
// some algorithm start ways to high with these theoretical preset (for instance, 8700)
// so much that they can't be tuned anymore
while ((kernel_accel > kernel_accel_min) || (kernel_threads > kernel_threads_min) || (kernel_loops > kernel_loops_min))
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);
if (exec_msec < target_msec / 16) break;
if (kernel_accel > kernel_accel_min)
{
kernel_accel = MAX (kernel_accel / 2, kernel_accel_min);
continue;
}
if (kernel_threads > kernel_threads_min)
{
kernel_threads = MAX (kernel_threads / 2, kernel_threads_min);
continue;
}
if (kernel_loops > kernel_loops_min)
{
kernel_loops = MAX (kernel_loops / 2, kernel_loops_min);
continue;
}
}
}
for (u32 kernel_loops_test = kernel_loops; kernel_loops_test <= kernel_loops_max; kernel_loops_test <<= 1)
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops_test, kernel_threads, 2);
//printf ("loop %f %u %u %u\n", exec_msec, kernel_accel, kernel_loops_test, kernel_threads);
if (exec_msec > target_msec) break;
// we want a little room for threads to play with so not full target_msec
// but of course only if we are going to make use of that :)
if ((kernel_accel_min < kernel_accel_max) || (kernel_threads_min < kernel_threads_max))
if ((kernel_accel < kernel_accel_max) || (kernel_threads < kernel_threads_max))
{
if (exec_msec > target_msec / 8) break;
@ -353,21 +461,46 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
kernel_loops = kernel_loops_test;
}
for (u32 kernel_threads_test = kernel_threads_min; kernel_threads_test <= kernel_threads_max; kernel_threads_test <<= 1)
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel_min, kernel_loops, kernel_threads_test, 2);
double exec_msec_init = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads, 2);
//printf ("threads %f %u %u %u\n", exec_msec, kernel_accel_min, kernel_loops, kernel_threads_test);
float threads_eff_best = exec_msec_init / kernel_threads;
u32 threads_cnt_best = kernel_threads;
float threads_eff_prev = 0;
u32 threads_cnt_prev = 0;
for (u32 kernel_threads_test = kernel_threads; kernel_threads_test <= kernel_threads_max; kernel_threads_test = (kernel_threads_test < device_param->kernel_preferred_wgs_multiple) ? kernel_threads_test << 1 : kernel_threads_test + device_param->kernel_preferred_wgs_multiple)
{
double exec_msec = try_run_times (hashcat_ctx, device_param, kernel_accel, kernel_loops, kernel_threads_test, 2);
//printf ("thread %f %u %u %u\n", exec_msec, kernel_accel, kernel_loops, kernel_threads_test);
if (exec_msec > target_msec) break;
if (kernel_threads >= 32)
{
// we want a little room for accel to play with so not full target_msec
if (exec_msec > target_msec / 8) break;
if (exec_msec > target_msec / 4) break;
}
kernel_threads = kernel_threads_test;
threads_eff_prev = exec_msec / kernel_threads_test;
threads_cnt_prev = kernel_threads_test;
//printf ("%f\n", threads_eff_prev);
if (threads_eff_prev < threads_eff_best)
{
threads_eff_best = threads_eff_prev;
threads_cnt_best = threads_cnt_prev;
}
}
// now we decide to choose either maximum or in some extreme cases prefer more efficient ones
if ((threads_eff_best * 1.06) < threads_eff_prev)
{
kernel_threads = threads_cnt_best;
}
#define STEPS_CNT 12
@ -401,20 +534,21 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (kernel_accel > kernel_accel_max) kernel_accel = kernel_accel_max;
}
if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
// overtune section. relevant if we have strange numbers from the APIs, namely 96, 384, and such
// this is a dangerous action, and we set conditions somewhere in the code to disable this
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
}
// some final play, if we have strange numbers from the APIs, namely 96, 384, and such
if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max))
if ((kernel_accel_min == kernel_accel_max) || (kernel_threads_min == kernel_threads_max) || (device_param->overtune_unfriendly == true))
{
}
else
{
if (kernel_accel > 64) kernel_accel -= kernel_accel % 32;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
if (kernel_accel > device_param->device_processors) kernel_accel -= kernel_accel % device_param->device_processors;
}
u32 fun[2];
if (is_power_of_2 (kernel_threads) == false)
@ -539,7 +673,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
device_param->kernel_loops = kernel_loops;
device_param->kernel_threads = kernel_threads;
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors) * device_param->kernel_threads;
const u32 hardware_power = ((hashconfig->opts_type & OPTS_TYPE_MP_MULTI_DISABLE) ? 1 : device_param->device_processors)
* ((hashconfig->opts_type & OPTS_TYPE_THREAD_MULTI_DISABLE) ? 1 : device_param->kernel_threads);
device_param->hardware_power = hardware_power;
@ -578,7 +713,7 @@ HC_API_CALL void *thread_autotune (void *p)
if (device_param->is_hip == true)
{
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
}
// check for autotune failure
@ -594,10 +729,6 @@ HC_API_CALL void *thread_autotune (void *p)
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
return NULL;
}

File diff suppressed because it is too large Load Diff

View File

@ -81,8 +81,8 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;
@ -110,6 +110,8 @@ int bitmap_ctx_init (hashcat_ctx_t *hashcat_ctx)
u32 *bitmap_s2_c = (u32 *) hcmalloc ((1U << bitmap_max) * sizeof (u32));
u32 *bitmap_s2_d = (u32 *) hcmalloc ((1U << bitmap_max) * sizeof (u32));
if (!bitmap_s1_a || !bitmap_s1_b || !bitmap_s1_c || !bitmap_s1_d || !bitmap_s2_a || !bitmap_s2_b || !bitmap_s2_c || !bitmap_s2_d) return -1;
u32 bitmap_bits;
u32 bitmap_nums;
u32 bitmap_mask;

View File

@ -345,6 +345,34 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
XXH64_update (state, custom_charset_4, strlen (custom_charset_4));
}
if (user_options->custom_charset_5)
{
const char *custom_charset_5 = user_options->custom_charset_5;
XXH64_update (state, custom_charset_5, strlen (custom_charset_5));
}
if (user_options->custom_charset_6)
{
const char *custom_charset_6 = user_options->custom_charset_6;
XXH64_update (state, custom_charset_6, strlen (custom_charset_6));
}
if (user_options->custom_charset_7)
{
const char *custom_charset_7 = user_options->custom_charset_7;
XXH64_update (state, custom_charset_7, strlen (custom_charset_7));
}
if (user_options->custom_charset_8)
{
const char *custom_charset_8 = user_options->custom_charset_8;
XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
}
}
else if (user_options->attack_mode == ATTACK_MODE_HYBRID1)
{
@ -405,6 +433,34 @@ u32 brain_compute_attack (hashcat_ctx_t *hashcat_ctx)
XXH64_update (state, custom_charset_4, strlen (custom_charset_4));
}
if (user_options->custom_charset_5)
{
const char *custom_charset_5 = user_options->custom_charset_5;
XXH64_update (state, custom_charset_5, strlen (custom_charset_5));
}
if (user_options->custom_charset_6)
{
const char *custom_charset_6 = user_options->custom_charset_6;
XXH64_update (state, custom_charset_6, strlen (custom_charset_6));
}
if (user_options->custom_charset_7)
{
const char *custom_charset_7 = user_options->custom_charset_7;
XXH64_update (state, custom_charset_7, strlen (custom_charset_7));
}
if (user_options->custom_charset_8)
{
const char *custom_charset_8 = user_options->custom_charset_8;
XXH64_update (state, custom_charset_8, strlen (custom_charset_8));
}
const int hex_wordlist = user_options->hex_wordlist;
XXH64_update (state, &hex_wordlist, sizeof (hex_wordlist));

View File

@ -87,12 +87,12 @@ bool bridges_init (hashcat_ctx_t *hashcat_ctx)
user_options_t *user_options = hashcat_ctx->user_options;
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
if (user_options->hash_info == true) return true;
if (user_options->backend_info > 0) return true;
if (user_options->hash_info > 0) return true;
if (user_options->usage > 0) return true;
if (user_options->left == true) return true;
if (user_options->show == true) return true;
if (user_options->usage > 0) return true;
if (user_options->version == true) return true;
if (user_options->backend_info > 0) return true;
// There is a problem here. At this point, hashconfig is not yet initialized.
// This is because initializing hashconfig requires the module to be loaded,
@ -241,12 +241,12 @@ bool bridges_salt_prepare (hashcat_ctx_t *hashcat_ctx)
hashes_t *hashes = hashcat_ctx->hashes;
user_options_t *user_options = hashcat_ctx->user_options;
if (user_options->hash_info == true) return true;
if (user_options->backend_info > 0) return true;
if (user_options->hash_info > 0) return true;
if (user_options->usage > 0) return true;
if (user_options->left == true) return true;
if (user_options->show == true) return true;
if (user_options->usage > 0) return true;
if (user_options->version == true) return true;
if (user_options->backend_info > 0) return true;
if (bridge_ctx->enabled == false) return true;

View File

@ -21,8 +21,8 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;
if (user_options->version == true) return 0;

View File

@ -17,8 +17,8 @@ int cpt_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;

View File

@ -118,9 +118,9 @@ int debugfile_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->benchmark == true) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;

View File

@ -58,9 +58,9 @@ int dictstat_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->benchmark == true) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;

View File

@ -381,7 +381,7 @@ HC_API_CALL void *thread_calc_stdin (void *p)
if (device_param->is_hip == true)
{
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
}
if (calc_stdin (hashcat_ctx, device_param) == -1)
@ -396,11 +396,6 @@ HC_API_CALL void *thread_calc_stdin (void *p)
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
if (bridge_ctx->enabled == true)
{
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)
@ -1685,7 +1680,7 @@ HC_API_CALL void *thread_calc (void *p)
if (device_param->is_hip == true)
{
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
if (hc_hipSetDevice (hashcat_ctx, device_param->hip_device) == -1) return NULL;
}
if (calc (hashcat_ctx, device_param) == -1)
@ -1700,11 +1695,6 @@ HC_API_CALL void *thread_calc (void *p)
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
}
if (device_param->is_hip == true)
{
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
}
if (bridge_ctx->enabled == true)
{
if (bridge_ctx->thread_term != BRIDGE_DEFAULT)

View File

@ -87,14 +87,19 @@ int cuda_init (void *hashcat_ctx)
HC_LOAD_FUNC_CUDA (cuda, cuLaunchKernel, cuLaunchKernel, CUDA_CULAUNCHKERNEL, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemAlloc, cuMemAlloc_v2, CUDA_CUMEMALLOC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemAllocHost, cuMemAllocHost_v2, CUDA_CUMEMALLOCHOST, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoD, cuMemcpyDtoD_v2, CUDA_CUMEMCPYDTOD, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoH, cuMemcpyDtoH_v2, CUDA_CUMEMCPYDTOH, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoD, cuMemcpyHtoD_v2, CUDA_CUMEMCPYHTOD, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32, cuMemsetD32, CUDA_CUMEMSETD32, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8, cuMemsetD8, CUDA_CUMEMSETD8, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoDAsync, cuMemcpyDtoDAsync_v2, CUDA_CUMEMCPYDTODASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyDtoHAsync, cuMemcpyDtoHAsync_v2, CUDA_CUMEMCPYDTOHASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemcpyHtoDAsync, cuMemcpyHtoDAsync_v2, CUDA_CUMEMCPYHTODASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async, cuMemsetD32Async, CUDA_CUMEMSETD32ASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async, cuMemsetD8Async, CUDA_CUMEMSETD8ASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemFree, cuMemFree_v2, CUDA_CUMEMFREE, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemFreeHost, cuMemFreeHost, CUDA_CUMEMFREEHOST, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemGetInfo, cuMemGetInfo_v2, CUDA_CUMEMGETINFO, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD32Async, cuMemsetD32Async, CUDA_CUMEMSETD32ASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuMemsetD8Async, cuMemsetD8Async, CUDA_CUMEMSETD8ASYNC, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuModuleGetFunction, cuModuleGetFunction, CUDA_CUMODULEGETFUNCTION, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuModuleGetGlobal, cuModuleGetGlobal_v2, CUDA_CUMODULEGETGLOBAL, CUDA, 1);
HC_LOAD_FUNC_CUDA (cuda, cuModuleLoad, cuModuleLoad, CUDA_CUMODULELOAD, CUDA, 1);
@ -517,6 +522,142 @@ int hc_cuMemFree (void *hashcat_ctx, CUdeviceptr dptr)
return 0;
}
int hc_cuMemcpyDtoH (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
const CUresult CU_err = cuda->cuMemcpyDtoH (dstHost, srcDevice, ByteCount);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuMemcpyDtoH(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuMemcpyDtoD (void *hashcat_ctx, CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
const CUresult CU_err = cuda->cuMemcpyDtoD (dstDevice, srcDevice, ByteCount);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuMemcpyDtoD(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuMemcpyHtoD (void *hashcat_ctx, CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
const CUresult CU_err = cuda->cuMemcpyHtoD (dstDevice, srcHost, ByteCount);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuMemcpyHtoD(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuMemsetD32 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned int ui, size_t N)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
const CUresult CU_err = cuda->cuMemsetD32 (dstDevice, ui, N);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuMemsetD32(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuMemsetD32(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuMemsetD8 (void *hashcat_ctx, CUdeviceptr dstDevice, unsigned char uc, size_t N)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
CUDA_PTR *cuda = (CUDA_PTR *) backend_ctx->cuda;
const CUresult CU_err = cuda->cuMemsetD8 (dstDevice, uc, N);
if (CU_err != CUDA_SUCCESS)
{
const char *pStr = NULL;
if (cuda->cuGetErrorString (CU_err, &pStr) == CUDA_SUCCESS)
{
event_log_error (hashcat_ctx, "cuMemsetD8(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "cuMemsetD8(): %d", CU_err);
}
return -1;
}
return 0;
}
int hc_cuMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;

View File

@ -115,47 +115,55 @@ int hip_init (void *hashcat_ctx)
// a good reference is cuda.h itself
// this needs to be verified for each new cuda release
HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGet, hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreateWithFlags, HIP_HIPEVENTCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, HIP_HIPMEMCPYDTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, HIP_HIPMEMCPYDTOHASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async, hipMemsetD32Async, HIP_HIPMEMSETD32ASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async, hipMemsetD8Async, HIP_HIPMEMSETD8ASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion, hipRuntimeGetVersion, HIP_HIPRUNTIMEGETVERSION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxCreate, hipCtxCreate, HIP_HIPCTXCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxDestroy, hipCtxDestroy, HIP_HIPCTXDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxPopCurrent, hipCtxPopCurrent, HIP_HIPCTXPOPCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxPushCurrent, hipCtxPushCurrent, HIP_HIPCTXPUSHCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxSetCurrent, hipCtxSetCurrent, HIP_HIPCTXSETCURRENT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipCtxSynchronize, hipCtxSynchronize, HIP_HIPCTXSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGet, hipDeviceGet, HIP_HIPDEVICEGET, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetAttribute, hipDeviceGetAttribute, HIP_HIPDEVICEGETATTRIBUTE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetCount, hipGetDeviceCount, HIP_HIPDEVICEGETCOUNT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceGetName, hipDeviceGetName, HIP_HIPDEVICEGETNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDeviceTotalMem, hipDeviceTotalMem, HIP_HIPDEVICETOTALMEM, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipDriverGetVersion, hipDriverGetVersion, HIP_HIPDRIVERGETVERSION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventCreate, hipEventCreate, HIP_HIPEVENTCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventCreateWithFlags, hipEventCreateWithFlags, HIP_HIPEVENTCREATEWITHFLAGS, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventDestroy, hipEventDestroy, HIP_HIPEVENTDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventElapsedTime, hipEventElapsedTime, HIP_HIPEVENTELAPSEDTIME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventRecord, hipEventRecord, HIP_HIPEVENTRECORD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipEventSynchronize, hipEventSynchronize, HIP_HIPEVENTSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipFuncGetAttribute, hipFuncGetAttribute, HIP_HIPFUNCGETATTRIBUTE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorName, hipDrvGetErrorName, HIP_HIPGETERRORNAME, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetErrorString, hipDrvGetErrorString, HIP_HIPGETERRORSTRING, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipInit, hipInit, HIP_HIPINIT, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipLaunchKernel, hipModuleLaunchKernel, HIP_HIPLAUNCHKERNEL, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemAlloc, hipMalloc, HIP_HIPMEMALLOC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemFree, hipFree, HIP_HIPMEMFREE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemGetInfo, hipMemGetInfo, HIP_HIPMEMGETINFO, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoD, hipMemcpyDtoD, HIP_HIPMEMCPYDTOD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoH, hipMemcpyDtoH, HIP_HIPMEMCPYDTOH, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoD, hipMemcpyHtoD, HIP_HIPMEMCPYHTOD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD32, hipMemsetD32, HIP_HIPMEMSETD32, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD8, hipMemsetD8, HIP_HIPMEMSETD8, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoDAsync, hipMemcpyDtoDAsync, HIP_HIPMEMCPYDTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyDtoHAsync, hipMemcpyDtoHAsync, HIP_HIPMEMCPYDTOHASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemcpyHtoDAsync, hipMemcpyHtoDAsync, HIP_HIPMEMCPYHTODASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD32Async, hipMemsetD32Async, HIP_HIPMEMSETD32ASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipMemsetD8Async, hipMemsetD8Async, HIP_HIPMEMSETD8ASYNC, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleGetFunction, hipModuleGetFunction, HIP_HIPMODULEGETFUNCTION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleGetGlobal, hipModuleGetGlobal, HIP_HIPMODULEGETGLOBAL, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleLoadDataEx, hipModuleLoadDataEx, HIP_HIPMODULELOADDATAEX, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleUnload, hipModuleUnload, HIP_HIPMODULEUNLOAD, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipRuntimeGetVersion, hipRuntimeGetVersion, HIP_HIPRUNTIMEGETVERSION, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipSetDevice, hipSetDevice, HIP_HIPSETDEVICE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipSetDeviceFlags, hipSetDeviceFlags, HIP_HIPSETDEVICEFLAGS, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamCreate, hipStreamCreate, HIP_HIPSTREAMCREATE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamCreateWithFlags, hipStreamCreateWithFlags, HIP_HIPSTREAMCREATEWITHFLAGS, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamDestroy, hipStreamDestroy, HIP_HIPSTREAMDESTROY, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipStreamSynchronize, hipStreamSynchronize, HIP_HIPSTREAMSYNCHRONIZE, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipGetDeviceProperties, hipGetDevicePropertiesR0600, HIP_HIPGETDEVICEPROPERTIES, HIP, 1);
HC_LOAD_FUNC_HIP (hip, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, hipModuleOccupancyMaxActiveBlocksPerMultiprocessor, HIP_HIPMODULEOCCUPANCYMAXACTIVEBLOCKSPERMULTIPROCESSOR, HIP, 1);
return 0;
}
@ -503,13 +511,13 @@ int hc_hipDriverGetVersion (void *hashcat_ctx, int *driverVersion)
return 0;
}
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flags)
int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipEventCreate (phEvent, Flags);
const hipError_t HIP_err = hip->hipEventCreate (phEvent);
if (HIP_err != hipSuccess)
{
@ -530,6 +538,33 @@ int hc_hipEventCreate (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int Flag
return 0;
}
int hc_hipEventCreateWithFlags (void *hashcat_ctx, hipEvent_t *phEvent, unsigned int flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipEventCreateWithFlags (phEvent, flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipEventCreateWithFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipEventDestroy (void *hashcat_ctx, hipEvent_t hEvent)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
@ -800,6 +835,143 @@ int hc_hipMemGetInfo (void *hashcat_ctx, size_t *free, size_t *total)
return 0;
}
int hc_hipMemcpyDtoH (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipMemcpyDtoH (dstHost, srcDevice, ByteCount);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyDtoH(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemcpyDtoD (void *hashcat_ctx, hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipMemcpyDtoD (dstDevice, srcDevice, ByteCount);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyDtoD(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemcpyHtoD (void *hashcat_ctx, hipDeviceptr_t dstDevice, const void *srcHost, size_t ByteCount)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipMemcpyHtoD (dstDevice, srcHost, ByteCount);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemcpyHtoD(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemsetD32 (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned int ui, size_t N)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipMemsetD32 (dstDevice, ui, N);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipMemsetD32(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemsetD32(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemsetD8 (void *hashcat_ctx, hipDeviceptr_t dstDevice, unsigned char uc, size_t N)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipMemsetD8 (dstDevice, uc, N);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipMemsetD8(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipMemsetD8(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipMemcpyDtoHAsync (void *hashcat_ctx, void *dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, hipStream_t hStream)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
@ -1070,13 +1242,67 @@ int hc_hipRuntimeGetVersion (void *hashcat_ctx, int *runtimeVersion)
return 0;
}
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
int hc_hipSetDevice (void *hashcat_ctx, hipDevice_t dev)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipStreamCreate (phStream, Flags);
const hipError_t HIP_err = hip->hipSetDevice (dev);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipSetDevice(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipSetDevice(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipSetDeviceFlags (void *hashcat_ctx, unsigned int flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipSetDeviceFlags (flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipSetDeviceFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipStreamCreate (phStream);
if (HIP_err != hipSuccess)
{
@ -1097,6 +1323,33 @@ int hc_hipStreamCreate (void *hashcat_ctx, hipStream_t *phStream, unsigned int F
return 0;
}
int hc_hipStreamCreateWithFlags (void *hashcat_ctx, hipStream_t *phStream, unsigned int Flags)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;
HIP_PTR *hip = (HIP_PTR *) backend_ctx->hip;
const hipError_t HIP_err = hip->hipStreamCreateWithFlags (phStream, Flags);
if (HIP_err != hipSuccess)
{
const char *pStr = NULL;
if (hip->hipGetErrorString (HIP_err, &pStr) == hipSuccess)
{
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %s", pStr);
}
else
{
event_log_error (hashcat_ctx, "hipStreamCreateWithFlags(): %d", HIP_err);
}
return -1;
}
return 0;
}
int hc_hipStreamDestroy (void *hashcat_ctx, hipStream_t hStream)
{
backend_ctx_t *backend_ctx = ((hashcat_ctx_t *) hashcat_ctx)->backend_ctx;

View File

@ -195,11 +195,14 @@ static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_option
}
// if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage
if (include_path != nil)
{
NSString *path_key = @"INCLUDE_PATH";
NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding];
// Include path may contain spaces, escape them with a backslash
path_value = [path_value stringByReplacingOccurrencesOfString:@" " withString:@"\\ "];
[build_options_dict setObject:path_value forKey:path_key];
@ -743,6 +746,7 @@ int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_libra
dispatch_queue_t queue = dispatch_get_global_queue (DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
// if no user-defined runtime, set to METAL_COMPILER_RUNTIME
long timeout = (user_options->metal_compiler_runtime > 0) ? user_options->metal_compiler_runtime : METAL_COMPILER_RUNTIME;
dispatch_time_t when = dispatch_time (DISPATCH_TIME_NOW,NSEC_PER_SEC * timeout);
@ -1314,10 +1318,21 @@ int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_com
return 0;
}
int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, size_t global_work_size, size_t local_work_size, double *ms)
int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms)
{
MTLSize numThreadgroups = {local_work_size, 1, 1};
MTLSize threadsGroup = {global_work_size, 1, 1};
MTLSize threadsPerThreadgroup =
{
local_work_size[0],
local_work_size[1],
local_work_size[2]
};
MTLSize threadgroupsPerGrid =
{
(global_work_size[0] + threadsPerThreadgroup.width - 1) / threadsPerThreadgroup.width,
work_dim > 1 ? (global_work_size[1] + threadsPerThreadgroup.height - 1) / threadsPerThreadgroup.height : 1,
work_dim > 2 ? (global_work_size[2] + threadsPerThreadgroup.depth - 1) / threadsPerThreadgroup.depth : 1
};
if (metal_command_encoder == nil)
{
@ -1333,7 +1348,7 @@ int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_com
return -1;
}
[metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups];
[metal_command_encoder dispatchThreadgroups: threadgroupsPerGrid threadsPerThreadgroup: threadsPerThreadgroup];
[metal_command_encoder endEncoding];
[metal_command_buffer commit];
@ -1377,17 +1392,22 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device,
if (k_string != nil)
{
id <MTLLibrary> r = [metal_device newLibraryWithFile: k_string error: &error];
NSURL *libURL = [NSURL fileURLWithPath: k_string];
if (error != nil)
if (libURL != nil)
{
event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
return -1;
id <MTLLibrary> r = [metal_device newLibraryWithURL: libURL error:&error];
if (error != nil)
{
event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]);
return -1;
}
*metal_library = r;
return 0;
}
*metal_library = r;
return 0;
}
return -1;
@ -1420,10 +1440,17 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device
}
compileOptions.preprocessorMacros = build_options_dict;
/*
compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize;
compileOptions.mathMode = MTLMathModeSafe;
// compileOptions.mathMode = MTLMathModeRelaxed;
// compileOptions.enableLogging = true;
// compileOptions.fastMathEnabled = false;
*/
}
// todo: detect current os version and choose the right
// compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
// compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3;
/*
if (@available(macOS 12.0, *))
{

View File

@ -132,6 +132,7 @@ static int inner2_loop (hashcat_ctx_t *hashcat_ctx)
status_ctx->words_base = status_ctx->words_cnt / amplifier_cnt;
EVENT (EVENT_CALCULATED_WORDS_BASE);
EVENT (EVENT_CALCULATED_WORDS_CNT);
if (user_options->keyspace == true)
{
@ -1481,6 +1482,8 @@ bool autodetect_hashmode_test (hashcat_ctx_t *hashcat_ctx)
{
char *input_buf = user_options_extra->hc_hash;
if (!input_buf) return false;
size_t input_len = strlen (input_buf);
char *hash_buf = NULL;

View File

@ -334,7 +334,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
if (device_param->is_cuda == true)
{
rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->cuda_stream);
rc = hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
if (rc == 0)
{
@ -351,7 +351,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
if (device_param->is_hip == true)
{
rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size, device_param->hip_stream);
rc = hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
if (rc == 0)
{
@ -382,7 +382,7 @@ int check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pla
if (device_param->is_opencl == true)
{
rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_FALSE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event);
rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, &opencl_event);
if (rc == 0)
{
@ -587,14 +587,14 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
if (device_param->is_cuda == true)
{
if (hc_cuMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32), device_param->cuda_stream) == -1) return -1;
if (hc_cuMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->cuda_d_result, sizeof (u32)) == -1) return -1;
if (hc_cuStreamSynchronize (hashcat_ctx, device_param->cuda_stream) == -1) return -1;
}
if (device_param->is_hip == true)
{
if (hc_hipMemcpyDtoHAsync (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32), device_param->hip_stream) == -1) return -1;
if (hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32)) == -1) return -1;
if (hc_hipStreamSynchronize (hashcat_ctx, device_param->hip_stream) == -1) return -1;
}
@ -624,7 +624,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
if (device_param->is_cuda == true)
{
rc = hc_cuMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->cuda_stream);
rc = hc_cuMemcpyDtoH (hashcat_ctx, cracked, device_param->cuda_d_plain_bufs, num_cracked * sizeof (plain_t));
if (rc == 0)
{
@ -641,7 +641,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
if (device_param->is_hip == true)
{
rc = hc_hipMemcpyDtoHAsync (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t), device_param->hip_stream);
rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t));
if (rc == 0)
{
@ -1133,7 +1133,7 @@ int hashes_init_stage1 (hashcat_ctx_t *hashcat_ctx)
hashes_cnt = 1;
}
else if (user_options->hash_info == true)
else if (user_options->hash_info > 0)
{
}
else if (user_options->keyspace == true)
@ -2386,13 +2386,13 @@ int hashes_init_stage5 (hashcat_ctx_t *hashcat_ctx)
char *st_hash = strdup (tmp_buf);
event_log_error (hashcat_ctx, "ERROR: Incompatible self-test SCRYPT configuration detected.");
event_log_error (hashcat_ctx, "ERROR: Incompatible self-test configuration detected.");
event_log_warning (hashcat_ctx, "The specified target hash:");
event_log_warning (hashcat_ctx, " -> %s", user_hash);
event_log_warning (hashcat_ctx, "does not match the SCRYPT configuration of the self-test hash:");
event_log_warning (hashcat_ctx, "does not match the configuration of the self-test hash:");
event_log_warning (hashcat_ctx, " -> %s", st_hash);
event_log_warning (hashcat_ctx, "The JIT-compiled kernel for this SCRYPT configuration may be incompatible.");
event_log_warning (hashcat_ctx, "The JIT-compiled kernel for this configuration may be incompatible.");
event_log_warning (hashcat_ctx, "You must disable the self-test functionality or recompile the plugin with a matching self-test hash.");
event_log_warning (hashcat_ctx, "To disable the self-test, use the --self-test-disable option.");
event_log_warning (hashcat_ctx, NULL);
@ -2414,11 +2414,11 @@ int hashes_init_stage5 (hashcat_ctx_t *hashcat_ctx)
char *user_hash2 = strdup (tmp_buf);
event_log_error (hashcat_ctx, "ERROR: Mixed SCRYPT configuration detected.");
event_log_error (hashcat_ctx, "ERROR: Mixed configuration detected.");
event_log_warning (hashcat_ctx, "The specified target hash:");
event_log_warning (hashcat_ctx, " -> %s", user_hash);
event_log_warning (hashcat_ctx, "does not match the SCRYPT configuration of another target hash:");
event_log_warning (hashcat_ctx, "does not match the configuration of another target hash:");
event_log_warning (hashcat_ctx, " -> %s", user_hash2);
event_log_warning (hashcat_ctx, "Please run these hashes in separate cracking sessions.");
event_log_warning (hashcat_ctx, NULL);

View File

@ -1268,142 +1268,10 @@ u64 hm_get_memoryused_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int ba
return 0;
}
int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
static void hwmon_ctx_init_nvml (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_nvml, int backend_devices_cnt)
{
bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
user_options_t *user_options = hashcat_ctx->user_options;
hwmon_ctx->enabled = false;
int backend_devices_cnt = backend_ctx->backend_devices_cnt;
if (bridge_ctx->enabled == true) backend_devices_cnt = 1;
//#if !defined (WITH_HWMON)
//return 0;
//#endif // WITH_HWMON
if (user_options->usage > 0) return 0;
//if (user_options->backend_info > 0) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;
if (user_options->stdout_flag == true) return 0;
if (user_options->version == true) return 0;
if (user_options->identify == true) return 0;
//we need hwmon support to get free memory per device support
//its a joke, but there's no way around
//if (user_options->hwmon == false) return 0;
hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
/**
* Initialize shared libraries
*/
hm_attrs_t *hm_adapters_adl = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_nvapi = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_nvml = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_sysfs_amdgpu = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_sysfs_cpu = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_iokit = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
#define FREE_ADAPTERS \
do { \
hcfree (hm_adapters_adl); \
hcfree (hm_adapters_nvapi); \
hcfree (hm_adapters_nvml); \
hcfree (hm_adapters_sysfs_amdgpu); \
hcfree (hm_adapters_sysfs_cpu); \
hcfree (hm_adapters_iokit); \
} while (0)
if (backend_ctx->need_nvml == true)
{
hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
if (nvml_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_nvml);
hwmon_ctx->hm_nvml = NULL;
}
}
if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
{
hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
if (nvapi_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_nvapi);
hwmon_ctx->hm_nvapi = NULL;
}
}
if (backend_ctx->need_adl == true)
{
hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
if (adl_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_adl);
hwmon_ctx->hm_adl = NULL;
}
}
if (backend_ctx->need_sysfs_amdgpu == true)
{
hwmon_ctx->hm_sysfs_amdgpu = (SYSFS_AMDGPU_PTR *) hcmalloc (sizeof (SYSFS_AMDGPU_PTR));
if (sysfs_amdgpu_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_sysfs_amdgpu);
hwmon_ctx->hm_sysfs_amdgpu = NULL;
}
// also if there's ADL, we don't need sysfs_amdgpu
if (hwmon_ctx->hm_adl)
{
hcfree (hwmon_ctx->hm_sysfs_amdgpu);
hwmon_ctx->hm_sysfs_amdgpu = NULL;
}
}
if (backend_ctx->need_sysfs_cpu == true)
{
hwmon_ctx->hm_sysfs_cpu = (SYSFS_CPU_PTR *) hcmalloc (sizeof (SYSFS_CPU_PTR));
if (sysfs_cpu_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_sysfs_cpu);
hwmon_ctx->hm_sysfs_cpu = NULL;
}
}
#if defined(__APPLE__)
if (backend_ctx->need_iokit == true)
{
hwmon_ctx->hm_iokit = (IOKIT_PTR *) hcmalloc (sizeof (IOKIT_PTR));
if (iokit_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_iokit);
hwmon_ctx->hm_iokit = NULL;
}
}
#endif
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
if (hwmon_ctx->hm_nvml)
{
@ -1485,6 +1353,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hcfree (nvmlGPUHandle);
}
}
}
static void hwmon_ctx_init_nvapi (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_nvapi, int backend_devices_cnt)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
if (hwmon_ctx->hm_nvapi)
{
@ -1558,6 +1432,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hcfree (nvGPUHandle);
}
}
}
static int hwmon_ctx_init_adl (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_adl, int backend_devices_cnt)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
if (hwmon_ctx->hm_adl)
{
@ -1567,23 +1447,13 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
int tmp_in;
if (get_adapters_num_adl (hashcat_ctx, &tmp_in) == -1)
{
FREE_ADAPTERS;
return -1;
}
if (get_adapters_num_adl (hashcat_ctx, &tmp_in) == -1) return -1;
// adapter info
LPAdapterInfo lpAdapterInfo = (LPAdapterInfo) hccalloc (tmp_in, sizeof (AdapterInfo));
if (hm_ADL_Adapter_AdapterInfo_Get (hashcat_ctx, lpAdapterInfo, tmp_in * sizeof (AdapterInfo)) == -1)
{
FREE_ADAPTERS;
return -1;
}
if (hm_ADL_Adapter_AdapterInfo_Get (hashcat_ctx, lpAdapterInfo, tmp_in * sizeof (AdapterInfo)) == -1) return -1;
for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
{
@ -1639,107 +1509,260 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
}
}
return 0;
}
static void hwmon_ctx_init_sysfs_amdgpu_iokit (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_sysfs_amdgpu, hm_attrs_t *hm_adapters_iokit, int backend_devices_cnt)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
if (hwmon_ctx->hm_sysfs_amdgpu || hwmon_ctx->hm_iokit)
{
if (true)
for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
{
for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
if (device_param->skipped == true) continue;
if (device_param->is_cuda == true)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
// nothing to do
}
if (device_param->skipped == true) continue;
#if defined (__APPLE__)
if (device_param->is_metal == true)
{
const u32 device_id = device_param->device_id;
if (device_param->is_cuda == true)
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
{
// nothing to do
hm_adapters_iokit[device_id].buslanes_get_supported = false;
hm_adapters_iokit[device_id].corespeed_get_supported = false;
hm_adapters_iokit[device_id].fanspeed_get_supported = true;
hm_adapters_iokit[device_id].fanpolicy_get_supported = false;
hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
hm_adapters_iokit[device_id].temperature_get_supported = true;
hm_adapters_iokit[device_id].utilization_get_supported = true;
}
}
#endif
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
{
const u32 device_id = device_param->device_id;
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
{
hm_adapters_iokit[device_id].buslanes_get_supported = false;
hm_adapters_iokit[device_id].corespeed_get_supported = false;
hm_adapters_iokit[device_id].fanspeed_get_supported = true;
hm_adapters_iokit[device_id].fanpolicy_get_supported = false;
hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
hm_adapters_iokit[device_id].temperature_get_supported = true;
hm_adapters_iokit[device_id].utilization_get_supported = true;
}
#if defined (__APPLE__)
if (device_param->is_metal == true)
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
if (hwmon_ctx->hm_sysfs_amdgpu)
{
const u32 device_id = device_param->device_id;
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
{
hm_adapters_iokit[device_id].buslanes_get_supported = false;
hm_adapters_iokit[device_id].corespeed_get_supported = false;
hm_adapters_iokit[device_id].fanspeed_get_supported = true;
hm_adapters_iokit[device_id].fanpolicy_get_supported = false;
hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
hm_adapters_iokit[device_id].temperature_get_supported = true;
hm_adapters_iokit[device_id].utilization_get_supported = true;
}
}
#endif
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
{
const u32 device_id = device_param->device_id;
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (hwmon_ctx->hm_iokit))
{
hm_adapters_iokit[device_id].buslanes_get_supported = false;
hm_adapters_iokit[device_id].corespeed_get_supported = false;
hm_adapters_iokit[device_id].fanspeed_get_supported = true;
hm_adapters_iokit[device_id].fanpolicy_get_supported = false;
hm_adapters_iokit[device_id].memoryspeed_get_supported = false;
hm_adapters_iokit[device_id].temperature_get_supported = true;
hm_adapters_iokit[device_id].utilization_get_supported = true;
}
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
if (hwmon_ctx->hm_sysfs_amdgpu)
{
hm_adapters_sysfs_amdgpu[device_id].buslanes_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].corespeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].fanspeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].fanpolicy_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported = true;
}
hm_adapters_sysfs_amdgpu[device_id].buslanes_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].corespeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].fanspeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].fanpolicy_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].memoryspeed_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].temperature_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].utilization_get_supported = true;
hm_adapters_sysfs_amdgpu[device_id].memoryused_get_supported = true;
}
}
}
}
}
static void hwmon_ctx_init_sysfs_cpu (hashcat_ctx_t *hashcat_ctx, hm_attrs_t *hm_adapters_sysfs_cpu, int backend_devices_cnt)
{
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
if (hwmon_ctx->hm_sysfs_cpu)
{
if (true)
for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
{
for (int backend_devices_idx = 0; backend_devices_idx < backend_devices_cnt; backend_devices_idx++)
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
if (device_param->skipped == true) continue;
if (device_param->is_cuda == true)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[backend_devices_idx];
if (device_param->skipped == true) continue;
if (device_param->is_cuda == true)
{
// nothing to do
}
}
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
{
const u32 device_id = device_param->device_id;
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) == 0) continue;
if (hwmon_ctx->hm_sysfs_cpu)
{
const u32 device_id = device_param->device_id;
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) == 0) continue;
if (hwmon_ctx->hm_sysfs_cpu)
{
hm_adapters_sysfs_cpu[device_id].buslanes_get_supported = false;
hm_adapters_sysfs_cpu[device_id].corespeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].fanspeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].fanpolicy_get_supported = false;
hm_adapters_sysfs_cpu[device_id].memoryspeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].temperature_get_supported = true;
hm_adapters_sysfs_cpu[device_id].utilization_get_supported = true;
}
hm_adapters_sysfs_cpu[device_id].buslanes_get_supported = false;
hm_adapters_sysfs_cpu[device_id].corespeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].fanspeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].fanpolicy_get_supported = false;
hm_adapters_sysfs_cpu[device_id].memoryspeed_get_supported = false;
hm_adapters_sysfs_cpu[device_id].temperature_get_supported = true;
hm_adapters_sysfs_cpu[device_id].utilization_get_supported = true;
}
}
}
}
}
int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
{
bridge_ctx_t *bridge_ctx = hashcat_ctx->bridge_ctx;
hwmon_ctx_t *hwmon_ctx = hashcat_ctx->hwmon_ctx;
backend_ctx_t *backend_ctx = hashcat_ctx->backend_ctx;
user_options_t *user_options = hashcat_ctx->user_options;
hwmon_ctx->enabled = false;
int backend_devices_cnt = backend_ctx->backend_devices_cnt;
if (bridge_ctx->enabled == true) backend_devices_cnt = 1;
//#if !defined (WITH_HWMON)
//return 0;
//#endif // WITH_HWMON
if (user_options->usage > 0) return 0;
if (user_options->hash_info > 0) return 0;
//if (user_options->backend_info > 0) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;
if (user_options->stdout_flag == true) return 0;
if (user_options->version == true) return 0;
if (user_options->identify == true) return 0;
//we need hwmon support to get free memory per device support
//its a joke, but there's no way around
//if (user_options->hwmon == false) return 0;
hwmon_ctx->hm_device = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
/**
* Initialize shared libraries
*/
hm_attrs_t *hm_adapters_adl = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_nvapi = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_nvml = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_sysfs_amdgpu = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_sysfs_cpu = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
hm_attrs_t *hm_adapters_iokit = (hm_attrs_t *) hccalloc (DEVICES_MAX, sizeof (hm_attrs_t));
if (backend_ctx->need_nvml == true)
{
hwmon_ctx->hm_nvml = (NVML_PTR *) hcmalloc (sizeof (NVML_PTR));
if (nvml_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_nvml);
hwmon_ctx->hm_nvml = NULL;
}
}
if ((backend_ctx->need_nvapi == true) && (hwmon_ctx->hm_nvml)) // nvapi can't work alone, we need nvml, too
{
hwmon_ctx->hm_nvapi = (NVAPI_PTR *) hcmalloc (sizeof (NVAPI_PTR));
if (nvapi_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_nvapi);
hwmon_ctx->hm_nvapi = NULL;
}
}
if (backend_ctx->need_adl == true)
{
hwmon_ctx->hm_adl = (ADL_PTR *) hcmalloc (sizeof (ADL_PTR));
if (adl_init (hashcat_ctx) == -1)
{
hcfree (hwmon_ctx->hm_adl);
hwmon_ctx->hm_adl = NULL;
}
}
if (backend_ctx->need_sysfs_amdgpu == true)
{
hwmon_ctx->hm_sysfs_amdgpu = (SYSFS_AMDGPU_PTR *) hcmalloc (sizeof (SYSFS_AMDGPU_PTR));
if (sysfs_amdgpu_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_sysfs_amdgpu);
hwmon_ctx->hm_sysfs_amdgpu = NULL;
}
}
if (backend_ctx->need_sysfs_cpu == true)
{
hwmon_ctx->hm_sysfs_cpu = (SYSFS_CPU_PTR *) hcmalloc (sizeof (SYSFS_CPU_PTR));
if (sysfs_cpu_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_sysfs_cpu);
hwmon_ctx->hm_sysfs_cpu = NULL;
}
}
#if defined(__APPLE__)
if (backend_ctx->need_iokit == true)
{
hwmon_ctx->hm_iokit = (IOKIT_PTR *) hcmalloc (sizeof (IOKIT_PTR));
if (iokit_init (hashcat_ctx) == false)
{
hcfree (hwmon_ctx->hm_iokit);
hwmon_ctx->hm_iokit = NULL;
}
}
#endif
hwmon_ctx_init_nvml (hashcat_ctx, hm_adapters_nvml, backend_devices_cnt);
hwmon_ctx_init_nvapi (hashcat_ctx, hm_adapters_nvapi, backend_devices_cnt);
// if ADL init fail, disable
if (hwmon_ctx_init_adl (hashcat_ctx, hm_adapters_adl, backend_devices_cnt) == -1)
{
hcfree (hwmon_ctx->hm_adl);
hwmon_ctx->hm_adl = NULL;
}
// if there's ADL, we don't need sysfs_amdgpu
if (hwmon_ctx->hm_adl)
{
hcfree (hwmon_ctx->hm_sysfs_amdgpu);
hwmon_ctx->hm_sysfs_amdgpu = NULL;
}
hwmon_ctx_init_sysfs_amdgpu_iokit (hashcat_ctx, hm_adapters_sysfs_amdgpu, hm_adapters_iokit, backend_devices_cnt);
hwmon_ctx_init_sysfs_cpu (hashcat_ctx, hm_adapters_sysfs_cpu, backend_devices_cnt);
#if defined(__APPLE__)
if (backend_ctx->need_iokit == true)
@ -1757,7 +1780,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (hwmon_ctx->hm_adl == NULL && hwmon_ctx->hm_nvml == NULL && hwmon_ctx->hm_sysfs_amdgpu == NULL && hwmon_ctx->hm_sysfs_cpu == NULL && hwmon_ctx->hm_iokit == NULL)
{
FREE_ADAPTERS;
hcfree (hm_adapters_adl);
hcfree (hm_adapters_nvapi);
hcfree (hm_adapters_nvml);
hcfree (hm_adapters_sysfs_amdgpu);
hcfree (hm_adapters_sysfs_cpu);
hcfree (hm_adapters_iokit);
return 0;
}
@ -1992,7 +2020,12 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
hm_get_memoryused_with_devices_idx (hashcat_ctx, backend_devices_idx);
}
FREE_ADAPTERS;
hcfree (hm_adapters_adl);
hcfree (hm_adapters_nvapi);
hcfree (hm_adapters_nvml);
hcfree (hm_adapters_sysfs_amdgpu);
hcfree (hm_adapters_sysfs_cpu);
hcfree (hm_adapters_iokit);
return 0;
}

View File

@ -41,9 +41,9 @@ int induct_ctx_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->benchmark == true) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;

View File

@ -363,7 +363,7 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx)
hashconfig->has_optimized_kernel = hc_path_read (source_file);
if (user_options->hash_info == false)
if (user_options->hash_info == 0 || user_options->hash_info > 1)
{
if (user_options->optimized_kernel == true)
{

View File

@ -62,9 +62,9 @@ int loopback_init (hashcat_ctx_t *hashcat_ctx)
if (user_options->usage > 0) return 0;
if (user_options->backend_info > 0) return 0;
if (user_options->hash_info > 0) return 0;
if (user_options->benchmark == true) return 0;
if (user_options->hash_info == true) return 0;
if (user_options->keyspace == true) return 0;
if (user_options->left == true) return 0;
if (user_options->show == true) return 0;

View File

@ -192,12 +192,13 @@ static void main_outerloop_starting (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MA
status_ctx->shutdown_outer = false;
if (user_options->hash_info == true) return;
if (user_options->backend_info > 0) return;
if (user_options->hash_info > 0) return;
if (user_options->keyspace == true) return;
if (user_options->stdout_flag == true) return;
if (user_options->speed_only == true) return;
if (user_options->identify == true) return;
if (user_options->backend_info > 0) return;
if ((user_options_extra->wordlist_mode == WL_MODE_FILE) || (user_options_extra->wordlist_mode == WL_MODE_MASK))
{
@ -269,10 +270,11 @@ static void main_cracker_finished (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYB
const user_options_t *user_options = hashcat_ctx->user_options;
const user_options_extra_t *user_options_extra = hashcat_ctx->user_options_extra;
if (user_options->hash_info == true) return;
if (user_options->backend_info > 0) return;
if (user_options->hash_info > 0) return;
if (user_options->keyspace == true) return;
if (user_options->stdout_flag == true) return;
if (user_options->backend_info > 0) return;
// if we had a prompt, clear it
@ -370,10 +372,22 @@ static void main_calculated_words_base (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx,
const user_options_t *user_options = hashcat_ctx->user_options;
if (user_options->keyspace == false) return;
if (user_options->total_candidates == true) return;
event_log_info (hashcat_ctx, "%" PRIu64 "", status_ctx->words_base);
}
static void main_calculated_words_cnt (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
{
const status_ctx_t *status_ctx = hashcat_ctx->status_ctx;
const user_options_t *user_options = hashcat_ctx->user_options;
if (user_options->keyspace == false) return;
if (user_options->total_candidates == false) return;
event_log_info (hashcat_ctx, "%" PRIu64 "", status_ctx->words_cnt);
}
static void main_potfile_remove_parse_pre (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE_UNUSED const void *buf, MAYBE_UNUSED const size_t len)
{
const user_options_t *user_options = hashcat_ctx->user_options;
@ -631,7 +645,17 @@ static void main_backend_session_hostmem (MAYBE_UNUSED hashcat_ctx_t *hashcat_ct
const u64 *hostmem = (const u64 *) buf;
event_log_info (hashcat_ctx, "Host memory required for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
u64 free_memory = 0;
if (get_free_memory (&free_memory) == false)
{
event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB", *hostmem / (1024 * 1024));
}
else
{
event_log_info (hashcat_ctx, "Host memory allocated for this attack: %" PRIu64 " MB (%" PRIu64 " MB free)", *hostmem / (1024 * 1024), free_memory / (1024 * 1024));
}
event_log_info (hashcat_ctx, NULL);
}
@ -996,7 +1020,7 @@ static void main_hashconfig_post (MAYBE_UNUSED hashcat_ctx_t *hashcat_ctx, MAYBE
if (hashconfig->is_salted == true)
{
if (hashconfig->opti_type & OPTI_TYPE_RAW_HASH)
if (hashconfig->opti_type & OPTI_TYPE_RAW_HASH || hashconfig->salt_type & SALT_TYPE_GENERIC)
{
event_log_info (hashcat_ctx, "Minimum salt length supported by kernel: %u", hashconfig->salt_min);
event_log_info (hashcat_ctx, "Maximum salt length supported by kernel: %u", hashconfig->salt_max);
@ -1251,6 +1275,7 @@ static void event (const u32 id, hashcat_ctx_t *hashcat_ctx, const void *buf, co
case EVENT_BRIDGES_SALT_POST: main_bridges_salt_post (hashcat_ctx, buf, len); break;
case EVENT_BRIDGES_SALT_PRE: main_bridges_salt_pre (hashcat_ctx, buf, len); break;
case EVENT_CALCULATED_WORDS_BASE: main_calculated_words_base (hashcat_ctx, buf, len); break;
case EVENT_CALCULATED_WORDS_CNT: main_calculated_words_cnt (hashcat_ctx, buf, len); break;
case EVENT_CRACKER_FINISHED: main_cracker_finished (hashcat_ctx, buf, len); break;
case EVENT_CRACKER_HASH_CRACKED: main_cracker_hash_cracked (hashcat_ctx, buf, len); break;
case EVENT_CRACKER_STARTING: main_cracker_starting (hashcat_ctx, buf, len); break;
@ -1408,7 +1433,7 @@ int main (int argc, char **argv)
rc_final = 0;
}
else if (user_options->hash_info == true)
else if (user_options->hash_info > 0)
{
hash_info (hashcat_ctx);

150
src/modules/argon2_common.c Normal file
View File

@ -0,0 +1,150 @@
/**
* Author......: Netherlands Forensic Institute
* License.....: MIT
*/
#include <inttypes.h>
#include "common.h"
#include "types.h"
#include "modules.h"
#include "bitops.h"
#include "convert.h"
#include "shared.h"
#include "memory.h"
#define ARGON2_SYNC_POINTS 4
#define ARGON2_BLOCK_SIZE 1024
u64 argon2_module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u64 tmp_size = 0; // we'll add some later
return tmp_size;
}
u64 get_largest_memory_block_count (const hashes_t *hashes)
{
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
u64 largest_memory_block_count = (options_st == NULL) ? options->memory_block_count : options_st->memory_block_count;
for (u32 i = 0; i < hashes->salts_cnt; i++)
{
largest_memory_block_count = MAX (largest_memory_block_count, options->memory_block_count);
options++;
}
return largest_memory_block_count;
}
const char *argon2_module_extra_tuningdb_block (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, const backend_ctx_t *backend_ctx, MAYBE_UNUSED const hashes_t *hashes, const u32 device_id, const u32 kernel_accel_user)
{
hc_device_param_t *device_param = &backend_ctx->devices_param[device_id];
const u32 memory_block_count = get_largest_memory_block_count (hashes);
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
int lines_sz = 4096;
char *lines_buf = hcmalloc (lines_sz);
int lines_pos = 0;
const u32 device_processors = device_param->device_processors;
const u32 device_maxworkgroup_size = device_param->device_maxworkgroup_size;
const u64 fixed_mem = (256 * 1024 * 1024); // some storage we need for pws[], tmps[], and others. Is around 72MiB in reality.
const u64 spill_mem = 2048 * device_processors * device_maxworkgroup_size; // 1600 according to ptxas
const u64 available_mem = MIN (device_param->device_available_mem, (device_param->device_maxmem_alloc * 4)) - (fixed_mem + spill_mem);
u32 kernel_accel_new = device_processors;
if (kernel_accel_user)
{
kernel_accel_new = kernel_accel_user;
}
else
{
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->device_host_unified_memory == false))
{
kernel_accel_new = available_mem / size_per_accel;
kernel_accel_new = MIN (kernel_accel_new, 1024); // 1024 = max supported
}
}
char *new_device_name = hcstrdup (device_param->device_name);
for (size_t i = 0; i < strlen (new_device_name); i++)
{
if (new_device_name[i] == ' ') new_device_name[i] = '_';
}
lines_pos += snprintf (lines_buf + lines_pos, lines_sz - lines_pos, "%s * %u 1 %u A\n", new_device_name, user_options->hash_mode, kernel_accel_new);
hcfree (new_device_name);
return lines_buf;
}
u64 argon2_module_extra_buffer_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 memory_block_count = get_largest_memory_block_count (hashes);
const u64 size_per_accel = ARGON2_BLOCK_SIZE * memory_block_count;
const u64 size_argon2 = device_param->kernel_accel_max * size_per_accel;
return size_argon2;
}
u64 argon2_module_extra_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes)
{
/*
argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
argon2_options_t *options_st = (argon2_options_t *) hashes->st_esalts_buf;
const u32 memory_block_count = (options->memory_block_count) ? options->memory_block_count : options_st->memory_block_count;
const u32 parallelism = (options->parallelism) ? options->parallelism : options_st->parallelism;
for (u32 i = 1; i < hashes->salts_cnt; i++)
{
if ((memory_block_count != options[i].memory_block_count)
|| (parallelism != options[i].parallelism))
{
return (1ULL << 63) + i;
}
}
// now that we know they all have the same settings, we also need to check the self-test hash is different to what the user hash is using
if ((hashconfig->opts_type & OPTS_TYPE_SELF_TEST_DISABLE) == 0)
{
if ((memory_block_count != options_st->memory_block_count)
|| (parallelism != options_st->parallelism))
{
return (1ULL << 62);
}
}
*/
u64 tmp_size = sizeof (argon2_tmp_t);
return tmp_size;
}
char *argon2_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
//argon2_options_t *options = (argon2_options_t *) hashes->esalts_buf;
char *jit_build_options = NULL;
//hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count);
return jit_build_options;
}

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED;
static const char *HASH_NAME = "md5($salt.$pass)";
static const u64 KERN_TYPE = 20;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_FORUM_SOFTWARE;
static const char *HASH_NAME = "osCommerce, xt:Commerce";
static const u64 KERN_TYPE = 20;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_OS;
static const char *HASH_NAME = "Juniper NetScreen/SSG (ScreenOS)";
static const u64 KERN_TYPE = 20;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_IMS;
static const char *HASH_NAME = "Skype";
static const u64 KERN_TYPE = 20;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_EAS;
static const char *HASH_NAME = "SolarWinds Serv-U";
static const u64 KERN_TYPE = 20;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH;
static const char *HASH_NAME = "SHA2-224";
static const u64 KERN_TYPE = 1300;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_EARLY_SKIP
| OPTI_TYPE_NOT_ITERATED

View File

@ -20,7 +20,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_4; // originally DGST_SIZE_4_2
static const u32 HASH_CATEGORY = HASH_CATEGORY_OS;
static const char *HASH_NAME = "descrypt, DES (Unix), Traditional DES";
static const u64 KERN_TYPE = 1500;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE
| OPTS_TYPE_TM_KERNEL

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_OS;
static const char *HASH_NAME = "sha512crypt $6$, SHA512 (Unix)";
static const u64 KERN_TYPE = 1800;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_USES_BITS_64;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE;

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_OS;
static const char *HASH_NAME = "AIX {ssha256}";
static const u64 KERN_TYPE = 6400;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE;

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_PASSWORD_MANAGER;
static const char *HASH_NAME = "LastPass + LastPass sniffed";
static const u64 KERN_TYPE = 6800;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE;

View File

@ -85,15 +85,6 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
return pw_max;
}
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
char *jit_build_options = NULL;
hc_asprintf (&jit_build_options, "-D NO_UNROLL");
return jit_build_options;
}
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
u64 *digest = (u64 *) digest_buf;
@ -398,7 +389,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_hook23 = MODULE_DEFAULT;
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
module_ctx->module_hook_size = MODULE_DEFAULT;
module_ctx->module_jit_build_options = module_jit_build_options;
module_ctx->module_jit_build_options = MODULE_DEFAULT;
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;

View File

@ -21,6 +21,7 @@ static const char *HASH_NAME = "SAP CODVN B (BCODE)";
static const u64 KERN_TYPE = 7700;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_NOT_ITERATED;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE

View File

@ -21,6 +21,7 @@ static const char *HASH_NAME = "SAP CODVN B (BCODE) from RFC_READ_TABLE";
static const u64 KERN_TYPE = 7701;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_PRECOMPUTE_INIT
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_NOT_ITERATED;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_LE

View File

@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_PASSWORD_MANAGER;
static const char *HASH_NAME = "1Password, cloudkeychain";
static const u64 KERN_TYPE = 8200;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT
| OPTI_TYPE_USES_BITS_64
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE

View File

@ -19,7 +19,8 @@ static const u32 DGST_SIZE = DGST_SIZE_4_5;
static const u32 HASH_CATEGORY = HASH_CATEGORY_NETWORK_SERVER;
static const char *HASH_NAME = "DNSSEC (NSEC3)";
static const u64 KERN_TYPE = 8300;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE;
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
| OPTI_TYPE_REGISTER_LIMIT;
static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE
| OPTS_TYPE_PT_GENERATE_BE
| OPTS_TYPE_ST_HEX

View File

@ -109,11 +109,11 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
{
if ((device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) && (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU))
{
if (device_param->is_metal == false)
if (device_param->is_metal == true)
{
if (strncmp (device_param->device_name, "Apple M", 7) == 0)
if (strncmp (device_param->device_name, "Intel", 5) == 0)
{
// AppleM1, OpenCL, MTLCompilerService, createKernel never-end with pure kernel and newComputePipelineState failed with optimized kernel
// Intel Iris Graphics, Metal Version 244.303: failed to create 'm10700_loop' pipeline, timeout reached (status 49)
return true;
}
}
@ -152,38 +152,47 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
{
const u32 shared_size_scratch = (32 + 64 + 16); // LOCAL_VK u32 s_sc[FIXED_LOCAL_SIZE][PWMAXSZ4 + BLMAXSZ4 + AESSZ4];
const u32 shared_size_aes = (5 * 1024); // LOCAL_VK u32 s_te0[256];
char *jit_build_options = NULL;
if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL)
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
{
u32 native_threads = 0;
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", 1);
}
else
{
u32 overhead = 0;
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
if (device_param->opencl_device_vendor_id == VENDOR_ID_NV)
{
native_threads = 1;
}
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
{
#if defined (__APPLE__)
// note we need to use device_param->device_local_mem_size - 4 because opencl jit returns with:
// Entry function '...' uses too much shared data (0xc004 bytes, 0xc000 max)
// on my development system. no clue where the 4 bytes are spent.
// I did some research on this and it seems to be related with the datatype.
// For example, if i used u8 instead, there's only 1 byte wasted.
native_threads = 32;
#else
if (device_param->device_local_mem_size < 49152)
if (device_param->is_opencl == true)
{
native_threads = MIN (device_param->kernel_preferred_wgs_multiple, 32); // We can't just set 32, because Intel GPU need 8
overhead = 1;
}
else
{
// to go over 48KiB, we need to use dynamic shared mem
native_threads = 49152 / 128;
}
#endif
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
const u32 device_local_mem_size = MIN (device_param->device_local_mem_size, 48*1024);
u32 fixed_local_size = ((device_local_mem_size - overhead) - shared_size_aes) / shared_size_scratch;
if (user_options->kernel_threads_chgd == true)
{
fixed_local_size = user_options->kernel_threads;
}
else
{
if (fixed_local_size > device_param->kernel_preferred_wgs_multiple) fixed_local_size -= fixed_local_size % device_param->kernel_preferred_wgs_multiple;
}
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", fixed_local_size);
}
return jit_build_options;

View File

@ -129,6 +129,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 250;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@ -344,7 +351,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;

View File

@ -146,6 +146,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 250;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@ -361,7 +368,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;

View File

@ -146,6 +146,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 250;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@ -361,7 +368,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;

View File

@ -131,6 +131,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 250;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@ -346,7 +353,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;

View File

@ -148,6 +148,13 @@ u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED c
return tmp_size;
}
u32 module_kernel_loops_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_min = 250;
return kernel_loops_min;
}
u32 module_kernel_loops_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
const u32 kernel_loops_max = 1000; // lowest PIM multiplier
@ -363,7 +370,7 @@ void module_init (module_ctx_t *module_ctx)
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_max = module_kernel_loops_max;
module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
module_ctx->module_kernel_loops_min = module_kernel_loops_min;
module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
module_ctx->module_kern_type = module_kern_type;

Some files were not shown because too many files have changed in this diff Show More