1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-23 00:28:11 +00:00

AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+

This commit is contained in:
Jens Steube 2021-07-26 07:59:12 +02:00
parent b53691c8f5
commit 1e3bd2c8a0
16 changed files with 244 additions and 67 deletions

View File

@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -45,12 +45,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
tmp4 = hc_bytealign (in3, 0, offset); tmp4 = hc_bytealign (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -139,7 +145,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -153,12 +159,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
tmp4 = hc_bytealign (in3, in4, offset); tmp4 = hc_bytealign (in3, in4, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -246,7 +258,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1; u32 tmp1;
u32 tmp2; u32 tmp2;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -255,12 +267,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
tmp2 = hc_bytealign (in1, 0, offset); tmp2 = hc_bytealign (in1, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];

View File

@ -1664,18 +1664,18 @@ DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32
DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63) DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, 
const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
{ {
const u32 s001 = (0x001 & SALT) ? 0xffffffff : 0; const u32 s001 = (0x001 & SALT) ? 1 : 0;
const u32 s002 = (0x002 & SALT) ? 0xffffffff : 0; const u32 s002 = (0x002 & SALT) ? 1 : 0;
const u32 s004 = (0x004 & SALT) ? 0xffffffff : 0; const u32 s004 = (0x004 & SALT) ? 1 : 0;
const u32 s008 = (0x008 & SALT) ? 0xffffffff : 0; const u32 s008 = (0x008 & SALT) ? 1 : 0;
const u32 s010 = (0x010 & SALT) ? 0xffffffff : 0; const u32 s010 = (0x010 & SALT) ? 1 : 0;
const u32 s020 = (0x020 & SALT) ? 0xffffffff : 0; const u32 s020 = (0x020 & SALT) ? 1 : 0;
const u32 s040 = (0x040 & SALT) ? 0xffffffff : 0; const u32 s040 = (0x040 & SALT) ? 1 : 0;
const u32 s080 = (0x080 & SALT) ? 0xffffffff : 0; const u32 s080 = (0x080 & SALT) ? 1 : 0;
const u32 s100 = (0x100 & SALT) ? 0xffffffff : 0; const u32 s100 = (0x100 & SALT) ? 1 : 0;
const u32 s200 = (0x200 & SALT) ? 0xffffffff : 0; const u32 s200 = (0x200 & SALT) ? 1 : 0;
const u32 s400 = (0x400 & SALT) ? 0xffffffff : 0; const u32 s400 = (0x400 & SALT) ? 1 : 0;
const u32 s800 = (0x800 & SALT) ? 0xffffffff : 0; const u32 s800 = (0x800 & SALT) ? 1 : 0;
KXX_DECL u32 k00, k01, k02, k03, k04, k05; KXX_DECL u32 k00, k01, k02, k03, k04, k05;
KXX_DECL u32 k06, k07, k08, k09, k10, k11; KXX_DECL u32 k06, k07, k08, k09, k10, k11;

View File

@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -44,12 +44,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
tmp4 = hc_bytealign (in3, 0, offset); tmp4 = hc_bytealign (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -138,7 +144,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -152,12 +158,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
tmp4 = hc_bytealign (in3, in4, offset); tmp4 = hc_bytealign (in3, in4, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -245,7 +257,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1; u32 tmp1;
u32 tmp2; u32 tmp2;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -254,12 +266,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
tmp2 = hc_bytealign (in1, 0, offset); tmp2 = hc_bytealign (in1, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];

View File

@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
u32 tmp4; u32 tmp4;
u32 tmp5; u32 tmp5;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -2134,12 +2134,18 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
tmp5 = hc_bytealign (in4, 0, offset); tmp5 = hc_bytealign (in4, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];

View File

@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -41,12 +41,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
tmp4 = hc_bytealign (in3, 0, offset); tmp4 = hc_bytealign (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -135,7 +141,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3; u32 tmp3;
u32 tmp4; u32 tmp4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
u32 in2 = append[2]; u32 in2 = append[2];
@ -149,12 +155,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
tmp4 = hc_bytealign (in3, in4, offset); tmp4 = hc_bytealign (in3, in4, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -242,7 +254,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1; u32 tmp1;
u32 tmp2; u32 tmp2;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];
@ -251,12 +263,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
tmp2 = hc_bytealign (in1, 0, offset); tmp2 = hc_bytealign (in1, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3; const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4; const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_NV
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
u32 in0 = append[0]; u32 in0 = append[0];
u32 in1 = append[1]; u32 in1 = append[1];

View File

@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in2 = append[2]; u32 in2 = append[2];
u32 in3 = append[3]; u32 in3 = append[3];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -53,8 +53,15 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
const u32 tmp4 = hc_bytealign_be (in3, 0, offset); const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -165,7 +172,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
u32 in2 = append[2]; u32 in2 = append[2];
u32 in3 = append[3]; u32 in3 = append[3];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -173,8 +180,15 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
const u32 tmp4 = hc_bytealign_be (in3, 0, offset); const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -322,7 +336,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
u32 in3 = append[3]; u32 in3 = append[3];
u32 in4 = append[4]; u32 in4 = append[4];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -331,8 +345,15 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
const u32 tmp5 = hc_bytealign_be (in4, 0, offset); const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -456,7 +477,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
u32 in3 = append[3]; u32 in3 = append[3];
u32 in4 = append[4]; u32 in4 = append[4];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset); const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset); const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -465,8 +486,15 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
const u32 tmp5 = hc_bytealign_be (in4, 0, offset); const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -756,7 +784,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in2 = append[2]; u32 in2 = append[2];
u32 in3 = append[3]; u32 in3 = append[3];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@ -764,8 +792,15 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -915,7 +950,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
u32 in3 = append[3]; u32 in3 = append[3];
u32 in4 = 0x80000000; u32 in4 = 0x80000000;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@ -923,8 +958,15 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
@ -1074,7 +1116,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in3 = append[3]; u32 in3 = append[3];
u32 in4 = append[4]; u32 in4 = append[4];
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@ -1083,8 +1125,15 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);

View File

@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
u32 i; u32 i;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
for (i = 0; i < pd; i++) sc[idx++] = pw[i]; for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i] sc[idx++] = pw[i]
| hc_bytealign_be (bl[0], 0, pm4); | hc_bytealign_be (bl[0], 0, pm4);
@ -242,8 +242,15 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4); sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
#if defined IS_NV
const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8));
#endif
for (i = 0; i < pd; i++) sc[idx++] = pw[i]; for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i] sc[idx++] = pw[i]
@ -263,16 +270,22 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
const u32 om = m % 4; const u32 om = m % 4;
const u32 od = m / 4; const u32 od = m / 4;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om); pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
int selector = (0x76543210 >> (om * 4)) & 0xffff;
#if defined IS_NV
const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8));
#endif
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector); pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector); pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector); pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);

View File

@ -42,13 +42,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0; u32 tmp0;
u32 tmp1; u32 tmp1;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len); tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len); tmp1 = hc_bytealign_be (append, 0, func_len);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector); tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector); tmp1 = hc_byte_perm (0, append, selector);

View File

@ -37,13 +37,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0; u32 tmp0;
u32 tmp1; u32 tmp1;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len); tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len); tmp1 = hc_bytealign_be (append, 0, func_len);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector); tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector); tmp1 = hc_byte_perm (0, append, selector);

View File

@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15; u32x tmp15;
u32x tmp16; u32x tmp16;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -71,8 +71,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
tmp16 = hc_bytealign_be (carry[15], 0, offset); tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);

View File

@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15; u32x tmp15;
u32x tmp16; u32x tmp16;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -69,8 +69,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
tmp16 = hc_bytealign_be (carry[15], 0, offset); tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);

View File

@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15; u32x tmp15;
u32x tmp16; u32x tmp16;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
@ -68,8 +68,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
tmp16 = hc_bytealign_be (carry[15], 0, offset); tmp16 = hc_bytealign_be (carry[15], 0, offset);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
tmp00 = hc_byte_perm (carry[ 0], 0, selector); tmp00 = hc_byte_perm (carry[ 0], 0, selector);
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);

View File

@ -145,13 +145,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0; u32 tmp0;
u32 tmp1; u32 tmp1;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len); tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len); tmp1 = hc_bytealign_be (append, 0, func_len);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector); tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector); tmp1 = hc_byte_perm (0, append, selector);

View File

@ -56,13 +56,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0; u32 tmp0;
u32 tmp1; u32 tmp1;
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len); tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len); tmp1 = hc_bytealign_be (append, 0, func_len);
#endif #endif
#ifdef IS_NV #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
#endif
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
#endif
tmp0 = hc_byte_perm (append, 0, selector); tmp0 = hc_byte_perm (append, 0, selector);
tmp1 = hc_byte_perm (0, append, selector); tmp1 = hc_byte_perm (0, append, selector);

View File

@ -18,6 +18,7 @@
## Improvements ## Improvements
## ##
- AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads - Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
@ -28,6 +29,7 @@
## Technical ## Technical
## ##
- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
- Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
- HIP Kernels: Got rid of hip/hip_runtime.h dependency to enable easier integration of the HIP backend on Windows

View File

@ -184,7 +184,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
{ {
if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false)) if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
{ {
hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff); hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff);
}
else
{
hc_asprintf (&jit_build_options, "-D _unroll -fno-experimental-new-pass-manager");
} }
} }
else else