Mirror of https://github.com/hashcat/hashcat.git (synced 2024-11-23 00:28:11 +00:00)
AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
This commit is contained in:
Parent: b53691c8f5
Commit: 1e3bd2c8a0
@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -45,12 +45,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
tmp4 = hc_bytealign (in3, 0, offset);
|
tmp4 = hc_bytealign (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -139,7 +145,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -153,12 +159,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
tmp4 = hc_bytealign (in3, in4, offset);
|
tmp4 = hc_bytealign (in3, in4, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -246,7 +258,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
u32 tmp2;
|
u32 tmp2;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
|
||||||
@ -255,12 +267,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
tmp2 = hc_bytealign (in1, 0, offset);
|
tmp2 = hc_bytealign (in1, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
@ -1664,18 +1664,18 @@ DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32
|
|||||||
|
|
||||||
DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
|
DECLSPEC void DESCrypt (const u32 SALT, const u32 K00, const u32 K01, const u32 K02, const u32 K03, const u32 K04, const u32 K05, const u32 K06, const u32 K07, const u32 K08, const u32 K09, const u32 K10, const u32 K11, const u32 K12, const u32 K13, const u32 K14, const u32 K15, const u32 K16, const u32 K17, const u32 K18, const u32 K19, const u32 K20, const u32 K21, const u32 K22, const u32 K23, const u32 K24, const u32 K25, const u32 K26, const u32 K27, const u32 K28, const u32 K29, const u32 K30, const u32 K31, const u32 K32, const u32 K33, const u32 K34, const u32 K35, const u32 K36, const u32 K37, const u32 K38, const u32 K39, const u32 K40, const u32 K41, const u32 K42, const u32 K43, const u32 K44, const u32 K45, const u32 K46, const u32 K47, const u32 K48, const u32 K49, const u32 K50, const u32 K51, const u32 K52, const u32 K53, const u32 K54, const u32 K55, u32 *D00, u32 *D01, u32 *D02, u32 *D03, u32 *D04, u32 *D05, u32 *D06, u32 *D07, u32 *D08, u32 *D09, u32 *D10, u32 *D11, u32 *D12, u32 *D13, u32 *D14, u32 *D15, u32 *D16, u32 *D17, u32 *D18, u32 *D19, u32 *D20, u32 *D21, u32 *D22, u32 *D23, u32 *D24, u32 *D25, u32 *D26, u32 *D27, u32 *D28, u32 *D29, u32 *D30, u32 *D31, u32 *D32, u32 *D33, u32 *D34, u32 *D35, u32 *D36, u32 *D37, u32 *D38, u32 *D39, u32 *D40, u32 *D41, u32 *D42, u32 *D43, u32 *D44, u32 *D45, u32 *D46, u32 *D47, u32 *D48, u32 *D49, u32 *D50, u32 *D51, u32 *D52, u32 *D53, u32 *D54, u32 *D55, u32 *D56, u32 *D57, u32 *D58, u32 *D59, u32 *D60, u32 *D61, u32 *D62, u32 *D63)
|
||||||
{
|
{
|
||||||
const u32 s001 = (0x001 & SALT) ? 0xffffffff : 0;
|
const u32 s001 = (0x001 & SALT) ? 1 : 0;
|
||||||
const u32 s002 = (0x002 & SALT) ? 0xffffffff : 0;
|
const u32 s002 = (0x002 & SALT) ? 1 : 0;
|
||||||
const u32 s004 = (0x004 & SALT) ? 0xffffffff : 0;
|
const u32 s004 = (0x004 & SALT) ? 1 : 0;
|
||||||
const u32 s008 = (0x008 & SALT) ? 0xffffffff : 0;
|
const u32 s008 = (0x008 & SALT) ? 1 : 0;
|
||||||
const u32 s010 = (0x010 & SALT) ? 0xffffffff : 0;
|
const u32 s010 = (0x010 & SALT) ? 1 : 0;
|
||||||
const u32 s020 = (0x020 & SALT) ? 0xffffffff : 0;
|
const u32 s020 = (0x020 & SALT) ? 1 : 0;
|
||||||
const u32 s040 = (0x040 & SALT) ? 0xffffffff : 0;
|
const u32 s040 = (0x040 & SALT) ? 1 : 0;
|
||||||
const u32 s080 = (0x080 & SALT) ? 0xffffffff : 0;
|
const u32 s080 = (0x080 & SALT) ? 1 : 0;
|
||||||
const u32 s100 = (0x100 & SALT) ? 0xffffffff : 0;
|
const u32 s100 = (0x100 & SALT) ? 1 : 0;
|
||||||
const u32 s200 = (0x200 & SALT) ? 0xffffffff : 0;
|
const u32 s200 = (0x200 & SALT) ? 1 : 0;
|
||||||
const u32 s400 = (0x400 & SALT) ? 0xffffffff : 0;
|
const u32 s400 = (0x400 & SALT) ? 1 : 0;
|
||||||
const u32 s800 = (0x800 & SALT) ? 0xffffffff : 0;
|
const u32 s800 = (0x800 & SALT) ? 1 : 0;
|
||||||
|
|
||||||
KXX_DECL u32 k00, k01, k02, k03, k04, k05;
|
KXX_DECL u32 k00, k01, k02, k03, k04, k05;
|
||||||
KXX_DECL u32 k06, k07, k08, k09, k10, k11;
|
KXX_DECL u32 k06, k07, k08, k09, k10, k11;
|
||||||
|
@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -44,12 +44,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
tmp4 = hc_bytealign (in3, 0, offset);
|
tmp4 = hc_bytealign (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -138,7 +144,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -152,12 +158,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
tmp4 = hc_bytealign (in3, in4, offset);
|
tmp4 = hc_bytealign (in3, in4, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -245,7 +257,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
u32 tmp2;
|
u32 tmp2;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
|
||||||
@ -254,12 +266,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
tmp2 = hc_bytealign (in1, 0, offset);
|
tmp2 = hc_bytealign (in1, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
|||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
u32 tmp5;
|
u32 tmp5;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -2134,12 +2134,18 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
|||||||
tmp5 = hc_bytealign (in4, 0, offset);
|
tmp5 = hc_bytealign (in4, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -41,12 +41,18 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
|||||||
tmp4 = hc_bytealign (in3, 0, offset);
|
tmp4 = hc_bytealign (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -135,7 +141,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
u32 tmp3;
|
u32 tmp3;
|
||||||
u32 tmp4;
|
u32 tmp4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
@ -149,12 +155,18 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
|||||||
tmp4 = hc_bytealign (in3, in4, offset);
|
tmp4 = hc_bytealign (in3, in4, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
@ -242,7 +254,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
u32 tmp2;
|
u32 tmp2;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
|
||||||
@ -251,12 +263,18 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
|||||||
tmp2 = hc_bytealign (in1, 0, offset);
|
tmp2 = hc_bytealign (in1, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
const int offset_mod_4 = offset & 3;
|
const int offset_mod_4 = offset & 3;
|
||||||
|
|
||||||
const int offset_minus_4 = 4 - offset_mod_4;
|
const int offset_minus_4 = 4 - offset_mod_4;
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
u32 in0 = append[0];
|
u32 in0 = append[0];
|
||||||
u32 in1 = append[1];
|
u32 in1 = append[1];
|
||||||
|
@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||||
@ -53,8 +53,15 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -165,7 +172,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
|||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||||
@ -173,8 +180,15 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
|||||||
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
const u32 tmp4 = hc_bytealign_be (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -322,7 +336,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
|
|||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
u32 in4 = append[4];
|
u32 in4 = append[4];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||||
@ -331,8 +345,15 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
|
|||||||
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -456,7 +477,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
|
|||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
u32 in4 = append[4];
|
u32 in4 = append[4];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||||
@ -465,8 +486,15 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
|
|||||||
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
const u32 tmp5 = hc_bytealign_be (in4, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -756,7 +784,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
u32 in2 = append[2];
|
u32 in2 = append[2];
|
||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||||
@ -764,8 +792,15 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
|
const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -915,7 +950,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
|||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
u32 in4 = 0x80000000;
|
u32 in4 = 0x80000000;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||||
@ -923,8 +958,15 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
|||||||
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
@ -1074,7 +1116,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
u32 in3 = append[3];
|
u32 in3 = append[3];
|
||||||
u32 in4 = append[4];
|
u32 in4 = append[4];
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||||
@ -1083,8 +1125,15 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
|
|||||||
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
|
const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
const u32 tmp0 = hc_byte_perm_S (in0, 0, selector);
|
||||||
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
const u32 tmp1 = hc_byte_perm_S (in1, in0, selector);
|
||||||
|
@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
|||||||
|
|
||||||
u32 i;
|
u32 i;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||||
sc[idx++] = pw[i]
|
sc[idx++] = pw[i]
|
||||||
| hc_bytealign_be (bl[0], 0, pm4);
|
| hc_bytealign_be (bl[0], 0, pm4);
|
||||||
@ -242,8 +242,15 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
|||||||
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
|
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
int selector = (0x76543210 >> (pm4 * 4)) & 0xffff;
|
|
||||||
|
#if defined IS_NV
|
||||||
|
const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||||
sc[idx++] = pw[i]
|
sc[idx++] = pw[i]
|
||||||
@ -263,16 +270,22 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
|
|||||||
const u32 om = m % 4;
|
const u32 om = m % 4;
|
||||||
const u32 od = m / 4;
|
const u32 od = m / 4;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
|
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
|
||||||
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
|
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
|
||||||
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
|
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
|
||||||
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
|
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
int selector = (0x76543210 >> (om * 4)) & 0xffff;
|
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
|
const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8));
|
||||||
|
#endif
|
||||||
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
|
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
|
||||||
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
|
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
|
||||||
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
|
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
|
||||||
|
@ -42,13 +42,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
|||||||
u32 tmp0;
|
u32 tmp0;
|
||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp0 = hc_byte_perm (append, 0, selector);
|
tmp0 = hc_byte_perm (append, 0, selector);
|
||||||
tmp1 = hc_byte_perm (0, append, selector);
|
tmp1 = hc_byte_perm (0, append, selector);
|
||||||
|
@ -37,13 +37,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
|||||||
u32 tmp0;
|
u32 tmp0;
|
||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp0 = hc_byte_perm (append, 0, selector);
|
tmp0 = hc_byte_perm (append, 0, selector);
|
||||||
tmp1 = hc_byte_perm (0, append, selector);
|
tmp1 = hc_byte_perm (0, append, selector);
|
||||||
|
@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
u32x tmp15;
|
u32x tmp15;
|
||||||
u32x tmp16;
|
u32x tmp16;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||||
@ -71,8 +71,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||||
|
@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
u32x tmp15;
|
u32x tmp15;
|
||||||
u32x tmp16;
|
u32x tmp16;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||||
@ -69,8 +69,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||||
|
@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
u32x tmp15;
|
u32x tmp15;
|
||||||
u32x tmp16;
|
u32x tmp16;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||||
@ -68,8 +68,15 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
|||||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||||
|
@ -145,13 +145,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
|||||||
u32 tmp0;
|
u32 tmp0;
|
||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp0 = hc_byte_perm (append, 0, selector);
|
tmp0 = hc_byte_perm (append, 0, selector);
|
||||||
tmp1 = hc_byte_perm (0, append, selector);
|
tmp1 = hc_byte_perm (0, append, selector);
|
||||||
|
@ -56,13 +56,20 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
|||||||
u32 tmp0;
|
u32 tmp0;
|
||||||
u32 tmp1;
|
u32 tmp1;
|
||||||
|
|
||||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_NV
|
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||||
|
|
||||||
|
#if defined IS_NV
|
||||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if (defined IS_AMD || defined IS_HIP)
|
||||||
|
const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8));
|
||||||
|
#endif
|
||||||
|
|
||||||
tmp0 = hc_byte_perm (append, 0, selector);
|
tmp0 = hc_byte_perm (append, 0, selector);
|
||||||
tmp1 = hc_byte_perm (0, append, selector);
|
tmp1 = hc_byte_perm (0, append, selector);
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
## Improvements
|
## Improvements
|
||||||
##
|
##
|
||||||
|
|
||||||
|
- AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+
|
||||||
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
|
- Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase
|
||||||
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
|
- Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M
|
||||||
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
|
- Kernel Threads: Use warp size / wavefront size query instead of hardcoded values as base for kernel threads
|
||||||
@ -28,6 +29,7 @@
|
|||||||
## Technical
|
## Technical
|
||||||
##
|
##
|
||||||
|
|
||||||
|
- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs
|
||||||
- Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
|
- Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte
|
||||||
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
|
- Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename
|
||||||
- HIP Kernels: Got rid of hip/hip_runtime.h dependency to enable easier integration of the HIP backend on Windows
|
- HIP Kernels: Got rid of hip/hip_runtime.h dependency to enable easier integration of the HIP backend on Windows
|
||||||
|
@ -184,7 +184,11 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
|||||||
{
|
{
|
||||||
if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
|
if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
|
||||||
{
|
{
|
||||||
hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
|
hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll -fno-experimental-new-pass-manager", hashes->salts_buf[0].salt_buf[0] & 0xfff);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
hc_asprintf (&jit_build_options, "-D _unroll -fno-experimental-new-pass-manager");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
Loading…
Reference in New Issue
Block a user