Update large switch() cases in inc_common.cl and some inline assembly common functions for devices managed with the HIP backend

pull/2883/head
Jens Steube 3 years ago
parent 9c134833a6
commit cf512faa53

@ -528,7 +528,7 @@ DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32)
asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf));
#endif
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 0) & 0xff;
@ -575,7 +575,7 @@ DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32)
asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf));
#endif
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 8) & 0xff;
@ -622,7 +622,7 @@ DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32)
asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf));
#endif
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 16) & 0xff;
@ -669,7 +669,7 @@ DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32)
asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf));
#endif
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 24) & 0xff;
@ -684,7 +684,7 @@ DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
#if defined IS_NV && HAS_BFE == 1
asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 0) & 0xff;
@ -699,7 +699,7 @@ DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
#if defined IS_NV && HAS_BFE == 1
asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 8) & 0xff;
@ -714,7 +714,7 @@ DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
#if defined IS_NV && HAS_BFE == 1
asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 16) & 0xff;
@ -729,7 +729,7 @@ DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
#if defined IS_NV && HAS_BFE == 1
asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
//#elif defined IS_AMD && HAS_VBFE == 1
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
#else
r = (v32 >> 24) & 0xff;
@ -939,9 +939,9 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
{
#if defined _CPU_OPENCL_EMU_H
return rotl64 (a, n);
#elif defined IS_CUDA || defined IS_HIP
#elif defined IS_CUDA
return rotl64 (a, n);
#elif defined IS_AMD
#elif (defined IS_AMD || defined IS_HIP)
return rotl64 (a, n);
#else
#ifdef USE_ROTATE
@ -956,9 +956,9 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
{
#if defined _CPU_OPENCL_EMU_H
return rotr64 (a, n);
#elif defined IS_CUDA || defined IS_HIP
#elif defined IS_CUDA
return rotr64 (a, n);
#elif defined IS_AMD
#elif (defined IS_AMD || defined IS_HIP)
return rotr64 (a, n);
#else
#ifdef USE_ROTATE
@ -973,9 +973,9 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
{
#if defined _CPU_OPENCL_EMU_H
return rotl64 (a, n);
#elif defined IS_CUDA || defined IS_HIP
#elif defined IS_CUDA
return rotl64_S (a, n);
#elif defined IS_AMD
#elif (defined IS_AMD || defined IS_HIP)
return rotl64_S (a, n);
#else
#ifdef USE_ROTATE
@ -990,9 +990,9 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
{
#if defined _CPU_OPENCL_EMU_H
return rotr64 (a, n);
#elif defined IS_CUDA || defined IS_HIP
#elif defined IS_CUDA
return rotr64_S (a, n);
#elif defined IS_AMD
#elif (defined IS_AMD || defined IS_HIP)
return rotr64_S (a, n);
#else
#ifdef USE_ROTATE
@ -1012,7 +1012,7 @@ DECLSPEC u32x hc_swap32 (const u32x v)
#ifdef _CPU_OPENCL_EMU_H
r = byte_swap_32 (v);
#else
#if defined IS_AMD && HAS_VPERM == 1
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
const u32 m = 0x00010203;
@ -1109,7 +1109,7 @@ DECLSPEC u32 hc_swap32_S (const u32 v)
#ifdef _CPU_OPENCL_EMU_H
r = byte_swap_32 (v);
#else
#if defined IS_AMD && HAS_VPERM == 1
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
__asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203));
#elif defined IS_NV && HAS_PRMT == 1
asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
@ -1135,7 +1135,7 @@ DECLSPEC u64x hc_swap64 (const u64x v)
#ifdef _CPU_OPENCL_EMU_H
r = byte_swap_64 (v);
#else
#if defined IS_AMD && HAS_VPERM == 1
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
const u32 m = 0x00010203;
@ -1354,7 +1354,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
#ifdef _CPU_OPENCL_EMU_H
r = byte_swap_64 (v);
#else
#if defined IS_AMD && HAS_VPERM == 1
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
const u32 m = 0x00010203;
const u32 v0 = h32_from_64_S (v);
@ -1399,7 +1399,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
return r;
}
#ifdef IS_AMD
#if (defined IS_AMD || defined IS_HIP)
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
{
@ -2767,7 +2767,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x3727);
out1[0] = hc_byte_perm (in[0], 0, 0x1707);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
@ -2805,7 +2805,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x1707);
out1[0] = hc_byte_perm (in[0], 0, 0x3727);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
@ -2843,7 +2843,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7372);
out1[0] = hc_byte_perm (in[0], 0, 0x7170);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
@ -2881,7 +2881,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
out1[1] = hc_byte_perm (in[0], 0, 0x7170);
out1[0] = hc_byte_perm (in[0], 0, 0x7372);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
@ -2915,7 +2915,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
@ -2945,7 +2945,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
@ -3069,7 +3069,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -3394,7 +3394,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -3404,7 +3404,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -3737,7 +3737,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3
{
const int offset_switch = offset / 4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -4665,7 +4665,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -4990,13 +4990,13 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -5329,7 +5329,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -5790,13 +5790,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -6265,7 +6265,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -7422,7 +7422,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -7432,7 +7432,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -8005,7 +8005,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -9690,7 +9690,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -9700,7 +9700,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -11393,7 +11393,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -12550,13 +12550,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -13721,7 +13721,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -15406,13 +15406,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -17105,7 +17105,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -21462,7 +21462,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -21472,7 +21472,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -25837,7 +25837,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -30194,13 +30194,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -36533,7 +36533,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
@ -36571,7 +36571,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
@ -36605,7 +36605,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
@ -36635,7 +36635,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
#elif defined IS_AMD && HAS_VPERM
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
@ -36660,7 +36660,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -36985,7 +36985,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -36995,7 +36995,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -37328,7 +37328,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32
{
const int offset_switch = offset / 4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -38256,7 +38256,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -38581,13 +38581,13 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -38920,7 +38920,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -39381,13 +39381,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -39856,7 +39856,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -41013,7 +41013,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -41023,7 +41023,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -41596,7 +41596,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -43281,7 +43281,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -43291,7 +43291,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -44984,7 +44984,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -46141,13 +46141,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -47312,7 +47312,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -48997,13 +48997,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif
@ -50696,7 +50696,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -55053,7 +55053,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -55063,7 +55063,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -59428,7 +59428,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
{
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
switch (offset_switch)
{
case 0:
@ -63785,13 +63785,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
#if defined IS_NV
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
#endif

@ -124,7 +124,7 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
);
#elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1
#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
__asm__ __volatile__
(
"V_SUB_U32 %0, %9, %17;"
@ -176,7 +176,7 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
);
#elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1
#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
__asm__ __volatile__
(
"V_ADD_U32 %0, %9, %17;"

@ -781,7 +781,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
const int offset_switch = offset / 4;
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
const u32 src_r00 = src_r0[0];
const u32 src_r01 = src_r0[1];
const u32 src_r02 = src_r0[2];
@ -884,7 +884,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
}
#endif
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
const int offset_mod_4 = offset & 3;
@ -894,7 +894,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
#endif
#if defined IS_AMD
#if (defined IS_AMD || defined IS_HIP)
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
#endif
@ -1359,11 +1359,7 @@ DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c
const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
const u32 mr = ~ml;
#ifdef IS_AMD
const int p0_switch = p0 / 4;
#else
const int p0_switch = p0 / 4;
#endif
switch (p0_switch)
{
@ -1466,11 +1462,7 @@ DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const
const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
const u32 mr = ~ml;
#ifdef IS_AMD
const int p0_switch = p0 / 4;
#else
const int p0_switch = p0 / 4;
#endif
switch (p0_switch)
{
@ -1552,11 +1544,7 @@ DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons
const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
#ifdef IS_AMD
const int p0_switch = p0 / 4;
#else
const int p0_switch = p0 / 4;
#endif
switch (p0_switch)
{

@ -96,7 +96,7 @@
#elif VENDOR_ID == (1 << 8)
#define IS_AMD_USE_HIP
// TODO HIP optimization potential
#define IS_GENERIC
//#define IS_GENERIC
#else
#define IS_GENERIC
#endif

@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -139,7 +139,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -246,7 +246,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];

@ -19,7 +19,7 @@
#define KXX_DECL
#endif
#ifdef IS_AMD
#if (defined IS_AMD || defined IS_HIP)
#define KXX_DECL
#endif
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
#endif
#endif
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
/*
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC

@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -138,7 +138,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -245,7 +245,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];

@ -19,7 +19,7 @@
#define KXX_DECL
#endif
#ifdef IS_AMD
#if (defined IS_AMD || defined IS_HIP)
#define KXX_DECL
#endif
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
#endif
#endif
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
/*
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC

@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
u32 tmp4;
u32 tmp5;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];

@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -135,7 +135,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];
u32 in2 = append[2];
@ -242,7 +242,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
u32 in0 = append[0];
u32 in1 = append[1];

@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in2 = append[2];
u32 in3 = append[3];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -165,7 +165,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
u32 in2 = append[2];
u32 in3 = append[3];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -322,7 +322,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
u32 in3 = append[3];
u32 in4 = append[4];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -456,7 +456,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
u32 in3 = append[3];
u32 in4 = append[4];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
@ -756,7 +756,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in2 = append[2];
u32 in3 = append[3];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@ -915,7 +915,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
u32 in3 = append[3];
u32 in4 = 0x80000000;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
@ -1074,7 +1074,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
u32 in3 = append[3];
u32 in4 = append[4];
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);

@ -17,13 +17,15 @@
#include "inc_hash_md5.cl"
#endif
/*
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
*/
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
#define SETSHIFTEDINT(a,n,v) \
{ \

@ -15,13 +15,8 @@
#include "inc_hash_md5.cl"
#endif
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
#define SETSHIFTEDINT(a,n,v) \
{ \

@ -15,13 +15,8 @@
#include "inc_hash_md5.cl"
#endif
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
CONSTANT_VK u32a sapb_trans_tbl[256] =
{

@ -17,13 +17,8 @@
#include "inc_hash_md5.cl"
#endif
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
#define SETSHIFTEDINT(a,n,v) \
{ \

@ -15,13 +15,8 @@
#include "inc_hash_md5.cl"
#endif
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
#define SETSHIFTEDINT(a,n,v) \
{ \

@ -15,13 +15,8 @@
#include "inc_hash_md5.cl"
#endif
#ifdef IS_AMD
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
#else
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
#endif
CONSTANT_VK u32a sapb_trans_tbl[256] =
{

@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
u32 i;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
sc[idx++] = pw[i]
| hc_bytealign_be (bl[0], 0, pm4);
@ -263,7 +263,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
const u32 om = m % 4;
const u32 od = m / 4;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);

@ -42,7 +42,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0;
u32 tmp1;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif

@ -37,7 +37,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0;
u32 tmp1;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif

@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15;
u32x tmp16;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);

@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15;
u32x tmp16;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);

@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
u32x tmp15;
u32x tmp16;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);

@ -19,7 +19,7 @@
#define KXX_DECL
#endif
#ifdef IS_AMD
#if (defined IS_AMD || defined IS_HIP)
#define KXX_DECL
#endif
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
#endif
#endif
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
/*
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC

@ -145,7 +145,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0;
u32 tmp1;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif

@ -56,7 +56,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
u32 tmp0;
u32 tmp1;
#if defined IS_AMD || defined IS_GENERIC
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
tmp0 = hc_bytealign_be (0, append, func_len);
tmp1 = hc_bytealign_be (append, 0, func_len);
#endif

@ -8339,17 +8339,17 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
device_param->has_mov64 = false;
device_param->has_prmt = false;
device_param->has_vadd = false;
device_param->has_vaddc = false;
device_param->has_vadd_co = false;
device_param->has_vaddc_co = false;
device_param->has_vsub = false;
device_param->has_vsubb = false;
device_param->has_vsub_co = false;
device_param->has_vsubb_co = false;
device_param->has_vadd3 = false;
device_param->has_vbfe = false;
device_param->has_vperm = false;
device_param->has_vadd = true;
device_param->has_vaddc = true;
device_param->has_vadd_co = true;
device_param->has_vaddc_co = true;
device_param->has_vsub = true;
device_param->has_vsubb = true;
device_param->has_vsub_co = true;
device_param->has_vsubb_co = true;
device_param->has_vadd3 = true;
device_param->has_vbfe = true;
device_param->has_vperm = true;
// device_available_mem

Loading…
Cancel
Save