mirror of
https://github.com/hashcat/hashcat.git
synced 2025-02-22 12:32:04 +00:00
commit
141b59b80e
@ -528,7 +528,7 @@ DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32)
|
||||
asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf));
|
||||
#endif
|
||||
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 0) & 0xff;
|
||||
@ -575,7 +575,7 @@ DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32)
|
||||
asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf));
|
||||
#endif
|
||||
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 8) & 0xff;
|
||||
@ -622,7 +622,7 @@ DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32)
|
||||
asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf));
|
||||
#endif
|
||||
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 16) & 0xff;
|
||||
@ -669,7 +669,7 @@ DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32)
|
||||
asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf));
|
||||
#endif
|
||||
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 24) & 0xff;
|
||||
@ -684,7 +684,7 @@ DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
|
||||
|
||||
#if defined IS_NV && HAS_BFE == 1
|
||||
asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 0) & 0xff;
|
||||
@ -699,7 +699,7 @@ DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
|
||||
|
||||
#if defined IS_NV && HAS_BFE == 1
|
||||
asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 8) & 0xff;
|
||||
@ -714,7 +714,7 @@ DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
|
||||
|
||||
#if defined IS_NV && HAS_BFE == 1
|
||||
asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 16) & 0xff;
|
||||
@ -729,7 +729,7 @@ DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
|
||||
|
||||
#if defined IS_NV && HAS_BFE == 1
|
||||
asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
|
||||
//#elif defined IS_AMD && HAS_VBFE == 1
|
||||
//#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
|
||||
//__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
|
||||
#else
|
||||
r = (v32 >> 24) & 0xff;
|
||||
@ -879,7 +879,7 @@ DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
|
||||
{
|
||||
#if defined _CPU_OPENCL_EMU_H
|
||||
return rotl32 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
#elif defined IS_CUDA || defined IS_HIP
|
||||
return rotl32 (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -894,7 +894,7 @@ DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
|
||||
{
|
||||
#if defined _CPU_OPENCL_EMU_H
|
||||
return rotr32 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
#elif defined IS_CUDA || defined IS_HIP
|
||||
return rotr32 (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -909,7 +909,7 @@ DECLSPEC u32 hc_rotl32_S (const u32 a, const int n)
|
||||
{
|
||||
#if defined _CPU_OPENCL_EMU_H
|
||||
return rotl32 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
#elif defined IS_CUDA || defined IS_HIP
|
||||
return rotl32_S (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -924,7 +924,7 @@ DECLSPEC u32 hc_rotr32_S (const u32 a, const int n)
|
||||
{
|
||||
#if defined _CPU_OPENCL_EMU_H
|
||||
return rotr32 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
#elif defined IS_CUDA || defined IS_HIP
|
||||
return rotr32_S (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -941,7 +941,7 @@ DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
|
||||
return rotl64 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
return rotl64 (a, n);
|
||||
#elif defined IS_AMD
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
return rotl64 (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -958,7 +958,7 @@ DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
|
||||
return rotr64 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
return rotr64 (a, n);
|
||||
#elif defined IS_AMD
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
return rotr64 (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -975,7 +975,7 @@ DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
|
||||
return rotl64 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
return rotl64_S (a, n);
|
||||
#elif defined IS_AMD
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
return rotl64_S (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -992,7 +992,7 @@ DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
|
||||
return rotr64 (a, n);
|
||||
#elif defined IS_CUDA
|
||||
return rotr64_S (a, n);
|
||||
#elif defined IS_AMD
|
||||
#elif (defined IS_AMD || defined IS_HIP)
|
||||
return rotr64_S (a, n);
|
||||
#else
|
||||
#ifdef USE_ROTATE
|
||||
@ -1012,7 +1012,7 @@ DECLSPEC u32x hc_swap32 (const u32x v)
|
||||
#ifdef _CPU_OPENCL_EMU_H
|
||||
r = byte_swap_32 (v);
|
||||
#else
|
||||
#if defined IS_AMD && HAS_VPERM == 1
|
||||
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
|
||||
const u32 m = 0x00010203;
|
||||
|
||||
@ -1109,7 +1109,7 @@ DECLSPEC u32 hc_swap32_S (const u32 v)
|
||||
#ifdef _CPU_OPENCL_EMU_H
|
||||
r = byte_swap_32 (v);
|
||||
#else
|
||||
#if defined IS_AMD && HAS_VPERM == 1
|
||||
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
__asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203));
|
||||
#elif defined IS_NV && HAS_PRMT == 1
|
||||
asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
|
||||
@ -1135,7 +1135,7 @@ DECLSPEC u64x hc_swap64 (const u64x v)
|
||||
#ifdef _CPU_OPENCL_EMU_H
|
||||
r = byte_swap_64 (v);
|
||||
#else
|
||||
#if defined IS_AMD && HAS_VPERM == 1
|
||||
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
|
||||
const u32 m = 0x00010203;
|
||||
|
||||
@ -1354,7 +1354,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
|
||||
#ifdef _CPU_OPENCL_EMU_H
|
||||
r = byte_swap_64 (v);
|
||||
#else
|
||||
#if defined IS_AMD && HAS_VPERM == 1
|
||||
#if (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
|
||||
const u32 m = 0x00010203;
|
||||
|
||||
const u32 v0 = h32_from_64_S (v);
|
||||
@ -1399,7 +1399,7 @@ DECLSPEC u64 hc_swap64_S (const u64 v)
|
||||
return r;
|
||||
}
|
||||
|
||||
#ifdef IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
|
||||
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
@ -2767,7 +2767,7 @@ DECLSPEC void make_utf16be (const u32x *in, u32x *out1, u32x *out2)
|
||||
out1[1] = hc_byte_perm (in[0], 0, 0x3727);
|
||||
out1[0] = hc_byte_perm (in[0], 0, 0x1707);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
|
||||
out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
|
||||
@ -2805,7 +2805,7 @@ DECLSPEC void make_utf16beN (const u32x *in, u32x *out1, u32x *out2)
|
||||
out1[1] = hc_byte_perm (in[0], 0, 0x1707);
|
||||
out1[0] = hc_byte_perm (in[0], 0, 0x3727);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
|
||||
out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
|
||||
@ -2843,7 +2843,7 @@ DECLSPEC void make_utf16le (const u32x *in, u32x *out1, u32x *out2)
|
||||
out1[1] = hc_byte_perm (in[0], 0, 0x7372);
|
||||
out1[0] = hc_byte_perm (in[0], 0, 0x7170);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
|
||||
out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
|
||||
@ -2881,7 +2881,7 @@ DECLSPEC void make_utf16leN (const u32x *in, u32x *out1, u32x *out2)
|
||||
out1[1] = hc_byte_perm (in[0], 0, 0x7170);
|
||||
out1[0] = hc_byte_perm (in[0], 0, 0x7372);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
|
||||
out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
|
||||
@ -2915,7 +2915,7 @@ DECLSPEC void undo_utf16be (const u32x *in1, const u32x *in2, u32x *out)
|
||||
out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
|
||||
out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
|
||||
out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
|
||||
@ -2945,7 +2945,7 @@ DECLSPEC void undo_utf16le (const u32x *in1, const u32x *in2, u32x *out)
|
||||
out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
|
||||
out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && defined HAS_VPERM
|
||||
|
||||
out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
|
||||
out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
|
||||
@ -3069,7 +3069,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -3394,7 +3394,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -3404,7 +3404,7 @@ DECLSPEC void switch_buffer_by_offset_le (u32x *w0, u32x *w1, u32x *w2, u32x *w3
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -3737,7 +3737,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le (u32x *w0, u32x *w1, u32x *w2, u3
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -4665,7 +4665,7 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -4990,13 +4990,13 @@ DECLSPEC void switch_buffer_by_offset_be (u32x *w0, u32x *w1, u32x *w2, u32x *w3
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -5329,7 +5329,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -5790,13 +5790,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be (u32x *w0, u32x *w1, u32x *w2, u3
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -6265,7 +6265,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -7422,7 +7422,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -7432,7 +7432,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (u32x *w0, u32x *w1, u32x *w2, u32x
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -8005,7 +8005,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -9690,7 +9690,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -9700,7 +9700,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (u32x *w0, u32x *w1, u32x *w2
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -11393,7 +11393,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -12550,13 +12550,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (u32x *w0, u32x *w1, u32x *w2, u32x
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -13721,7 +13721,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -15406,13 +15406,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (u32x *w0, u32x *w1, u32x *w2
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -17105,7 +17105,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -21462,7 +21462,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -21472,7 +21472,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (u32x *w, const u32 offset)
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -25837,7 +25837,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -30194,13 +30194,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (u32x *w, const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -36533,7 +36533,7 @@ DECLSPEC void make_utf16be_S (const u32 *in, u32 *out1, u32 *out2)
|
||||
out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
|
||||
out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
|
||||
out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
|
||||
@ -36571,7 +36571,7 @@ DECLSPEC void make_utf16le_S (const u32 *in, u32 *out1, u32 *out2)
|
||||
out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
|
||||
out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
|
||||
|
||||
out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
|
||||
out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
|
||||
@ -36605,7 +36605,7 @@ DECLSPEC void undo_utf16be_S (const u32 *in1, const u32 *in2, u32 *out)
|
||||
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
|
||||
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
|
||||
|
||||
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
|
||||
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
|
||||
@ -36635,7 +36635,7 @@ DECLSPEC void undo_utf16le_S (const u32 *in1, const u32 *in2, u32 *out)
|
||||
out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
|
||||
out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420);
|
||||
|
||||
#elif defined IS_AMD && HAS_VPERM
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM
|
||||
|
||||
out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
|
||||
out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
|
||||
@ -36660,7 +36660,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -36985,7 +36985,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -36995,7 +36995,7 @@ DECLSPEC void switch_buffer_by_offset_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -37328,7 +37328,7 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (u32 *w0, u32 *w1, u32 *w2, u32
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -38256,7 +38256,7 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -38581,13 +38581,13 @@ DECLSPEC void switch_buffer_by_offset_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *w3,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -38920,7 +38920,7 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -39381,13 +39381,13 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (u32 *w0, u32 *w1, u32 *w2, u32
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -39856,7 +39856,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -41013,7 +41013,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -41023,7 +41023,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (u32 *w0, u32 *w1, u32 *w2, u32 *
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -41596,7 +41596,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -43281,7 +43281,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -43291,7 +43291,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (u32 *w0, u32 *w1, u32 *w2,
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -44984,7 +44984,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -46141,13 +46141,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (u32 *w0, u32 *w1, u32 *w2, u32 *
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -47312,7 +47312,7 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -48997,13 +48997,13 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (u32 *w0, u32 *w1, u32 *w2,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
@ -50696,7 +50696,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -55053,7 +55053,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -55063,7 +55063,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (u32 *w, const u32 offset)
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -59428,7 +59428,7 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
|
||||
{
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
@ -63785,13 +63785,13 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (u32 *w, const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8));
|
||||
#endif
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
* - P19: Type of the esalt_bufs structure with additional data, or void.
|
||||
*/
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#define KERN_ATTR(p2,p4,p5,p6,p19) \
|
||||
MAYBE_UNUSED GLOBAL_AS pw_t *pws, \
|
||||
MAYBE_UNUSED p2 const kernel_rule_t *g_rules_buf, \
|
||||
@ -113,7 +113,7 @@
|
||||
* do not use rules or tmps, etc.
|
||||
*/
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
#define KERN_ATTR_BASIC() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, void)
|
||||
#define KERN_ATTR_BITSLICE() KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bs_word_t *g_words_buf_s, void, void, void)
|
||||
#define KERN_ATTR_ESALT(e) KERN_ATTR (GLOBAL_AS, GLOBAL_AS const bf_t *g_bfs_buf, void, void, e)
|
||||
|
@ -124,7 +124,7 @@ DECLSPEC u32 sub (u32 *r, const u32 *a, const u32 *b)
|
||||
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
||||
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
||||
);
|
||||
#elif defined IS_AMD && HAS_VSUB == 1 && HAS_VSUBB == 1
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VSUB == 1 && HAS_VSUBB == 1
|
||||
__asm__ __volatile__
|
||||
(
|
||||
"V_SUB_U32 %0, %9, %17;"
|
||||
@ -176,7 +176,7 @@ DECLSPEC u32 add (u32 *r, const u32 *a, const u32 *b)
|
||||
: "r"(a[0]), "r"(a[1]), "r"(a[2]), "r"(a[3]), "r"(a[4]), "r"(a[5]), "r"(a[6]), "r"(a[7]),
|
||||
"r"(b[0]), "r"(b[1]), "r"(b[2]), "r"(b[3]), "r"(b[4]), "r"(b[5]), "r"(b[6]), "r"(b[7])
|
||||
);
|
||||
#elif defined IS_AMD && HAS_VADD == 1 && HAS_VADDC == 1
|
||||
#elif (defined IS_AMD || defined IS_HIP) && HAS_VADD == 1 && HAS_VADDC == 1
|
||||
__asm__ __volatile__
|
||||
(
|
||||
"V_ADD_U32 %0, %9, %17;"
|
||||
|
@ -60,7 +60,7 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
|
||||
#if ATTACK_EXEC == 11
|
||||
|
||||
@ -164,6 +164,143 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
||||
#define SYNC_THREADS() __syncthreads ()
|
||||
#endif
|
||||
|
||||
#if defined IS_HIP
|
||||
|
||||
#if ATTACK_EXEC == 11
|
||||
|
||||
CONSTANT_VK u32 generic_constant[8192] __attribute__((used)); // 32k
|
||||
|
||||
#if ATTACK_KERN == 0
|
||||
#define bfs_buf g_bfs_buf
|
||||
#define rules_buf ((const kernel_rule_t *) generic_constant)
|
||||
#define words_buf_s g_words_buf_s
|
||||
#define words_buf_r g_words_buf_r
|
||||
#elif ATTACK_KERN == 1
|
||||
#define bfs_buf g_bfs_buf
|
||||
#define rules_buf g_rules_buf
|
||||
#define words_buf_s g_words_buf_s
|
||||
#define words_buf_r g_words_buf_r
|
||||
#elif ATTACK_KERN == 3
|
||||
#define rules_buf g_rules_buf
|
||||
#define bfs_buf ((const bf_t *) generic_constant)
|
||||
#define words_buf_s ((const bs_word_t *) generic_constant)
|
||||
#define words_buf_r ((const u32x *) generic_constant)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
DECLSPEC u32 hc_atomic_dec (GLOBAL_AS u32 *p)
|
||||
{
|
||||
volatile const u32 val = 1;
|
||||
|
||||
return atomicSub (p, val);
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_atomic_inc (GLOBAL_AS u32 *p)
|
||||
{
|
||||
volatile const u32 val = 1;
|
||||
|
||||
return atomicAdd (p, val);
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_atomic_or (GLOBAL_AS u32 *p, volatile const u32 val)
|
||||
{
|
||||
return atomicOr (p, val);
|
||||
}
|
||||
|
||||
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)))
|
||||
{
|
||||
return (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
}
|
||||
|
||||
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)))
|
||||
{
|
||||
return threadIdx.x;
|
||||
}
|
||||
|
||||
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)))
|
||||
{
|
||||
// verify
|
||||
return blockDim.x;
|
||||
}
|
||||
|
||||
DECLSPEC u32x rotl32 (const u32x a, const int n)
|
||||
{
|
||||
return ((a << n) | ((a >> (32 - n))));
|
||||
}
|
||||
|
||||
DECLSPEC u32x rotr32 (const u32x a, const int n)
|
||||
{
|
||||
return ((a >> n) | ((a << (32 - n))));
|
||||
}
|
||||
|
||||
DECLSPEC u32 rotl32_S (const u32 a, const int n)
|
||||
{
|
||||
return ((a << n) | ((a >> (32 - n))));
|
||||
}
|
||||
|
||||
DECLSPEC u32 rotr32_S (const u32 a, const int n)
|
||||
{
|
||||
return ((a >> n) | ((a << (32 - n))));
|
||||
}
|
||||
|
||||
DECLSPEC u64x rotl64 (const u64x a, const int n)
|
||||
{
|
||||
return rotr64 (a, 64 - n);
|
||||
}
|
||||
|
||||
DECLSPEC u32 amd_bitalign_S (const u32 a, const u32 b, const int n)
|
||||
{
|
||||
u32 r = 0;
|
||||
|
||||
__asm__ ("V_ALIGNBIT_B32 %0, %1, %2, %3;" : "=v"(r): "v"(a), "v"(b), "I"(n));
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u64x rotr64 (const u64x a, const int n)
|
||||
{
|
||||
#if VECT_SIZE == 1
|
||||
return rotr64_S (a, n);
|
||||
#else
|
||||
return ((a >> n) | ((a << (64 - n))));
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC u64 rotl64_S (const u64 a, const int n)
|
||||
{
|
||||
return rotr64_S (a, 64 - n);
|
||||
}
|
||||
|
||||
DECLSPEC u64 rotr64_S (const u64 a, const int n)
|
||||
{
|
||||
vconv64_t in;
|
||||
|
||||
in.v64 = a;
|
||||
|
||||
const u32 a0 = in.v32.a;
|
||||
const u32 a1 = in.v32.b;
|
||||
|
||||
vconv64_t out;
|
||||
|
||||
if (n < 32)
|
||||
{
|
||||
out.v32.a = amd_bitalign_S (a1, a0, n);
|
||||
out.v32.b = amd_bitalign_S (a0, a1, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
out.v32.a = amd_bitalign_S (a0, a1, n - 32);
|
||||
out.v32.b = amd_bitalign_S (a1, a0, n - 32);
|
||||
}
|
||||
|
||||
return out.v64;
|
||||
}
|
||||
|
||||
#define FIXED_THREAD_COUNT(n) __launch_bounds__((n), 0)
|
||||
#define SYNC_THREADS() __syncthreads ()
|
||||
#endif
|
||||
|
||||
#ifdef IS_OPENCL
|
||||
|
||||
DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p)
|
||||
|
@ -43,4 +43,26 @@ DECLSPEC u64 rotr64_S (const u64 a, const int n);
|
||||
#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
|
||||
#endif
|
||||
|
||||
#ifdef IS_HIP
|
||||
DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p);
|
||||
DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p);
|
||||
DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val);
|
||||
|
||||
DECLSPEC size_t get_global_id (const u32 dimindx __attribute__((unused)));
|
||||
DECLSPEC size_t get_local_id (const u32 dimindx __attribute__((unused)));
|
||||
DECLSPEC size_t get_local_size (const u32 dimindx __attribute__((unused)));
|
||||
|
||||
DECLSPEC u32x rotl32 (const u32x a, const int n);
|
||||
DECLSPEC u32x rotr32 (const u32x a, const int n);
|
||||
DECLSPEC u32 rotl32_S (const u32 a, const int n);
|
||||
DECLSPEC u32 rotr32_S (const u32 a, const int n);
|
||||
DECLSPEC u64x rotl64 (const u64x a, const int n);
|
||||
DECLSPEC u64x rotr64 (const u64x a, const int n);
|
||||
DECLSPEC u64 rotl64_S (const u64 a, const int n);
|
||||
DECLSPEC u64 rotr64_S (const u64 a, const int n);
|
||||
|
||||
//#define rotate(a,n) (((a) << (n)) | ((a) >> (32 - (n))))
|
||||
#define bitselect(a,b,c) ((a) ^ ((c) & ((b) ^ (a))))
|
||||
#endif
|
||||
|
||||
#endif // _INC_PLATFORM_H
|
||||
|
@ -781,7 +781,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
|
||||
|
||||
const int offset_switch = offset / 4;
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
|
||||
const u32 src_r00 = src_r0[0];
|
||||
const u32 src_r01 = src_r0[1];
|
||||
const u32 src_r02 = src_r0[2];
|
||||
@ -884,7 +884,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (defined IS_AMD && HAS_VPERM == 1) || defined IS_NV
|
||||
#if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV
|
||||
|
||||
const int offset_mod_4 = offset & 3;
|
||||
|
||||
@ -894,7 +894,7 @@ DECLSPEC void append_block8_optimized (const u32 offset, u32 *buf0, u32 *buf1, c
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
|
||||
#endif
|
||||
|
||||
@ -1359,11 +1359,7 @@ DECLSPEC u32 rule_op_mangle_delete_at (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED c
|
||||
const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
|
||||
const u32 mr = ~ml;
|
||||
|
||||
#ifdef IS_AMD
|
||||
const int p0_switch = p0 / 4;
|
||||
#else
|
||||
const int p0_switch = p0 / 4;
|
||||
#endif
|
||||
|
||||
switch (p0_switch)
|
||||
{
|
||||
@ -1466,11 +1462,7 @@ DECLSPEC u32 rule_op_mangle_omit (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const
|
||||
const u32 ml = (1 << ((p0 & 3) * 8)) - 1;
|
||||
const u32 mr = ~ml;
|
||||
|
||||
#ifdef IS_AMD
|
||||
const int p0_switch = p0 / 4;
|
||||
#else
|
||||
const int p0_switch = p0 / 4;
|
||||
#endif
|
||||
|
||||
switch (p0_switch)
|
||||
{
|
||||
@ -1552,11 +1544,7 @@ DECLSPEC u32 rule_op_mangle_insert (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED cons
|
||||
|
||||
const u32 mr = 0xffffff00 << ((p0 & 3) * 8);
|
||||
|
||||
#ifdef IS_AMD
|
||||
const int p0_switch = p0 / 4;
|
||||
#else
|
||||
const int p0_switch = p0 / 4;
|
||||
#endif
|
||||
|
||||
switch (p0_switch)
|
||||
{
|
||||
|
@ -68,7 +68,7 @@ typedef u64 u64x;
|
||||
#define make_u64x (u64)
|
||||
|
||||
#else
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
|
||||
#if VECT_SIZE == 2
|
||||
|
||||
@ -824,6 +824,766 @@ typedef __device_builtin__ struct u64x u64x;
|
||||
#define make_u32x u32x
|
||||
#define make_u64x u64x
|
||||
|
||||
#elif defined IS_HIP
|
||||
|
||||
// seems to work, but slow
|
||||
|
||||
/*
|
||||
#if VECT_SIZE == 2
|
||||
|
||||
struct u8x
|
||||
{
|
||||
u8 s0;
|
||||
u8 s1;
|
||||
|
||||
inline __device__ u8x (const u8 a, const u8 b) : s0(a), s1(b) { }
|
||||
inline __device__ u8x (const u8 a) : s0(a), s1(a) { }
|
||||
|
||||
inline __device__ u8x (void) : s0(0), s1(0) { }
|
||||
inline __device__ ~u8x (void) { }
|
||||
};
|
||||
|
||||
struct u16x
|
||||
{
|
||||
u16 s0;
|
||||
u16 s1;
|
||||
|
||||
inline __device__ u16x (const u16 a, const u16 b) : s0(a), s1(b) { }
|
||||
inline __device__ u16x (const u16 a) : s0(a), s1(a) { }
|
||||
|
||||
inline __device__ u16x (void) : s0(0), s1(0) { }
|
||||
inline __device__ ~u16x (void) { }
|
||||
};
|
||||
|
||||
struct u32x
|
||||
{
|
||||
u32 s0;
|
||||
u32 s1;
|
||||
|
||||
inline __device__ u32x (const u32 a, const u32 b) : s0(a), s1(b) { }
|
||||
inline __device__ u32x (const u32 a) : s0(a), s1(a) { }
|
||||
|
||||
inline __device__ u32x (void) : s0(0), s1(0) { }
|
||||
inline __device__ ~u32x (void) { }
|
||||
};
|
||||
|
||||
struct u64x
|
||||
{
|
||||
u64 s0;
|
||||
u64 s1;
|
||||
|
||||
inline __device__ u64x (const u64 a, const u64 b) : s0(a), s1(b) { }
|
||||
inline __device__ u64x (const u64 a) : s0(a), s1(a) { }
|
||||
|
||||
inline __device__ u64x (void) : s0(0), s1(0) { }
|
||||
inline __device__ ~u64x (void) { }
|
||||
};
|
||||
|
||||
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b)); }
|
||||
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
|
||||
|
||||
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; }
|
||||
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; }
|
||||
|
||||
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; }
|
||||
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; }
|
||||
|
||||
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; }
|
||||
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; }
|
||||
|
||||
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; }
|
||||
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; }
|
||||
|
||||
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; }
|
||||
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; }
|
||||
|
||||
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; }
|
||||
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; }
|
||||
|
||||
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; }
|
||||
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; }
|
||||
|
||||
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; }
|
||||
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; }
|
||||
|
||||
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) ); }
|
||||
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1)); }
|
||||
|
||||
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) ); }
|
||||
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1)); }
|
||||
|
||||
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) ); }
|
||||
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); }
|
||||
|
||||
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) ); }
|
||||
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1)); }
|
||||
|
||||
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) ); }
|
||||
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1)); }
|
||||
|
||||
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) ); }
|
||||
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1)); }
|
||||
|
||||
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) ); }
|
||||
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1)); }
|
||||
|
||||
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) ); }
|
||||
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1)); }
|
||||
|
||||
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) ); }
|
||||
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1)); }
|
||||
|
||||
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1); }
|
||||
|
||||
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b)); }
|
||||
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1)); }
|
||||
|
||||
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; }
|
||||
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; }
|
||||
|
||||
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; }
|
||||
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; }
|
||||
|
||||
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; }
|
||||
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; }
|
||||
|
||||
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; }
|
||||
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; }
|
||||
|
||||
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; }
|
||||
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; }
|
||||
|
||||
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; }
|
||||
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; }
|
||||
|
||||
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; }
|
||||
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; }
|
||||
|
||||
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; }
|
||||
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; }
|
||||
|
||||
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) ); }
|
||||
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1)); }
|
||||
|
||||
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) ); }
|
||||
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1)); }
|
||||
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) ); }
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1)); }
|
||||
|
||||
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) ); }
|
||||
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1)); }
|
||||
|
||||
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) ); }
|
||||
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1)); }
|
||||
|
||||
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) ); }
|
||||
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1)); }
|
||||
|
||||
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) ); }
|
||||
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1)); }
|
||||
|
||||
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) ); }
|
||||
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1)); }
|
||||
|
||||
inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) ); }
|
||||
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1)); }
|
||||
|
||||
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1); }
|
||||
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 4
|
||||
|
||||
struct u8x
|
||||
{
|
||||
u8 s0;
|
||||
u8 s1;
|
||||
u8 s2;
|
||||
u8 s3;
|
||||
|
||||
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d) : s0(a), s1(b), s2(c), s3(d) { }
|
||||
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a) { }
|
||||
|
||||
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0) { }
|
||||
inline __device__ ~u8x (void) { }
|
||||
};
|
||||
|
||||
struct u16x
|
||||
{
|
||||
u16 s0;
|
||||
u16 s1;
|
||||
u16 s2;
|
||||
u16 s3;
|
||||
|
||||
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d) : s0(a), s1(b), s2(c), s3(d) { }
|
||||
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a) { }
|
||||
|
||||
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0) { }
|
||||
inline __device__ ~u16x (void) { }
|
||||
};
|
||||
|
||||
struct u32x
|
||||
{
|
||||
u32 s0;
|
||||
u32 s1;
|
||||
u32 s2;
|
||||
u32 s3;
|
||||
|
||||
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d) : s0(a), s1(b), s2(c), s3(d) { }
|
||||
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a) { }
|
||||
|
||||
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0) { }
|
||||
inline __device__ ~u32x (void) { }
|
||||
};
|
||||
|
||||
struct u64x
|
||||
{
|
||||
u64 s0;
|
||||
u64 s1;
|
||||
u64 s2;
|
||||
u64 s3;
|
||||
|
||||
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d) : s0(a), s1(b), s2(c), s3(d) { }
|
||||
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a) { }
|
||||
|
||||
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0) { }
|
||||
inline __device__ ~u64x (void) { }
|
||||
};
|
||||
|
||||
inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); }
|
||||
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
|
||||
|
||||
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; }
|
||||
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; }
|
||||
|
||||
inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; }
|
||||
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; }
|
||||
|
||||
inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; }
|
||||
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; }
|
||||
|
||||
inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; }
|
||||
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; }
|
||||
|
||||
inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; }
|
||||
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; }
|
||||
|
||||
inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; }
|
||||
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; }
|
||||
|
||||
inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; }
|
||||
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; }
|
||||
|
||||
inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; }
|
||||
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; }
|
||||
|
||||
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); }
|
||||
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); }
|
||||
|
||||
inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); }
|
||||
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); }
|
||||
|
||||
inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); }
|
||||
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); }
|
||||
|
||||
inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); }
|
||||
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); }
|
||||
|
||||
inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); }
|
||||
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); }
|
||||
|
||||
inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); }
|
||||
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); }
|
||||
|
||||
inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); }
|
||||
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); }
|
||||
|
||||
inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); }
|
||||
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); }
|
||||
|
||||
inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); }
|
||||
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); }
|
||||
|
||||
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
|
||||
|
||||
inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) ); }
|
||||
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3)); }
|
||||
|
||||
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; }
|
||||
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; }
|
||||
|
||||
inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; }
|
||||
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; }
|
||||
|
||||
inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; }
|
||||
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; }
|
||||
|
||||
inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; }
|
||||
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; }
|
||||
|
||||
inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; }
|
||||
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; }
|
||||
|
||||
inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; }
|
||||
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; }
|
||||
|
||||
inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; }
|
||||
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; }
|
||||
|
||||
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; }
|
||||
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; }
|
||||
|
||||
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) ); }
|
||||
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3)); }
|
||||
|
||||
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) ); }
|
||||
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3)); }
|
||||
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) ); }
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3)); }
|
||||
|
||||
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) ); }
|
||||
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3)); }
|
||||
|
||||
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) ); }
|
||||
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3)); }
|
||||
|
||||
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) ); }
|
||||
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3)); }
|
||||
|
||||
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) ); }
|
||||
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3)); }
|
||||
|
||||
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) ); }
|
||||
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3)); }
|
||||
|
||||
inline __device__ u64x operator % (const u64x a, const u32 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) ); }
|
||||
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3)); }
|
||||
|
||||
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3); }
|
||||
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 8
|
||||
|
||||
struct u8x
|
||||
{
|
||||
u8 s0;
|
||||
u8 s1;
|
||||
u8 s2;
|
||||
u8 s3;
|
||||
u8 s4;
|
||||
u8 s5;
|
||||
u8 s6;
|
||||
u8 s7;
|
||||
|
||||
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
|
||||
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
|
||||
|
||||
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
|
||||
inline __device__ ~u8x (void) { }
|
||||
};
|
||||
|
||||
struct u16x
|
||||
{
|
||||
u16 s0;
|
||||
u16 s1;
|
||||
u16 s2;
|
||||
u16 s3;
|
||||
u16 s4;
|
||||
u16 s5;
|
||||
u16 s6;
|
||||
u16 s7;
|
||||
|
||||
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
|
||||
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
|
||||
|
||||
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
|
||||
inline __device__ ~u16x (void) { }
|
||||
};
|
||||
|
||||
struct u32x
|
||||
{
|
||||
u32 s0;
|
||||
u32 s1;
|
||||
u32 s2;
|
||||
u32 s3;
|
||||
u32 s4;
|
||||
u32 s5;
|
||||
u32 s6;
|
||||
u32 s7;
|
||||
|
||||
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
|
||||
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
|
||||
|
||||
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
|
||||
inline __device__ ~u32x (void) { }
|
||||
};
|
||||
|
||||
struct u64x
|
||||
{
|
||||
u64 s0;
|
||||
u64 s1;
|
||||
u64 s2;
|
||||
u64 s3;
|
||||
u64 s4;
|
||||
u64 s5;
|
||||
u64 s6;
|
||||
u64 s7;
|
||||
|
||||
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h) { }
|
||||
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a) { }
|
||||
|
||||
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0) { }
|
||||
inline __device__ ~u64x (void) { }
|
||||
};
|
||||
|
||||
// VECT_SIZE == 8: lane-wise operators for the emulated u32x vector type.
// Each operator applies the scalar operation independently to lanes s0..s7,
// in vector-op-scalar and vector-op-vector overloads.
// NOTE(review): operator != combines lanes with &&, i.e. it is true only when
// EVERY lane differs (an all-lanes inequality test) — presumably intentional
// for vectorized early-reject comparisons; confirm at call sites.

inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }

// Compound assignment: mutate every lane of a in place.
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; }

inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; }

inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; }

inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; }

inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; }

inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; }

inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; }

inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; }

// Binary operators: build a new vector from lane-wise results.
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); }

inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); }

inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); }

inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); }

inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); }

inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); }

inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); }

inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); }

inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); }

// Unary bitwise NOT, lane-wise.
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
|
||||
|
||||
// VECT_SIZE == 8: lane-wise operators for the emulated u64x vector type.
// Same structure as the u32x family above: each operator applies the scalar
// operation independently to lanes s0..s7.
// NOTE(review): operator != is true only when EVERY lane differs (&& across
// lanes) — confirm this all-lanes semantics at call sites.

inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) ); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7)); }

// Compound assignment: mutate every lane of a in place.
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; }

inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; }

inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; }

inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; }

inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; }

inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; }

inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; }

inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; }
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; }

// Binary operators: build a new vector from lane-wise results.
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b) ); }
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7)); }

inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b) ); }
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7)); }

inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b) ); }
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7)); }

inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b) ); }
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7)); }

inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b) ); }
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7)); }

inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b) ); }
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7)); }

inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b) ); }
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7)); }

inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b) ); }
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7)); }

inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b) ); }
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7)); }

// Unary bitwise NOT, lane-wise.
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7); }
|
||||
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 16
|
||||
|
||||
struct u8x
|
||||
{
|
||||
u8 s0;
|
||||
u8 s1;
|
||||
u8 s2;
|
||||
u8 s3;
|
||||
u8 s4;
|
||||
u8 s5;
|
||||
u8 s6;
|
||||
u8 s7;
|
||||
u8 s8;
|
||||
u8 s9;
|
||||
u8 sa;
|
||||
u8 sb;
|
||||
u8 sc;
|
||||
u8 sd;
|
||||
u8 se;
|
||||
u8 sf;
|
||||
|
||||
inline __device__ u8x (const u8 a, const u8 b, const u8 c, const u8 d, const u8 e, const u8 f, const u8 g, const u8 h, const u8 i, const u8 j, const u8 k, const u8 l, const u8 m, const u8 n, const u8 o, const u8 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
|
||||
inline __device__ u8x (const u8 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
|
||||
|
||||
inline __device__ u8x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
|
||||
inline __device__ ~u8x (void) { }
|
||||
};
|
||||
|
||||
struct u16x
|
||||
{
|
||||
u16 s0;
|
||||
u16 s1;
|
||||
u16 s2;
|
||||
u16 s3;
|
||||
u16 s4;
|
||||
u16 s5;
|
||||
u16 s6;
|
||||
u16 s7;
|
||||
u16 s8;
|
||||
u16 s9;
|
||||
u16 sa;
|
||||
u16 sb;
|
||||
u16 sc;
|
||||
u16 sd;
|
||||
u16 se;
|
||||
u16 sf;
|
||||
|
||||
inline __device__ u16x (const u16 a, const u16 b, const u16 c, const u16 d, const u16 e, const u16 f, const u16 g, const u16 h, const u16 i, const u16 j, const u16 k, const u16 l, const u16 m, const u16 n, const u16 o, const u16 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
|
||||
inline __device__ u16x (const u16 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
|
||||
|
||||
inline __device__ u16x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
|
||||
inline __device__ ~u16x (void) { }
|
||||
};
|
||||
|
||||
struct u32x
|
||||
{
|
||||
u32 s0;
|
||||
u32 s1;
|
||||
u32 s2;
|
||||
u32 s3;
|
||||
u32 s4;
|
||||
u32 s5;
|
||||
u32 s6;
|
||||
u32 s7;
|
||||
u32 s8;
|
||||
u32 s9;
|
||||
u32 sa;
|
||||
u32 sb;
|
||||
u32 sc;
|
||||
u32 sd;
|
||||
u32 se;
|
||||
u32 sf;
|
||||
|
||||
inline __device__ u32x (const u32 a, const u32 b, const u32 c, const u32 d, const u32 e, const u32 f, const u32 g, const u32 h, const u32 i, const u32 j, const u32 k, const u32 l, const u32 m, const u32 n, const u32 o, const u32 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
|
||||
inline __device__ u32x (const u32 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
|
||||
|
||||
inline __device__ u32x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0){ }
|
||||
inline __device__ ~u32x (void) { }
|
||||
};
|
||||
|
||||
struct u64x
|
||||
{
|
||||
u64 s0;
|
||||
u64 s1;
|
||||
u64 s2;
|
||||
u64 s3;
|
||||
u64 s4;
|
||||
u64 s5;
|
||||
u64 s6;
|
||||
u64 s7;
|
||||
u64 s8;
|
||||
u64 s9;
|
||||
u64 sa;
|
||||
u64 sb;
|
||||
u64 sc;
|
||||
u64 sd;
|
||||
u64 se;
|
||||
u64 sf;
|
||||
|
||||
inline __device__ u64x (const u64 a, const u64 b, const u64 c, const u64 d, const u64 e, const u64 f, const u64 g, const u64 h, const u64 i, const u64 j, const u64 k, const u64 l, const u64 m, const u64 n, const u64 o, const u64 p) : s0(a), s1(b), s2(c), s3(d), s4(e), s5(f), s6(g), s7(h), s8(i), s9(j), sa(k), sb(l), sc(m), sd(n), se(o), sf(p) { }
|
||||
inline __device__ u64x (const u64 a) : s0(a), s1(a), s2(a), s3(a), s4(a), s5(a), s6(a), s7(a), s8(a), s9(a), sa(a), sb(a), sc(a), sd(a), se(a), sf(a) { }
|
||||
|
||||
inline __device__ u64x (void) : s0(0), s1(0), s2(0), s3(0), s4(0), s5(0), s6(0), s7(0), s8(0), s9(0), sa(0), sb(0), sc(0), sd(0), se(0), sf(0) { }
|
||||
inline __device__ ~u64x (void) { }
|
||||
};
|
||||
|
||||
// VECT_SIZE == 16: lane-wise operators for the emulated u32x vector type.
// Each operator applies the scalar operation independently to lanes s0..sf.
// NOTE(review): operator != combines lanes with && (true only when EVERY
// lane differs) — confirm this all-lanes semantics at call sites.

inline __device__ bool operator != (const u32x a, const u32 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); }
inline __device__ bool operator != (const u32x a, const u32x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }

// Compound assignment: mutate every lane of a in place.
inline __device__ void operator ^= (u32x &a, const u32 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; }
inline __device__ void operator ^= (u32x &a, const u32x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }

inline __device__ void operator |= (u32x &a, const u32 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; }
inline __device__ void operator |= (u32x &a, const u32x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }

inline __device__ void operator &= (u32x &a, const u32 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; }
inline __device__ void operator &= (u32x &a, const u32x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }

inline __device__ void operator += (u32x &a, const u32 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; }
inline __device__ void operator += (u32x &a, const u32x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }

inline __device__ void operator -= (u32x &a, const u32 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; }
inline __device__ void operator -= (u32x &a, const u32x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }

inline __device__ void operator *= (u32x &a, const u32 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; }
inline __device__ void operator *= (u32x &a, const u32x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }

inline __device__ void operator >>= (u32x &a, const u32 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; }
inline __device__ void operator >>= (u32x &a, const u32x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }

inline __device__ void operator <<= (u32x &a, const u32 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; }
inline __device__ void operator <<= (u32x &a, const u32x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }

// Binary operators: build a new vector from lane-wise results.
inline __device__ u32x operator << (const u32x a, const u32 b) { return u32x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); }
inline __device__ u32x operator << (const u32x a, const u32x b) { return u32x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); }

inline __device__ u32x operator >> (const u32x a, const u32 b) { return u32x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se >> b), (a.sf >> b) ); }
inline __device__ u32x operator >> (const u32x a, const u32x b) { return u32x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); }

inline __device__ u32x operator ^ (const u32x a, const u32 b) { return u32x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); }
inline __device__ u32x operator ^ (const u32x a, const u32x b) { return u32x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); }

inline __device__ u32x operator | (const u32x a, const u32 b) { return u32x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); }
inline __device__ u32x operator | (const u32x a, const u32x b) { return u32x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); }

inline __device__ u32x operator & (const u32x a, const u32 b) { return u32x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); }
inline __device__ u32x operator & (const u32x a, const u32x b) { return u32x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); }

inline __device__ u32x operator + (const u32x a, const u32 b) { return u32x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); }
inline __device__ u32x operator + (const u32x a, const u32x b) { return u32x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); }

inline __device__ u32x operator - (const u32x a, const u32 b) { return u32x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); }
inline __device__ u32x operator - (const u32x a, const u32x b) { return u32x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), (a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); }

inline __device__ u32x operator * (const u32x a, const u32 b) { return u32x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); }
inline __device__ u32x operator * (const u32x a, const u32x b) { return u32x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); }

inline __device__ u32x operator % (const u32x a, const u32 b) { return u32x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); }
inline __device__ u32x operator % (const u32x a, const u32x b) { return u32x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); }

// Unary bitwise NOT, lane-wise.
inline __device__ u32x operator ~ (const u32x a) { return u32x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
|
||||
|
||||
// VECT_SIZE == 16: lane-wise operators for the emulated u64x vector type
// (family continues past this point in the file).
// NOTE(review): operator != combines lanes with && (true only when EVERY
// lane differs) — confirm this all-lanes semantics at call sites.

inline __device__ bool operator != (const u64x a, const u64 b) { return ((a.s0 != b) && (a.s1 != b) && (a.s2 != b) && (a.s3 != b) && (a.s4 != b) && (a.s5 != b) && (a.s6 != b) && (a.s7 != b) && (a.s8 != b) && (a.s9 != b) && (a.sa != b) && (a.sb != b) && (a.sc != b) && (a.sd != b) && (a.se != b) && (a.sf != b) ); }
inline __device__ bool operator != (const u64x a, const u64x b) { return ((a.s0 != b.s0) && (a.s1 != b.s1) && (a.s2 != b.s2) && (a.s3 != b.s3) && (a.s4 != b.s4) && (a.s5 != b.s5) && (a.s6 != b.s6) && (a.s7 != b.s7) && (a.s8 != b.s8) && (a.s9 != b.s9) && (a.sa != b.sa) && (a.sb != b.sb) && (a.sc != b.sc) && (a.sd != b.sd) && (a.se != b.se) && (a.sf != b.sf)); }

// Compound assignment: mutate every lane of a in place.
inline __device__ void operator ^= (u64x &a, const u64 b) { a.s0 ^= b; a.s1 ^= b; a.s2 ^= b; a.s3 ^= b; a.s4 ^= b; a.s5 ^= b; a.s6 ^= b; a.s7 ^= b; a.s8 ^= b; a.s9 ^= b; a.sa ^= b; a.sb ^= b; a.sc ^= b; a.sd ^= b; a.se ^= b; a.sf ^= b; }
inline __device__ void operator ^= (u64x &a, const u64x b) { a.s0 ^= b.s0; a.s1 ^= b.s1; a.s2 ^= b.s2; a.s3 ^= b.s3; a.s4 ^= b.s4; a.s5 ^= b.s5; a.s6 ^= b.s6; a.s7 ^= b.s7; a.s8 ^= b.s8; a.s9 ^= b.s9; a.sa ^= b.sa; a.sb ^= b.sb; a.sc ^= b.sc; a.sd ^= b.sd; a.se ^= b.se; a.sf ^= b.sf; }

inline __device__ void operator |= (u64x &a, const u64 b) { a.s0 |= b; a.s1 |= b; a.s2 |= b; a.s3 |= b; a.s4 |= b; a.s5 |= b; a.s6 |= b; a.s7 |= b; a.s8 |= b; a.s9 |= b; a.sa |= b; a.sb |= b; a.sc |= b; a.sd |= b; a.se |= b; a.sf |= b; }
inline __device__ void operator |= (u64x &a, const u64x b) { a.s0 |= b.s0; a.s1 |= b.s1; a.s2 |= b.s2; a.s3 |= b.s3; a.s4 |= b.s4; a.s5 |= b.s5; a.s6 |= b.s6; a.s7 |= b.s7; a.s8 |= b.s8; a.s9 |= b.s9; a.sa |= b.sa; a.sb |= b.sb; a.sc |= b.sc; a.sd |= b.sd; a.se |= b.se; a.sf |= b.sf; }

inline __device__ void operator &= (u64x &a, const u64 b) { a.s0 &= b; a.s1 &= b; a.s2 &= b; a.s3 &= b; a.s4 &= b; a.s5 &= b; a.s6 &= b; a.s7 &= b; a.s8 &= b; a.s9 &= b; a.sa &= b; a.sb &= b; a.sc &= b; a.sd &= b; a.se &= b; a.sf &= b; }
inline __device__ void operator &= (u64x &a, const u64x b) { a.s0 &= b.s0; a.s1 &= b.s1; a.s2 &= b.s2; a.s3 &= b.s3; a.s4 &= b.s4; a.s5 &= b.s5; a.s6 &= b.s6; a.s7 &= b.s7; a.s8 &= b.s8; a.s9 &= b.s9; a.sa &= b.sa; a.sb &= b.sb; a.sc &= b.sc; a.sd &= b.sd; a.se &= b.se; a.sf &= b.sf; }

inline __device__ void operator += (u64x &a, const u64 b) { a.s0 += b; a.s1 += b; a.s2 += b; a.s3 += b; a.s4 += b; a.s5 += b; a.s6 += b; a.s7 += b; a.s8 += b; a.s9 += b; a.sa += b; a.sb += b; a.sc += b; a.sd += b; a.se += b; a.sf += b; }
inline __device__ void operator += (u64x &a, const u64x b) { a.s0 += b.s0; a.s1 += b.s1; a.s2 += b.s2; a.s3 += b.s3; a.s4 += b.s4; a.s5 += b.s5; a.s6 += b.s6; a.s7 += b.s7; a.s8 += b.s8; a.s9 += b.s9; a.sa += b.sa; a.sb += b.sb; a.sc += b.sc; a.sd += b.sd; a.se += b.se; a.sf += b.sf; }

inline __device__ void operator -= (u64x &a, const u64 b) { a.s0 -= b; a.s1 -= b; a.s2 -= b; a.s3 -= b; a.s4 -= b; a.s5 -= b; a.s6 -= b; a.s7 -= b; a.s8 -= b; a.s9 -= b; a.sa -= b; a.sb -= b; a.sc -= b; a.sd -= b; a.se -= b; a.sf -= b; }
inline __device__ void operator -= (u64x &a, const u64x b) { a.s0 -= b.s0; a.s1 -= b.s1; a.s2 -= b.s2; a.s3 -= b.s3; a.s4 -= b.s4; a.s5 -= b.s5; a.s6 -= b.s6; a.s7 -= b.s7; a.s8 -= b.s8; a.s9 -= b.s9; a.sa -= b.sa; a.sb -= b.sb; a.sc -= b.sc; a.sd -= b.sd; a.se -= b.se; a.sf -= b.sf; }

inline __device__ void operator *= (u64x &a, const u64 b) { a.s0 *= b; a.s1 *= b; a.s2 *= b; a.s3 *= b; a.s4 *= b; a.s5 *= b; a.s6 *= b; a.s7 *= b; a.s8 *= b; a.s9 *= b; a.sa *= b; a.sb *= b; a.sc *= b; a.sd *= b; a.se *= b; a.sf *= b; }
inline __device__ void operator *= (u64x &a, const u64x b) { a.s0 *= b.s0; a.s1 *= b.s1; a.s2 *= b.s2; a.s3 *= b.s3; a.s4 *= b.s4; a.s5 *= b.s5; a.s6 *= b.s6; a.s7 *= b.s7; a.s8 *= b.s8; a.s9 *= b.s9; a.sa *= b.sa; a.sb *= b.sb; a.sc *= b.sc; a.sd *= b.sd; a.se *= b.se; a.sf *= b.sf; }

inline __device__ void operator >>= (u64x &a, const u64 b) { a.s0 >>= b; a.s1 >>= b; a.s2 >>= b; a.s3 >>= b; a.s4 >>= b; a.s5 >>= b; a.s6 >>= b; a.s7 >>= b; a.s8 >>= b; a.s9 >>= b; a.sa >>= b; a.sb >>= b; a.sc >>= b; a.sd >>= b; a.se >>= b; a.sf >>= b; }
inline __device__ void operator >>= (u64x &a, const u64x b) { a.s0 >>= b.s0; a.s1 >>= b.s1; a.s2 >>= b.s2; a.s3 >>= b.s3; a.s4 >>= b.s4; a.s5 >>= b.s5; a.s6 >>= b.s6; a.s7 >>= b.s7; a.s8 >>= b.s8; a.s9 >>= b.s9; a.sa >>= b.sa; a.sb >>= b.sb; a.sc >>= b.sc; a.sd >>= b.sd; a.se >>= b.se; a.sf >>= b.sf; }
|
||||
|
||||
inline __device__ void operator <<= (u64x &a, const u64 b) { a.s0 <<= b; a.s1 <<= b; a.s2 <<= b; a.s3 <<= b; a.s4 <<= b; a.s5 <<= b; a.s6 <<= b; a.s7 <<= b; a.s8 <<= b; a.s9 <<= b; a.sa <<= b; a.sb <<= b; a.sc <<= b; a.sd <<= b; a.se <<= b; a.sf <<= b; }
|
||||
inline __device__ void operator <<= (u64x &a, const u64x b) { a.s0 <<= b.s0; a.s1 <<= b.s1; a.s2 <<= b.s2; a.s3 <<= b.s3; a.s4 <<= b.s4; a.s5 <<= b.s5; a.s6 <<= b.s6; a.s7 <<= b.s7; a.s8 <<= b.s8; a.s9 <<= b.s9; a.sa <<= b.sa; a.sb <<= b.sb; a.sc <<= b.sc; a.sd <<= b.sd; a.se <<= b.se; a.sf <<= b.sf; }
|
||||
|
||||
inline __device__ u64x operator << (const u64x a, const u64 b) { return u64x ((a.s0 << b), (a.s1 << b) , (a.s2 << b), (a.s3 << b) , (a.s4 << b), (a.s5 << b) , (a.s6 << b), (a.s7 << b), (a.s8 << b), (a.s9 << b) , (a.sa << b), (a.sb << b) , (a.sc << b), (a.sd << b) , (a.se << b), (a.sf << b) ); }
|
||||
inline __device__ u64x operator << (const u64x a, const u64x b) { return u64x ((a.s0 << b.s0), (a.s1 << b.s1), (a.s2 << b.s2), (a.s3 << b.s3), (a.s4 << b.s4), (a.s5 << b.s5), (a.s6 << b.s6), (a.s7 << b.s7), (a.s8 << b.s8), (a.s9 << b.s9), (a.sa << b.sa), (a.sb << b.sb), (a.sc << b.sc), (a.sd << b.sd), (a.se << b.se), (a.sf << b.sf)); }
|
||||
|
||||
inline __device__ u64x operator >> (const u64x a, const u64 b) { return u64x ((a.s0 >> b), (a.s1 >> b) , (a.s2 >> b), (a.s3 >> b) , (a.s4 >> b), (a.s5 >> b) , (a.s6 >> b), (a.s7 >> b), (a.s8 >> b), (a.s9 >> b) , (a.sa >> b), (a.sb >> b) , (a.sc >> b), (a.sd >> b) , (a.se >> b), (a.sf >> b) ); }
|
||||
inline __device__ u64x operator >> (const u64x a, const u64x b) { return u64x ((a.s0 >> b.s0), (a.s1 >> b.s1), (a.s2 >> b.s2), (a.s3 >> b.s3), (a.s4 >> b.s4), (a.s5 >> b.s5), (a.s6 >> b.s6), (a.s7 >> b.s7), (a.s8 >> b.s8), (a.s9 >> b.s9), (a.sa >> b.sa), (a.sb >> b.sb), (a.sc >> b.sc), (a.sd >> b.sd), (a.se >> b.se), (a.sf >> b.sf)); }
|
||||
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64 b) { return u64x ((a.s0 ^ b), (a.s1 ^ b) , (a.s2 ^ b), (a.s3 ^ b) , (a.s4 ^ b), (a.s5 ^ b) , (a.s6 ^ b), (a.s7 ^ b), (a.s8 ^ b), (a.s9 ^ b) , (a.sa ^ b), (a.sb ^ b) , (a.sc ^ b), (a.sd ^ b) , (a.se ^ b), (a.sf ^ b) ); }
|
||||
inline __device__ u64x operator ^ (const u64x a, const u64x b) { return u64x ((a.s0 ^ b.s0), (a.s1 ^ b.s1), (a.s2 ^ b.s2), (a.s3 ^ b.s3), (a.s4 ^ b.s4), (a.s5 ^ b.s5), (a.s6 ^ b.s6), (a.s7 ^ b.s7), (a.s8 ^ b.s8), (a.s9 ^ b.s9), (a.sa ^ b.sa), (a.sb ^ b.sb), (a.sc ^ b.sc), (a.sd ^ b.sd), (a.se ^ b.se), (a.sf ^ b.sf)); }
|
||||
|
||||
inline __device__ u64x operator | (const u64x a, const u64 b) { return u64x ((a.s0 | b), (a.s1 | b) , (a.s2 | b), (a.s3 | b) , (a.s4 | b), (a.s5 | b) , (a.s6 | b), (a.s7 | b), (a.s8 | b), (a.s9 | b) , (a.sa | b), (a.sb | b) , (a.sc | b), (a.sd | b) , (a.se | b), (a.sf | b) ); }
|
||||
inline __device__ u64x operator | (const u64x a, const u64x b) { return u64x ((a.s0 | b.s0), (a.s1 | b.s1), (a.s2 | b.s2), (a.s3 | b.s3), (a.s4 | b.s4), (a.s5 | b.s5), (a.s6 | b.s6), (a.s7 | b.s7), (a.s8 | b.s8), (a.s9 | b.s9), (a.sa | b.sa), (a.sb | b.sb), (a.sc | b.sc), (a.sd | b.sd), (a.se | b.se), (a.sf | b.sf)); }
|
||||
|
||||
inline __device__ u64x operator & (const u64x a, const u64 b) { return u64x ((a.s0 & b), (a.s1 & b) , (a.s2 & b), (a.s3 & b) , (a.s4 & b), (a.s5 & b) , (a.s6 & b), (a.s7 & b), (a.s8 & b), (a.s9 & b) , (a.sa & b), (a.sb & b) , (a.sc & b), (a.sd & b) , (a.se & b), (a.sf & b) ); }
|
||||
inline __device__ u64x operator & (const u64x a, const u64x b) { return u64x ((a.s0 & b.s0), (a.s1 & b.s1), (a.s2 & b.s2), (a.s3 & b.s3), (a.s4 & b.s4), (a.s5 & b.s5), (a.s6 & b.s6), (a.s7 & b.s7), (a.s8 & b.s8), (a.s9 & b.s9), (a.sa & b.sa), (a.sb & b.sb), (a.sc & b.sc), (a.sd & b.sd), (a.se & b.se), (a.sf & b.sf)); }
|
||||
|
||||
inline __device__ u64x operator + (const u64x a, const u64 b) { return u64x ((a.s0 + b), (a.s1 + b) , (a.s2 + b), (a.s3 + b) , (a.s4 + b), (a.s5 + b) , (a.s6 + b), (a.s7 + b), (a.s8 + b), (a.s9 + b) , (a.sa + b), (a.sb + b) , (a.sc + b), (a.sd + b) , (a.se + b), (a.sf + b) ); }
|
||||
inline __device__ u64x operator + (const u64x a, const u64x b) { return u64x ((a.s0 + b.s0), (a.s1 + b.s1), (a.s2 + b.s2), (a.s3 + b.s3), (a.s4 + b.s4), (a.s5 + b.s5), (a.s6 + b.s6), (a.s7 + b.s7), (a.s8 + b.s8), (a.s9 + b.s9), (a.sa + b.sa), (a.sb + b.sb), (a.sc + b.sc), (a.sd + b.sd), (a.se + b.se), (a.sf + b.sf)); }
|
||||
|
||||
inline __device__ u64x operator - (const u64x a, const u64 b) { return u64x ((a.s0 - b), (a.s1 - b) , (a.s2 - b), (a.s3 - b) , (a.s4 - b), (a.s5 - b) , (a.s6 - b), (a.s7 - b), (a.s8 - b), (a.s9 - b) , (a.sa - b), (a.sb - b) , (a.sc - b), (a.sd - b) , (a.se - b), (a.sf - b) ); }
|
||||
inline __device__ u64x operator - (const u64x a, const u64x b) { return u64x ((a.s0 - b.s0), (a.s1 - b.s1), (a.s2 - b.s2), (a.s3 - b.s3), (a.s4 - b.s4), (a.s5 - b.s5), (a.s6 - b.s6), (a.s7 - b.s7), (a.s8 - b.s8), (a.s9 - b.s9), (a.sa - b.sa), (a.sb - b.sb), (a.sc - b.sc), (a.sd - b.sd), (a.se - b.se), (a.sf - b.sf)); }
|
||||
|
||||
inline __device__ u64x operator * (const u64x a, const u64 b) { return u64x ((a.s0 * b), (a.s1 * b) , (a.s2 * b), (a.s3 * b) , (a.s4 * b), (a.s5 * b) , (a.s6 * b), (a.s7 * b), (a.s8 * b), (a.s9 * b) , (a.sa * b), (a.sb * b) , (a.sc * b), (a.sd * b) , (a.se * b), (a.sf * b) ); }
|
||||
inline __device__ u64x operator * (const u64x a, const u64x b) { return u64x ((a.s0 * b.s0), (a.s1 * b.s1), (a.s2 * b.s2), (a.s3 * b.s3), (a.s4 * b.s4), (a.s5 * b.s5), (a.s6 * b.s6), (a.s7 * b.s7), (a.s8 * b.s8), (a.s9 * b.s9), (a.sa * b.sa), (a.sb * b.sb), (a.sc * b.sc), (a.sd * b.sd), (a.se * b.se), (a.sf * b.sf)); }
|
||||
|
||||
inline __device__ u64x operator % (const u64x a, const u64 b) { return u64x ((a.s0 % b), (a.s1 % b) , (a.s2 % b), (a.s3 % b) , (a.s4 % b), (a.s5 % b) , (a.s6 % b), (a.s7 % b), (a.s8 % b), (a.s9 % b) , (a.sa % b), (a.sb % b) , (a.sc % b), (a.sd % b) , (a.se % b), (a.sf % b) ); }
|
||||
inline __device__ u64x operator % (const u64x a, const u64x b) { return u64x ((a.s0 % b.s0), (a.s1 % b.s1), (a.s2 % b.s2), (a.s3 % b.s3), (a.s4 % b.s4), (a.s5 % b.s5), (a.s6 % b.s6), (a.s7 % b.s7), (a.s8 % b.s8), (a.s9 % b.s9), (a.sa % b.sa), (a.sb % b.sb), (a.sc % b.sc), (a.sd % b.sd), (a.se % b.se), (a.sf % b.sf)); }
|
||||
|
||||
inline __device__ u64x operator ~ (const u64x a) { return u64x (~a.s0, ~a.s1, ~a.s2, ~a.s3, ~a.s4, ~a.s5, ~a.s6, ~a.s7, ~a.s8, ~a.s9, ~a.sa, ~a.sb, ~a.sc, ~a.sd, ~a.se, ~a.sf); }
|
||||
|
||||
#endif
|
||||
|
||||
typedef struct u8x u8x;
|
||||
typedef struct u16x u16x;
|
||||
typedef struct u32x u32x;
|
||||
typedef struct u64x u64x;
|
||||
|
||||
#define make_u8x u8x
|
||||
#define make_u16x u16x
|
||||
#define make_u32x u32x
|
||||
#define make_u64x u64x
|
||||
*/
|
||||
|
||||
#else
|
||||
typedef VTYPE(uchar, VECT_SIZE) u8x;
|
||||
typedef VTYPE(ushort, VECT_SIZE) u16x;
|
||||
|
@ -10,10 +10,16 @@
|
||||
#define IS_NATIVE
|
||||
#elif defined __CUDACC__
|
||||
#define IS_CUDA
|
||||
#elif defined __HIPCC__
|
||||
#define IS_HIP
|
||||
#else
|
||||
#define IS_OPENCL
|
||||
#endif
|
||||
|
||||
#ifdef IS_HIP
|
||||
#include <hip/hip_runtime.h>
|
||||
#endif
|
||||
|
||||
#if defined IS_NATIVE
|
||||
#define CONSTANT_VK
|
||||
#define CONSTANT_AS
|
||||
@ -28,6 +34,13 @@
|
||||
#define LOCAL_VK __shared__
|
||||
#define LOCAL_AS
|
||||
#define KERNEL_FQ extern "C" __global__
|
||||
#elif defined IS_HIP
|
||||
#define CONSTANT_VK __constant__
|
||||
#define CONSTANT_AS
|
||||
#define GLOBAL_AS
|
||||
#define LOCAL_VK __shared__
|
||||
#define LOCAL_AS
|
||||
#define KERNEL_FQ extern "C" __global__
|
||||
#elif defined IS_OPENCL
|
||||
#define CONSTANT_VK __constant
|
||||
#define CONSTANT_AS __constant
|
||||
@ -80,6 +93,8 @@
|
||||
#elif VENDOR_ID == (1 << 6)
|
||||
#define IS_POCL
|
||||
#define IS_GENERIC
|
||||
#elif VENDOR_ID == (1 << 8)
|
||||
#define IS_AMD_USE_HIP
|
||||
#else
|
||||
#define IS_GENERIC
|
||||
#endif
|
||||
@ -113,6 +128,8 @@
|
||||
|
||||
#if defined IS_AMD && defined IS_GPU
|
||||
#define DECLSPEC inline static
|
||||
#elif defined IS_HIP
|
||||
#define DECLSPEC inline static __device__
|
||||
#else
|
||||
#define DECLSPEC
|
||||
#endif
|
||||
@ -138,6 +155,11 @@
|
||||
#define USE_ROTATE
|
||||
#endif
|
||||
|
||||
#ifdef IS_HIP
|
||||
#define USE_BITSELECT
|
||||
#define USE_ROTATE
|
||||
#endif
|
||||
|
||||
#ifdef IS_ROCM
|
||||
#define USE_BITSELECT
|
||||
#define USE_ROTATE
|
||||
|
@ -32,7 +32,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -139,7 +139,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -246,7 +246,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
|
@ -19,7 +19,7 @@
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
|
||||
/*
|
||||
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
|
||||
|
@ -31,7 +31,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -138,7 +138,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -245,7 +245,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -775,7 +775,7 @@ KERNEL_FQ void m02500_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_eapol_t)
|
||||
s_te4[i] = te4[i];
|
||||
}
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
__syncthreads();
|
||||
#else
|
||||
SYNC_THREADS ();
|
||||
|
@ -19,7 +19,7 @@
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
|
||||
/*
|
||||
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
|
||||
|
@ -2119,7 +2119,7 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
u32 tmp4;
|
||||
u32 tmp5;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
|
@ -28,7 +28,7 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -135,7 +135,7 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 tmp3;
|
||||
u32 tmp4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
@ -242,7 +242,7 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 tmp1;
|
||||
u32 tmp2;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
|
@ -45,7 +45,7 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
@ -165,7 +165,7 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
@ -322,7 +322,7 @@ DECLSPEC u32 memcat16s (u32 *block, const u32 offset, const u32 *append, const u
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
@ -456,7 +456,7 @@ DECLSPEC u32 memcat16sc (u32 *block, const u32 offset, const u32 *append, const
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be (in1, in2, offset);
|
||||
@ -756,7 +756,7 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
@ -915,7 +915,7 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
@ -1074,7 +1074,7 @@ DECLSPEC u32 memcat24 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset);
|
||||
const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset);
|
||||
const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset);
|
||||
|
@ -17,13 +17,15 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
*/
|
||||
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
#define SETSHIFTEDINT(a,n,v) \
|
||||
{ \
|
||||
|
@ -15,13 +15,8 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
#define SETSHIFTEDINT(a,n,v) \
|
||||
{ \
|
||||
|
@ -15,13 +15,8 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
CONSTANT_VK u32a sapb_trans_tbl[256] =
|
||||
{
|
||||
|
@ -17,13 +17,8 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
#define SETSHIFTEDINT(a,n,v) \
|
||||
{ \
|
||||
|
@ -15,13 +15,8 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
#define SETSHIFTEDINT(a,n,v) \
|
||||
{ \
|
||||
|
@ -15,13 +15,8 @@
|
||||
#include "inc_hash_md5.cl"
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#define GETCHAR(a,p) (((a)[(p) / 4] >> (((p) & 3) * 8)) & 0xff)
|
||||
#define PUTCHAR(a,p,c) ((a)[(p) / 4] = (((a)[(p) / 4] & ~(0xff << (((p) & 3) * 8))) | ((c) << (((p) & 3) * 8))))
|
||||
#else
|
||||
#define GETCHAR(a,p) ((u8 *)(a))[(p)]
|
||||
#define PUTCHAR(a,p,c) ((u8 *)(a))[(p)] = (u8) (c)
|
||||
#endif
|
||||
|
||||
CONSTANT_VK u32a sapb_trans_tbl[256] =
|
||||
{
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -143,7 +143,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -141,7 +141,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
|
||||
|
||||
ROUND_STEP_Z (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_STEP_Z (16);
|
||||
ROUND_STEP_Z (32);
|
||||
ROUND_STEP_Z (48);
|
||||
|
@ -24,7 +24,7 @@ typedef struct
|
||||
|
||||
} scrypt_tmp_t;
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
|
||||
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
|
||||
@ -57,7 +57,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
|
||||
|
||||
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
#define SALSA20_2R() \
|
||||
{ \
|
||||
@ -303,7 +303,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
digest[6] = sha256_hmac_ctx2.opad.h[6];
|
||||
digest[7] = sha256_hmac_ctx2.opad.h[7];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
|
||||
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
|
||||
#else
|
||||
@ -331,7 +331,7 @@ KERNEL_FQ void m08900_init (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
|
||||
uint4 X[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
|
||||
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
|
||||
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
|
||||
@ -441,7 +441,7 @@ KERNEL_FQ void m08900_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
|
||||
uint4 T[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
|
||||
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
|
||||
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
|
||||
|
@ -232,7 +232,7 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
||||
|
||||
u32 i;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| hc_bytealign_be (bl[0], 0, pm4);
|
||||
@ -263,7 +263,7 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
|
||||
const u32 om = m % 4;
|
||||
const u32 od = m / 4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -42,7 +42,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
@ -37,7 +37,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
@ -51,7 +51,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
|
@ -49,7 +49,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
|
@ -48,7 +48,7 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp15;
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
|
@ -19,7 +19,7 @@
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
#ifdef IS_AMD
|
||||
#if (defined IS_AMD || defined IS_HIP)
|
||||
#define KXX_DECL
|
||||
#endif
|
||||
|
||||
@ -896,7 +896,7 @@ DECLSPEC void s8 (const u32 a1, const u32 a2, const u32 a3, const u32 a4, const
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
|
||||
/*
|
||||
* Bitslice DES S-boxes for x86 with MMX/SSE2/AVX and for typical RISC
|
||||
|
@ -31,7 +31,7 @@ typedef struct ethereum_scrypt
|
||||
|
||||
} ethereum_scrypt_t;
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
|
||||
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
|
||||
@ -64,7 +64,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
|
||||
|
||||
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
#define SALSA20_2R() \
|
||||
{ \
|
||||
@ -439,7 +439,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
|
||||
digest[6] = sha256_hmac_ctx2.opad.h[6];
|
||||
digest[7] = sha256_hmac_ctx2.opad.h[7];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
|
||||
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
|
||||
#else
|
||||
@ -467,7 +467,7 @@ KERNEL_FQ void m15700_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
|
||||
|
||||
uint4 X[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
|
||||
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
|
||||
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
|
||||
@ -577,7 +577,7 @@ KERNEL_FQ void m15700_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, ethereum_scrypt_
|
||||
|
||||
uint4 T[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
|
||||
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
|
||||
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
|
||||
|
@ -89,7 +89,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_full (const u32x *w0, const u32x *w1, const u32x
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
@ -182,7 +182,7 @@ DECLSPEC void sha512_transform_opt (const u32x *w0, const u32x *w1, const u32x *
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -797,7 +797,7 @@ KERNEL_FQ void m22000_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pbkdf2_tmp_t, wpa_t))
|
||||
s_te4[i] = te4[i];
|
||||
}
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
__syncthreads();
|
||||
#else
|
||||
SYNC_THREADS ();
|
||||
|
@ -610,7 +610,7 @@ KERNEL_FQ void m22001_aux3 (KERN_ATTR_TMPS_ESALT (wpa_pmk_tmp_t, wpa_t))
|
||||
s_te4[i] = te4[i];
|
||||
}
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
__syncthreads();
|
||||
#else
|
||||
SYNC_THREADS ();
|
||||
|
@ -86,7 +86,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -84,7 +84,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
|
||||
|
||||
ROUND_STEP (0);
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA
|
||||
ROUND_EXPAND (); ROUND_STEP (16);
|
||||
ROUND_EXPAND (); ROUND_STEP (32);
|
||||
ROUND_EXPAND (); ROUND_STEP (48);
|
||||
|
@ -72,7 +72,7 @@ DECLSPEC int is_valid_bitcoinj (const u32 *w)
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
inline __device__ uint4 operator & (const uint4 a, const u32 b) { return make_uint4 ((a.x & b ), (a.y & b ), (a.z & b ), (a.w & b )); }
|
||||
inline __device__ uint4 operator << (const uint4 a, const u32 b) { return make_uint4 ((a.x << b ), (a.y << b ), (a.z << b ), (a.w << b )); }
|
||||
@ -105,7 +105,7 @@ DECLSPEC uint4 hc_swap32_4 (uint4 v)
|
||||
|
||||
#define ADD_ROTATE_XOR(r,i1,i2,s) (r) ^= rotate ((i1) + (i2), (s));
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
|
||||
#define SALSA20_2R() \
|
||||
{ \
|
||||
@ -374,7 +374,7 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
digest[6] = sha256_hmac_ctx2.opad.h[6];
|
||||
digest[7] = sha256_hmac_ctx2.opad.h[7];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
const uint4 tmp0 = make_uint4 (digest[0], digest[1], digest[2], digest[3]);
|
||||
const uint4 tmp1 = make_uint4 (digest[4], digest[5], digest[6], digest[7]);
|
||||
#else
|
||||
@ -402,7 +402,7 @@ KERNEL_FQ void m22700_init (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
|
||||
uint4 X[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
X[0] = make_uint4 (T[0].x, T[1].y, T[2].z, T[3].w);
|
||||
X[1] = make_uint4 (T[1].x, T[2].y, T[3].z, T[0].w);
|
||||
X[2] = make_uint4 (T[2].x, T[3].y, T[0].z, T[1].w);
|
||||
@ -575,7 +575,7 @@ KERNEL_FQ void m22700_comp (KERN_ATTR_TMPS (scrypt_tmp_t))
|
||||
|
||||
uint4 T[4];
|
||||
|
||||
#ifdef IS_CUDA
|
||||
#if defined IS_CUDA || defined IS_HIP
|
||||
T[0] = make_uint4 (X[0].x, X[3].y, X[2].z, X[1].w);
|
||||
T[1] = make_uint4 (X[1].x, X[0].y, X[3].z, X[2].w);
|
||||
T[2] = make_uint4 (X[2].x, X[1].y, X[0].z, X[3].w);
|
||||
|
@ -145,7 +145,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
@ -56,7 +56,7 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
@ -126,6 +126,11 @@ KERNEL_FQ void gpu_memset (GLOBAL_AS uint4 *buf, const u32 value, const u64 gid_
|
||||
r.y = value;
|
||||
r.z = value;
|
||||
r.w = value;
|
||||
#elif defined IS_HIP
|
||||
r.x = value;
|
||||
r.y = value;
|
||||
r.z = value;
|
||||
r.w = value;
|
||||
#endif
|
||||
|
||||
buf[gid] = r;
|
||||
|
@ -274,6 +274,12 @@ GeForce_RTX_3070 ALIAS_nv_sm50_or_higher
|
||||
GeForce_RTX_3080 ALIAS_nv_sm50_or_higher
|
||||
GeForce_RTX_3090 ALIAS_nv_sm50_or_higher
|
||||
|
||||
##
|
||||
## Unmapped GPU
|
||||
##
|
||||
|
||||
Device_738c ALIAS_AMD_MI100
|
||||
|
||||
#############
|
||||
## ENTRIES ##
|
||||
#############
|
||||
@ -498,14 +504,20 @@ GeForce_RTX_2080_Ti * 9300 1 532
|
||||
GeForce_RTX_2080_Ti * 15700 1 68 A
|
||||
GeForce_RTX_2080_Ti * 22700 1 68 A
|
||||
|
||||
## 4GB
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A
|
||||
|
||||
## 8GB
|
||||
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 8900 1 28 A
|
||||
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 9300 1 442 A
|
||||
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 15700 1 28 A
|
||||
Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 22700 1 28 A
|
||||
|
||||
## 4GB
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A
|
||||
AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A
|
||||
## 32GB
|
||||
ALIAS_AMD_MI100 * 8900 1 76 A
|
||||
ALIAS_AMD_MI100 * 9300 1 63 A
|
||||
ALIAS_AMD_MI100 * 15700 1 76 A
|
||||
ALIAS_AMD_MI100 * 22700 1 76 A
|
||||
|
@ -22,14 +22,20 @@ static const char CL_VENDOR_MESA[] = "Mesa";
|
||||
static const char CL_VENDOR_NV[] = "NVIDIA Corporation";
|
||||
static const char CL_VENDOR_POCL[] = "The pocl project";
|
||||
|
||||
int cuda_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void cuda_close (hashcat_ctx_t *hashcat_ctx);
|
||||
int cuda_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void cuda_close (hashcat_ctx_t *hashcat_ctx);
|
||||
|
||||
int nvrtc_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void nvrtc_close (hashcat_ctx_t *hashcat_ctx);
|
||||
int hip_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void hip_close (hashcat_ctx_t *hashcat_ctx);
|
||||
|
||||
int ocl_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void ocl_close (hashcat_ctx_t *hashcat_ctx);
|
||||
int ocl_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void ocl_close (hashcat_ctx_t *hashcat_ctx);
|
||||
|
||||
int nvrtc_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void nvrtc_close (hashcat_ctx_t *hashcat_ctx);
|
||||
|
||||
int hiprtc_init (hashcat_ctx_t *hashcat_ctx);
|
||||
void hiprtc_close (hashcat_ctx_t *hashcat_ctx);
|
||||
|
||||
int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
|
||||
int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
|
||||
@ -79,6 +85,54 @@ int hc_cuLinkAddData (hashcat_ctx_t *hashcat_ctx, CUlinkState state,
|
||||
int hc_cuLinkDestroy (hashcat_ctx_t *hashcat_ctx, CUlinkState state);
|
||||
int hc_cuLinkComplete (hashcat_ctx_t *hashcat_ctx, CUlinkState state, void **cubinOut, size_t *sizeOut);
|
||||
|
||||
int hc_nvrtcCreateProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog, const char *src, const char *name, int numHeaders, const char * const *headers, const char * const *includeNames);
|
||||
int hc_nvrtcDestroyProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram *prog);
|
||||
int hc_nvrtcCompileProgram (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, int numOptions, const char * const *options);
|
||||
int hc_nvrtcGetProgramLogSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *logSizeRet);
|
||||
int hc_nvrtcGetProgramLog (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *log);
|
||||
int hc_nvrtcGetPTXSize (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, size_t *ptxSizeRet);
|
||||
int hc_nvrtcGetPTX (hashcat_ctx_t *hashcat_ctx, nvrtcProgram prog, char *ptx);
|
||||
int hc_nvrtcVersion (hashcat_ctx_t *hashcat_ctx, int *major, int *minor);
|
||||
|
||||
int hc_hipCtxCreate (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx, unsigned int flags, HIPdevice dev);
|
||||
int hc_hipCtxDestroy (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
|
||||
int hc_hipCtxSetCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
|
||||
int hc_hipCtxSetCacheConfig (hashcat_ctx_t *hashcat_ctx, HIPfunc_cache config);
|
||||
int hc_hipCtxSynchronize (hashcat_ctx_t *hashcat_ctx);
|
||||
int hc_hipDeviceGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPdevice_attribute attrib, HIPdevice dev);
|
||||
int hc_hipDeviceGetCount (hashcat_ctx_t *hashcat_ctx, int *count);
|
||||
int hc_hipDeviceGet (hashcat_ctx_t *hashcat_ctx, HIPdevice *device, int ordinal);
|
||||
int hc_hipDeviceGetName (hashcat_ctx_t *hashcat_ctx, char *name, int len, HIPdevice dev);
|
||||
int hc_hipDeviceTotalMem (hashcat_ctx_t *hashcat_ctx, size_t *bytes, HIPdevice dev);
|
||||
int hc_hipDriverGetVersion (hashcat_ctx_t *hashcat_ctx, int *driverVersion);
|
||||
int hc_hipEventCreate (hashcat_ctx_t *hashcat_ctx, HIPevent *phEvent, unsigned int Flags);
|
||||
int hc_hipEventDestroy (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
|
||||
int hc_hipEventElapsedTime (hashcat_ctx_t *hashcat_ctx, float *pMilliseconds, HIPevent hStart, HIPevent hEnd);
|
||||
int hc_hipEventQuery (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
|
||||
int hc_hipEventRecord (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent, HIPstream hStream);
|
||||
int hc_hipEventSynchronize (hashcat_ctx_t *hashcat_ctx, HIPevent hEvent);
|
||||
int hc_hipFuncGetAttribute (hashcat_ctx_t *hashcat_ctx, int *pi, HIPfunction_attribute attrib, HIPfunction hfunc);
|
||||
//int hc_hipFuncSetAttribute (hashcat_ctx_t *hashcat_ctx, HIPfunction hfunc, HIPfunction_attribute attrib, int value);
|
||||
int hc_hipInit (hashcat_ctx_t *hashcat_ctx, unsigned int Flags);
|
||||
int hc_hipLaunchKernel (hashcat_ctx_t *hashcat_ctx, HIPfunction f, unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, unsigned int sharedMemBytes, HIPstream hStream, void **kernelParams, void **extra);
|
||||
int hc_hipMemAlloc (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr *dptr, size_t bytesize);
|
||||
int hc_hipMemcpyDtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, HIPdeviceptr srcDevice, size_t ByteCount);
|
||||
int hc_hipMemcpyDtoH (hashcat_ctx_t *hashcat_ctx, void *dstHost, HIPdeviceptr srcDevice, size_t ByteCount);
|
||||
int hc_hipMemcpyHtoD (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
|
||||
int hc_hipMemFree (hashcat_ctx_t *hashcat_ctx, HIPdeviceptr dptr);
|
||||
int hc_hipModuleGetFunction (hashcat_ctx_t *hashcat_ctx, HIPfunction *hfunc, HIPmodule hmod, const char *name);
|
||||
int hc_hipModuleLoadDataEx (hashcat_ctx_t *hashcat_ctx, HIPmodule *module, const void *image, unsigned int numOptions, HIPjit_option *options, void **optionValues);
|
||||
int hc_hipModuleUnload (hashcat_ctx_t *hashcat_ctx, HIPmodule hmod);
|
||||
int hc_hipStreamCreate (hashcat_ctx_t *hashcat_ctx, HIPstream *phStream, unsigned int Flags);
|
||||
int hc_hipStreamDestroy (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
|
||||
int hc_hipStreamSynchronize (hashcat_ctx_t *hashcat_ctx, HIPstream hStream);
|
||||
int hc_hipCtxPushCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext ctx);
|
||||
int hc_hipCtxPopCurrent (hashcat_ctx_t *hashcat_ctx, HIPcontext *pctx);
|
||||
int hc_hipLinkCreate (hashcat_ctx_t *hashcat_ctx, unsigned int numOptions, HIPjit_option *options, void **optionValues, HIPlinkState *stateOut);
|
||||
int hc_hipLinkAddData (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, HIPjitInputType type, void *data, size_t size, const char *name, unsigned int numOptions, HIPjit_option *options, void **optionValues);
|
||||
int hc_hipLinkDestroy (hashcat_ctx_t *hashcat_ctx, HIPlinkState state);
|
||||
int hc_hipLinkComplete (hashcat_ctx_t *hashcat_ctx, HIPlinkState state, void **cubinOut, size_t *sizeOut);
|
||||
|
||||
int hc_clBuildProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
|
||||
int hc_clCompileProgram (hashcat_ctx_t *hashcat_ctx, cl_program program, cl_uint num_devices, const cl_device_id *device_list, const char *options, cl_uint num_input_headers, const cl_program *input_headers, const char **header_include_names, void (CL_CALLBACK *pfn_notify) (cl_program program, void *user_data), void *user_data);
|
||||
int hc_clCreateBuffer (hashcat_ctx_t *hashcat_ctx, cl_context context, cl_mem_flags flags, size_t size, void *host_ptr, cl_mem *mem);
|
||||
@ -121,15 +175,20 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param,
|
||||
|
||||
void rebuild_pws_compressed_append (hc_device_param_t *device_param, const u64 pws_cnt, const u8 chr);
|
||||
|
||||
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
|
||||
int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
|
||||
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
|
||||
int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
|
||||
int run_cuda_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
|
||||
int run_cuda_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 num);
|
||||
int run_cuda_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u32 value, const u64 size);
|
||||
int run_cuda_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, CUdeviceptr buf, const u64 size);
|
||||
|
||||
int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
|
||||
int run_hip_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
|
||||
int run_hip_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 num);
|
||||
int run_hip_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u32 value, const u64 size);
|
||||
int run_hip_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, HIPdeviceptr buf, const u64 size);
|
||||
|
||||
int run_opencl_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
|
||||
int run_opencl_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 num);
|
||||
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
|
||||
int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
|
||||
int run_opencl_kernel_memset (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u32 value, const u64 size);
|
||||
int run_opencl_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, cl_mem buf, const u64 size);
|
||||
|
||||
int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 pws_pos, const u64 num, const u32 event_update, const u32 iteration);
|
||||
int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kern_run, const u64 num);
|
||||
|
1131
include/ext_hip.h
Normal file
1131
include/ext_hip.h
Normal file
File diff suppressed because it is too large
Load Diff
87
include/ext_hiprtc.h
Normal file
87
include/ext_hiprtc.h
Normal file
@ -0,0 +1,87 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#ifndef _EXT_HIPRTC_H
|
||||
#define _EXT_HIPRTC_H
|
||||
|
||||
/**
|
||||
* from hip_runtime.h (/opt/rocm/hip/include/hip/amd_detail/hiprtc.h)
|
||||
*/
|
||||
|
||||
/**
|
||||
* \ingroup error
|
||||
* \brief The enumerated type hiprtcResult defines API call result codes.
|
||||
* HIPRTC API functions return hiprtcResult to indicate the call
|
||||
* result.
|
||||
*/
|
||||
typedef enum {
|
||||
HIPRTC_SUCCESS = 0,
|
||||
HIPRTC_ERROR_OUT_OF_MEMORY = 1,
|
||||
HIPRTC_ERROR_PROGRAM_CREATION_FAILURE = 2,
|
||||
HIPRTC_ERROR_INVALID_INPUT = 3,
|
||||
HIPRTC_ERROR_INVALID_PROGRAM = 4,
|
||||
HIPRTC_ERROR_INVALID_OPTION = 5,
|
||||
HIPRTC_ERROR_COMPILATION = 6,
|
||||
HIPRTC_ERROR_BUILTIN_OPERATION_FAILURE = 7,
|
||||
HIPRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION = 8,
|
||||
HIPRTC_ERROR_NO_LOWERED_NAMES_BEFORE_COMPILATION = 9,
|
||||
HIPRTC_ERROR_NAME_EXPRESSION_NOT_VALID = 10,
|
||||
HIPRTC_ERROR_INTERNAL_ERROR = 11
|
||||
} hiprtcResult;
|
||||
|
||||
/**
|
||||
* \ingroup compilation
|
||||
* \brief hiprtcProgram is the unit of compilation, and an opaque handle for
|
||||
* a program.
|
||||
*
|
||||
* To compile a CUDA program string, an instance of hiprtcProgram must be
|
||||
* created first with ::hiprtcCreateProgram, then compiled with
|
||||
* ::hiprtcCompileProgram.
|
||||
*/
|
||||
typedef struct _hiprtcProgram *hiprtcProgram;
|
||||
|
||||
#ifdef _WIN32
|
||||
#define HIPRTCAPI __stdcall
|
||||
#else
|
||||
#define HIPRTCAPI
|
||||
#endif
|
||||
|
||||
#define HIPRTC_API_CALL HIPRTCAPI
|
||||
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCADDNAMEEXPRESSION) (hiprtcProgram, const char * const);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCOMPILEPROGRAM) (hiprtcProgram, int, const char * const *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCCREATEPROGRAM) (hiprtcProgram *, const char *, const char *, int, const char * const *, const char * const *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCDESTROYPROGRAM) (hiprtcProgram *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETLOWEREDNAME) (hiprtcProgram, const char * const, const char **);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTX) (hiprtcProgram, char *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPTXSIZE) (hiprtcProgram, size_t *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOG) (hiprtcProgram, char *);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCGETPROGRAMLOGSIZE) (hiprtcProgram, size_t *);
|
||||
typedef const char * (HIPRTC_API_CALL *HIPRTC_HIPRTCGETERRORSTRING) (hiprtcResult);
|
||||
typedef hiprtcResult (HIPRTC_API_CALL *HIPRTC_HIPRTCVERSION) (int *, int *);
|
||||
|
||||
typedef struct hc_hiprtc_lib
|
||||
{
|
||||
hc_dynlib_t lib;
|
||||
|
||||
HIPRTC_HIPRTCADDNAMEEXPRESSION hiprtcAddNameExpression;
|
||||
HIPRTC_HIPRTCCOMPILEPROGRAM hiprtcCompileProgram;
|
||||
HIPRTC_HIPRTCCREATEPROGRAM hiprtcCreateProgram;
|
||||
HIPRTC_HIPRTCDESTROYPROGRAM hiprtcDestroyProgram;
|
||||
HIPRTC_HIPRTCGETLOWEREDNAME hiprtcGetLoweredName;
|
||||
HIPRTC_HIPRTCGETPTX hiprtcGetCode;
|
||||
HIPRTC_HIPRTCGETPTXSIZE hiprtcGetCodeSize;
|
||||
HIPRTC_HIPRTCGETPROGRAMLOG hiprtcGetProgramLog;
|
||||
HIPRTC_HIPRTCGETPROGRAMLOGSIZE hiprtcGetProgramLogSize;
|
||||
HIPRTC_HIPRTCGETERRORSTRING hiprtcGetErrorString;
|
||||
HIPRTC_HIPRTCVERSION hiprtcVersion;
|
||||
|
||||
} hc_hiprtc_lib_t;
|
||||
|
||||
typedef hc_hiprtc_lib_t HIPRTC_PTR;
|
||||
|
||||
int hiprtc_make_options_array_from_string (char *string, char **options);
|
||||
|
||||
#endif // _EXT_HIPRTC_H
|
258
include/types.h
258
include/types.h
@ -184,6 +184,7 @@ typedef enum vendor_id
|
||||
VENDOR_ID_NV = (1U << 5),
|
||||
VENDOR_ID_POCL = (1U << 6),
|
||||
VENDOR_ID_AMD_USE_INTEL = (1U << 7),
|
||||
VENDOR_ID_AMD_USE_HIP = (1U << 8),
|
||||
VENDOR_ID_GENERIC = (1U << 31)
|
||||
|
||||
} vendor_id_t;
|
||||
@ -645,6 +646,7 @@ typedef enum user_options_defaults
|
||||
MARKOV_THRESHOLD = 0,
|
||||
NONCE_ERROR_CORRECTIONS = 8,
|
||||
BACKEND_IGNORE_CUDA = false,
|
||||
BACKEND_IGNORE_HIP = false,
|
||||
BACKEND_IGNORE_OPENCL = false,
|
||||
BACKEND_INFO = false,
|
||||
BACKEND_VECTOR_WIDTH = 0,
|
||||
@ -695,113 +697,114 @@ typedef enum user_options_map
|
||||
IDX_ATTACK_MODE = 'a',
|
||||
IDX_BACKEND_DEVICES = 'd',
|
||||
IDX_BACKEND_IGNORE_CUDA = 0xff01,
|
||||
IDX_BACKEND_IGNORE_OPENCL = 0xff02,
|
||||
IDX_BACKEND_IGNORE_HIP = 0xff02,
|
||||
IDX_BACKEND_IGNORE_OPENCL = 0xff03,
|
||||
IDX_BACKEND_INFO = 'I',
|
||||
IDX_BACKEND_VECTOR_WIDTH = 0xff03,
|
||||
IDX_BENCHMARK_ALL = 0xff04,
|
||||
IDX_BACKEND_VECTOR_WIDTH = 0xff04,
|
||||
IDX_BENCHMARK_ALL = 0xff05,
|
||||
IDX_BENCHMARK = 'b',
|
||||
IDX_BITMAP_MAX = 0xff05,
|
||||
IDX_BITMAP_MIN = 0xff06,
|
||||
IDX_BITMAP_MAX = 0xff06,
|
||||
IDX_BITMAP_MIN = 0xff07,
|
||||
#ifdef WITH_BRAIN
|
||||
IDX_BRAIN_CLIENT = 'z',
|
||||
IDX_BRAIN_CLIENT_FEATURES = 0xff07,
|
||||
IDX_BRAIN_HOST = 0xff08,
|
||||
IDX_BRAIN_PASSWORD = 0xff09,
|
||||
IDX_BRAIN_PORT = 0xff0a,
|
||||
IDX_BRAIN_SERVER = 0xff0b,
|
||||
IDX_BRAIN_SERVER_TIMER = 0xff0c,
|
||||
IDX_BRAIN_SESSION = 0xff0d,
|
||||
IDX_BRAIN_SESSION_WHITELIST = 0xff0e,
|
||||
IDX_BRAIN_CLIENT_FEATURES = 0xff08,
|
||||
IDX_BRAIN_HOST = 0xff09,
|
||||
IDX_BRAIN_PASSWORD = 0xff0a,
|
||||
IDX_BRAIN_PORT = 0xff0b,
|
||||
IDX_BRAIN_SERVER = 0xff0c,
|
||||
IDX_BRAIN_SERVER_TIMER = 0xff0d,
|
||||
IDX_BRAIN_SESSION = 0xff0e,
|
||||
IDX_BRAIN_SESSION_WHITELIST = 0xff0f,
|
||||
#endif
|
||||
IDX_CPU_AFFINITY = 0xff0f,
|
||||
IDX_CPU_AFFINITY = 0xff10,
|
||||
IDX_CUSTOM_CHARSET_1 = '1',
|
||||
IDX_CUSTOM_CHARSET_2 = '2',
|
||||
IDX_CUSTOM_CHARSET_3 = '3',
|
||||
IDX_CUSTOM_CHARSET_4 = '4',
|
||||
IDX_DEBUG_FILE = 0xff10,
|
||||
IDX_DEBUG_MODE = 0xff11,
|
||||
IDX_ENCODING_FROM = 0xff12,
|
||||
IDX_ENCODING_TO = 0xff13,
|
||||
IDX_HASH_INFO = 0xff14,
|
||||
IDX_FORCE = 0xff15,
|
||||
IDX_HWMON_DISABLE = 0xff16,
|
||||
IDX_HWMON_TEMP_ABORT = 0xff17,
|
||||
IDX_DEBUG_FILE = 0xff11,
|
||||
IDX_DEBUG_MODE = 0xff12,
|
||||
IDX_ENCODING_FROM = 0xff13,
|
||||
IDX_ENCODING_TO = 0xff14,
|
||||
IDX_HASH_INFO = 0xff15,
|
||||
IDX_FORCE = 0xff16,
|
||||
IDX_HWMON_DISABLE = 0xff17,
|
||||
IDX_HWMON_TEMP_ABORT = 0xff18,
|
||||
IDX_HASH_MODE = 'm',
|
||||
IDX_HCCAPX_MESSAGE_PAIR = 0xff18,
|
||||
IDX_HCCAPX_MESSAGE_PAIR = 0xff19,
|
||||
IDX_HELP = 'h',
|
||||
IDX_HEX_CHARSET = 0xff19,
|
||||
IDX_HEX_SALT = 0xff1a,
|
||||
IDX_HEX_WORDLIST = 0xff1b,
|
||||
IDX_HOOK_THREADS = 0xff1c,
|
||||
IDX_IDENTIFY = 0xff1d,
|
||||
IDX_HEX_CHARSET = 0xff1a,
|
||||
IDX_HEX_SALT = 0xff1b,
|
||||
IDX_HEX_WORDLIST = 0xff1c,
|
||||
IDX_HOOK_THREADS = 0xff1d,
|
||||
IDX_IDENTIFY = 0xff1e,
|
||||
IDX_INCREMENT = 'i',
|
||||
IDX_INCREMENT_MAX = 0xff1e,
|
||||
IDX_INCREMENT_MIN = 0xff1f,
|
||||
IDX_INDUCTION_DIR = 0xff20,
|
||||
IDX_KEEP_GUESSING = 0xff21,
|
||||
IDX_INCREMENT_MAX = 0xff1f,
|
||||
IDX_INCREMENT_MIN = 0xff20,
|
||||
IDX_INDUCTION_DIR = 0xff21,
|
||||
IDX_KEEP_GUESSING = 0xff22,
|
||||
IDX_KERNEL_ACCEL = 'n',
|
||||
IDX_KERNEL_LOOPS = 'u',
|
||||
IDX_KERNEL_THREADS = 'T',
|
||||
IDX_KEYBOARD_LAYOUT_MAPPING = 0xff22,
|
||||
IDX_KEYSPACE = 0xff23,
|
||||
IDX_LEFT = 0xff24,
|
||||
IDX_KEYBOARD_LAYOUT_MAPPING = 0xff23,
|
||||
IDX_KEYSPACE = 0xff24,
|
||||
IDX_LEFT = 0xff25,
|
||||
IDX_LIMIT = 'l',
|
||||
IDX_LOGFILE_DISABLE = 0xff25,
|
||||
IDX_LOOPBACK = 0xff26,
|
||||
IDX_MACHINE_READABLE = 0xff27,
|
||||
IDX_MARKOV_CLASSIC = 0xff28,
|
||||
IDX_MARKOV_DISABLE = 0xff29,
|
||||
IDX_MARKOV_HCSTAT2 = 0xff2a,
|
||||
IDX_MARKOV_INVERSE = 0xff2b,
|
||||
IDX_LOGFILE_DISABLE = 0xff26,
|
||||
IDX_LOOPBACK = 0xff27,
|
||||
IDX_MACHINE_READABLE = 0xff28,
|
||||
IDX_MARKOV_CLASSIC = 0xff29,
|
||||
IDX_MARKOV_DISABLE = 0xff2a,
|
||||
IDX_MARKOV_HCSTAT2 = 0xff2b,
|
||||
IDX_MARKOV_INVERSE = 0xff2c,
|
||||
IDX_MARKOV_THRESHOLD = 't',
|
||||
IDX_NONCE_ERROR_CORRECTIONS = 0xff2c,
|
||||
IDX_NONCE_ERROR_CORRECTIONS = 0xff2d,
|
||||
IDX_OPENCL_DEVICE_TYPES = 'D',
|
||||
IDX_OPTIMIZED_KERNEL_ENABLE = 'O',
|
||||
IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2d,
|
||||
IDX_OUTFILE_CHECK_DIR = 0xff2e,
|
||||
IDX_OUTFILE_CHECK_TIMER = 0xff2f,
|
||||
IDX_OUTFILE_FORMAT = 0xff30,
|
||||
IDX_OUTFILE_AUTOHEX_DISABLE = 0xff2e,
|
||||
IDX_OUTFILE_CHECK_DIR = 0xff2f,
|
||||
IDX_OUTFILE_CHECK_TIMER = 0xff30,
|
||||
IDX_OUTFILE_FORMAT = 0xff31,
|
||||
IDX_OUTFILE = 'o',
|
||||
IDX_POTFILE_DISABLE = 0xff31,
|
||||
IDX_POTFILE_PATH = 0xff32,
|
||||
IDX_PROGRESS_ONLY = 0xff33,
|
||||
IDX_QUIET = 0xff34,
|
||||
IDX_REMOVE = 0xff35,
|
||||
IDX_REMOVE_TIMER = 0xff36,
|
||||
IDX_RESTORE = 0xff37,
|
||||
IDX_RESTORE_DISABLE = 0xff38,
|
||||
IDX_RESTORE_FILE_PATH = 0xff39,
|
||||
IDX_POTFILE_DISABLE = 0xff32,
|
||||
IDX_POTFILE_PATH = 0xff33,
|
||||
IDX_PROGRESS_ONLY = 0xff34,
|
||||
IDX_QUIET = 0xff35,
|
||||
IDX_REMOVE = 0xff36,
|
||||
IDX_REMOVE_TIMER = 0xff37,
|
||||
IDX_RESTORE = 0xff38,
|
||||
IDX_RESTORE_DISABLE = 0xff39,
|
||||
IDX_RESTORE_FILE_PATH = 0xff3a,
|
||||
IDX_RP_FILE = 'r',
|
||||
IDX_RP_GEN_FUNC_MAX = 0xff3a,
|
||||
IDX_RP_GEN_FUNC_MIN = 0xff3b,
|
||||
IDX_RP_GEN_FUNC_MAX = 0xff3b,
|
||||
IDX_RP_GEN_FUNC_MIN = 0xff3c,
|
||||
IDX_RP_GEN = 'g',
|
||||
IDX_RP_GEN_SEED = 0xff3c,
|
||||
IDX_RP_GEN_SEED = 0xff3d,
|
||||
IDX_RULE_BUF_L = 'j',
|
||||
IDX_RULE_BUF_R = 'k',
|
||||
IDX_RUNTIME = 0xff3d,
|
||||
IDX_SCRYPT_TMTO = 0xff3e,
|
||||
IDX_RUNTIME = 0xff3e,
|
||||
IDX_SCRYPT_TMTO = 0xff3f,
|
||||
IDX_SEGMENT_SIZE = 'c',
|
||||
IDX_SELF_TEST_DISABLE = 0xff3f,
|
||||
IDX_SELF_TEST_DISABLE = 0xff40,
|
||||
IDX_SEPARATOR = 'p',
|
||||
IDX_SESSION = 0xff40,
|
||||
IDX_SHOW = 0xff41,
|
||||
IDX_SESSION = 0xff41,
|
||||
IDX_SHOW = 0xff42,
|
||||
IDX_SKIP = 's',
|
||||
IDX_SLOW_CANDIDATES = 'S',
|
||||
IDX_SPEED_ONLY = 0xff42,
|
||||
IDX_SPIN_DAMP = 0xff43,
|
||||
IDX_STATUS = 0xff44,
|
||||
IDX_STATUS_JSON = 0xff45,
|
||||
IDX_STATUS_TIMER = 0xff46,
|
||||
IDX_STDOUT_FLAG = 0xff47,
|
||||
IDX_STDIN_TIMEOUT_ABORT = 0xff48,
|
||||
IDX_TRUECRYPT_KEYFILES = 0xff49,
|
||||
IDX_USERNAME = 0xff4a,
|
||||
IDX_VERACRYPT_KEYFILES = 0xff4b,
|
||||
IDX_VERACRYPT_PIM_START = 0xff4c,
|
||||
IDX_VERACRYPT_PIM_STOP = 0xff4d,
|
||||
IDX_SPEED_ONLY = 0xff43,
|
||||
IDX_SPIN_DAMP = 0xff44,
|
||||
IDX_STATUS = 0xff45,
|
||||
IDX_STATUS_JSON = 0xff46,
|
||||
IDX_STATUS_TIMER = 0xff47,
|
||||
IDX_STDOUT_FLAG = 0xff48,
|
||||
IDX_STDIN_TIMEOUT_ABORT = 0xff49,
|
||||
IDX_TRUECRYPT_KEYFILES = 0xff4a,
|
||||
IDX_USERNAME = 0xff4b,
|
||||
IDX_VERACRYPT_KEYFILES = 0xff4c,
|
||||
IDX_VERACRYPT_PIM_START = 0xff4d,
|
||||
IDX_VERACRYPT_PIM_STOP = 0xff4e,
|
||||
IDX_VERSION_LOWER = 'v',
|
||||
IDX_VERSION = 'V',
|
||||
IDX_WORDLIST_AUTOHEX_DISABLE = 0xff4e,
|
||||
IDX_WORDLIST_AUTOHEX_DISABLE = 0xff4f,
|
||||
IDX_WORKLOAD_PROFILE = 'w',
|
||||
|
||||
} user_options_map_t;
|
||||
@ -1077,7 +1080,10 @@ typedef struct hc_fp
|
||||
} HCFILE;
|
||||
|
||||
#include "ext_nvrtc.h"
|
||||
#include "ext_hiprtc.h"
|
||||
|
||||
#include "ext_cuda.h"
|
||||
#include "ext_hip.h"
|
||||
#include "ext_OpenCL.h"
|
||||
|
||||
typedef struct hc_device_param
|
||||
@ -1478,6 +1484,86 @@ typedef struct hc_device_param
|
||||
CUdeviceptr cuda_d_st_salts_buf;
|
||||
CUdeviceptr cuda_d_st_esalts_buf;
|
||||
|
||||
// API: hip
|
||||
|
||||
bool is_hip;
|
||||
|
||||
int hip_warp_size;
|
||||
|
||||
HIPdevice hip_device;
|
||||
HIPcontext hip_context;
|
||||
HIPstream hip_stream;
|
||||
|
||||
HIPevent hip_event1;
|
||||
HIPevent hip_event2;
|
||||
|
||||
HIPmodule hip_module;
|
||||
HIPmodule hip_module_shared;
|
||||
HIPmodule hip_module_mp;
|
||||
HIPmodule hip_module_amp;
|
||||
|
||||
HIPfunction hip_function1;
|
||||
HIPfunction hip_function12;
|
||||
HIPfunction hip_function2p;
|
||||
HIPfunction hip_function2;
|
||||
HIPfunction hip_function2e;
|
||||
HIPfunction hip_function23;
|
||||
HIPfunction hip_function3;
|
||||
HIPfunction hip_function4;
|
||||
HIPfunction hip_function_init2;
|
||||
HIPfunction hip_function_loop2p;
|
||||
HIPfunction hip_function_loop2;
|
||||
HIPfunction hip_function_mp;
|
||||
HIPfunction hip_function_mp_l;
|
||||
HIPfunction hip_function_mp_r;
|
||||
HIPfunction hip_function_amp;
|
||||
HIPfunction hip_function_tm;
|
||||
HIPfunction hip_function_memset;
|
||||
HIPfunction hip_function_atinit;
|
||||
HIPfunction hip_function_utf8toutf16le;
|
||||
HIPfunction hip_function_decompress;
|
||||
HIPfunction hip_function_aux1;
|
||||
HIPfunction hip_function_aux2;
|
||||
HIPfunction hip_function_aux3;
|
||||
HIPfunction hip_function_aux4;
|
||||
|
||||
HIPdeviceptr hip_d_pws_buf;
|
||||
HIPdeviceptr hip_d_pws_amp_buf;
|
||||
HIPdeviceptr hip_d_pws_comp_buf;
|
||||
HIPdeviceptr hip_d_pws_idx;
|
||||
HIPdeviceptr hip_d_rules;
|
||||
HIPdeviceptr hip_d_rules_c;
|
||||
HIPdeviceptr hip_d_combs;
|
||||
HIPdeviceptr hip_d_combs_c;
|
||||
HIPdeviceptr hip_d_bfs;
|
||||
HIPdeviceptr hip_d_bfs_c;
|
||||
HIPdeviceptr hip_d_tm_c;
|
||||
HIPdeviceptr hip_d_bitmap_s1_a;
|
||||
HIPdeviceptr hip_d_bitmap_s1_b;
|
||||
HIPdeviceptr hip_d_bitmap_s1_c;
|
||||
HIPdeviceptr hip_d_bitmap_s1_d;
|
||||
HIPdeviceptr hip_d_bitmap_s2_a;
|
||||
HIPdeviceptr hip_d_bitmap_s2_b;
|
||||
HIPdeviceptr hip_d_bitmap_s2_c;
|
||||
HIPdeviceptr hip_d_bitmap_s2_d;
|
||||
HIPdeviceptr hip_d_plain_bufs;
|
||||
HIPdeviceptr hip_d_digests_buf;
|
||||
HIPdeviceptr hip_d_digests_shown;
|
||||
HIPdeviceptr hip_d_salt_bufs;
|
||||
HIPdeviceptr hip_d_esalt_bufs;
|
||||
HIPdeviceptr hip_d_tmps;
|
||||
HIPdeviceptr hip_d_hooks;
|
||||
HIPdeviceptr hip_d_result;
|
||||
HIPdeviceptr hip_d_extra0_buf;
|
||||
HIPdeviceptr hip_d_extra1_buf;
|
||||
HIPdeviceptr hip_d_extra2_buf;
|
||||
HIPdeviceptr hip_d_extra3_buf;
|
||||
HIPdeviceptr hip_d_root_css_buf;
|
||||
HIPdeviceptr hip_d_markov_css_buf;
|
||||
HIPdeviceptr hip_d_st_digests_buf;
|
||||
HIPdeviceptr hip_d_st_salts_buf;
|
||||
HIPdeviceptr hip_d_st_esalts_buf;
|
||||
|
||||
// API: opencl
|
||||
|
||||
bool is_opencl;
|
||||
@ -1569,18 +1655,25 @@ typedef struct backend_ctx
|
||||
{
|
||||
bool enabled;
|
||||
|
||||
void *ocl;
|
||||
void *cuda;
|
||||
void *hip;
|
||||
void *ocl;
|
||||
|
||||
void *nvrtc;
|
||||
void *hiprtc;
|
||||
|
||||
int backend_device_from_cuda[DEVICES_MAX]; // from cuda device index to backend device index
|
||||
int backend_device_from_hip[DEVICES_MAX]; // from hip device index to backend device index
|
||||
int backend_device_from_opencl[DEVICES_MAX]; // from opencl device index to backend device index
|
||||
int backend_device_from_opencl_platform[CL_PLATFORMS_MAX][DEVICES_MAX]; // from opencl device index to backend device index (by platform)
|
||||
|
||||
int backend_devices_cnt;
|
||||
int backend_devices_active;
|
||||
|
||||
int cuda_devices_cnt;
|
||||
int cuda_devices_active;
|
||||
int hip_devices_cnt;
|
||||
int hip_devices_active;
|
||||
int opencl_devices_cnt;
|
||||
int opencl_devices_active;
|
||||
|
||||
@ -1614,6 +1707,14 @@ typedef struct backend_ctx
|
||||
int nvrtc_driver_version;
|
||||
int cuda_driver_version;
|
||||
|
||||
// hip
|
||||
|
||||
int rc_hip_init;
|
||||
int rc_hiprtc_init;
|
||||
|
||||
int hiprtc_driver_version;
|
||||
int hip_driver_version;
|
||||
|
||||
// opencl
|
||||
|
||||
cl_platform_id *opencl_platforms;
|
||||
@ -2014,6 +2115,7 @@ typedef struct user_options
|
||||
bool markov_disable;
|
||||
bool markov_inverse;
|
||||
bool backend_ignore_cuda;
|
||||
bool backend_ignore_hip;
|
||||
bool backend_ignore_opencl;
|
||||
bool backend_info;
|
||||
bool optimized_kernel_enable;
|
||||
|
@ -360,7 +360,7 @@ EMU_OBJS_ALL += emu_inc_rp emu_inc_rp_optimized
|
||||
EMU_OBJS_ALL += emu_inc_hash_md4 emu_inc_hash_md5 emu_inc_hash_ripemd160 emu_inc_hash_sha1 emu_inc_hash_sha256 emu_inc_hash_sha384 emu_inc_hash_sha512 emu_inc_hash_streebog256 emu_inc_hash_streebog512 emu_inc_ecc_secp256k1
|
||||
EMU_OBJS_ALL += emu_inc_cipher_aes emu_inc_cipher_camellia emu_inc_cipher_des emu_inc_cipher_kuznyechik emu_inc_cipher_serpent emu_inc_cipher_twofish
|
||||
|
||||
OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_nvapi ext_nvml ext_nvrtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
|
||||
OBJS_ALL := affinity autotune backend benchmark bitmap bitops combinator common convert cpt cpu_crc32 debugfile dictstat dispatch dynloader event ext_ADL ext_cuda ext_hip ext_nvapi ext_nvml ext_nvrtc ext_hiprtc ext_OpenCL ext_sysfs_amdgpu ext_sysfs_cpu ext_iokit ext_lzma filehandling folder hashcat hashes hlfmt hwmon induct interface keyboard_layout locking logfile loopback memory monitor mpsp outfile_check outfile pidfile potfile restore rp rp_cpu selftest slow_candidates shared status stdout straight terminal thread timer tuningdb usage user_options wordlist $(EMU_OBJS_ALL)
|
||||
|
||||
ifeq ($(ENABLE_BRAIN),1)
|
||||
OBJS_ALL += brain
|
||||
|
@ -157,8 +157,9 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
|
||||
const u32 kernel_power_max = device_param->hardware_power * kernel_accel_max;
|
||||
|
||||
int CL_rc;
|
||||
int CU_rc;
|
||||
int HIP_rc;
|
||||
int CL_rc;
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
@ -167,6 +168,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = run_hip_kernel_atinit (hashcat_ctx, device_param, device_param->hip_d_pws_buf, kernel_power_max);
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = run_opencl_kernel_atinit (hashcat_ctx, device_param, device_param->opencl_d_pws_buf, kernel_power_max);
|
||||
@ -190,6 +198,13 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = hc_hipMemcpyDtoD (hashcat_ctx, device_param->hip_d_rules_c, device_param->hip_d_rules, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t));
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = hc_clEnqueueCopyBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_rules, device_param->opencl_d_rules_c, 0, 0, MIN (kernel_loops_max, KERNEL_RULES) * sizeof (kernel_rule_t), 0, NULL, NULL);
|
||||
@ -383,6 +398,27 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
int HIP_rc;
|
||||
|
||||
HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_pws_buf, 0, device_param->size_pws);
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
|
||||
HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_plain_bufs, 0, device_param->size_plains);
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
|
||||
HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_digests_shown, 0, device_param->size_shown);
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
|
||||
HIP_rc = run_hip_kernel_memset (hashcat_ctx, device_param, device_param->hip_d_result, 0, device_param->size_results);
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
int CL_rc;
|
||||
@ -451,9 +487,12 @@ HC_API_CALL void *thread_autotune (void *p)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
const int rc_cuCtxSetCurrent = hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context);
|
||||
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (rc_cuCtxSetCurrent == -1) return NULL;
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
const int rc_autotune = autotune (hashcat_ctx, device_param);
|
||||
@ -463,5 +502,15 @@ HC_API_CALL void *thread_autotune (void *p)
|
||||
// we should do something here, tell hashcat main that autotune failed to abort
|
||||
}
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
3959
src/backend.c
3959
src/backend.c
File diff suppressed because it is too large
Load Diff
@ -347,7 +347,12 @@ HC_API_CALL void *thread_calc_stdin (void *p)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
|
||||
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (calc_stdin (hashcat_ctx, device_param) == -1)
|
||||
@ -357,6 +362,16 @@ HC_API_CALL void *thread_calc_stdin (void *p)
|
||||
status_ctx->devices_status = STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -1581,7 +1596,12 @@ HC_API_CALL void *thread_calc (void *p)
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
if (hc_cuCtxSetCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
|
||||
if (hc_cuCtxPushCurrent (hashcat_ctx, device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPushCurrent (hashcat_ctx, device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (calc (hashcat_ctx, device_param) == -1)
|
||||
@ -1591,5 +1611,15 @@ HC_API_CALL void *thread_calc (void *p)
|
||||
status_ctx->devices_status = STATUS_ERROR;
|
||||
}
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
{
|
||||
if (hc_cuCtxPopCurrent (hashcat_ctx, &device_param->cuda_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
if (hc_hipCtxPopCurrent (hashcat_ctx, &device_param->hip_context) == -1) return NULL;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
8
src/ext_hip.c
Normal file
8
src/ext_hip.c
Normal file
@ -0,0 +1,8 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
#include "types.h"
|
||||
#include "ext_hip.h"
|
27
src/ext_hiprtc.c
Normal file
27
src/ext_hiprtc.c
Normal file
@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Author......: See docs/credits.txt
|
||||
* License.....: MIT
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
#include "types.h"
|
||||
#include "ext_hiprtc.h"
|
||||
|
||||
int hiprtc_make_options_array_from_string (char *string, char **options)
|
||||
{
|
||||
char *saveptr = NULL;
|
||||
|
||||
char *next = strtok_r (string, " ", &saveptr);
|
||||
|
||||
int cnt = 0;
|
||||
|
||||
do
|
||||
{
|
||||
options[cnt] = next;
|
||||
|
||||
cnt++;
|
||||
|
||||
} while ((next = strtok_r ((char *) NULL, " ", &saveptr)) != NULL);
|
||||
|
||||
return cnt;
|
||||
}
|
34
src/hashes.c
34
src/hashes.c
@ -322,6 +322,11 @@ void check_hash (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, pl
|
||||
hc_cuMemcpyDtoH (hashcat_ctx, tmps, device_param->cuda_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
hc_hipMemcpyDtoH (hashcat_ctx, tmps, device_param->hip_d_tmps + (plain->gidvid * hashconfig->tmp_size), hashconfig->tmp_size);
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_tmps, CL_TRUE, plain->gidvid * hashconfig->tmp_size, hashconfig->tmp_size, tmps, 0, NULL, NULL);
|
||||
@ -481,6 +486,7 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
|
||||
u32 num_cracked = 0;
|
||||
|
||||
int CU_rc;
|
||||
int HIP_rc;
|
||||
int CL_rc;
|
||||
|
||||
if (device_param->is_cuda == true)
|
||||
@ -490,6 +496,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, &num_cracked, device_param->hip_d_result, sizeof (u32));
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
|
||||
@ -516,6 +529,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = hc_hipMemcpyDtoH (hashcat_ctx, cracked, device_param->hip_d_plain_bufs, num_cracked * sizeof (plain_t));
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = hc_clEnqueueReadBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_plain_bufs, CL_TRUE, 0, num_cracked * sizeof (plain_t), cracked, 0, NULL, NULL);
|
||||
@ -573,6 +593,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_digests_shown + (salt_buf->digests_offset * sizeof (u32)), &hashes->digests_shown_tmp[salt_buf->digests_offset], salt_buf->digests_cnt * sizeof (u32));
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_digests_shown, CL_TRUE, salt_buf->digests_offset * sizeof (u32), salt_buf->digests_cnt * sizeof (u32), &hashes->digests_shown_tmp[salt_buf->digests_offset], 0, NULL, NULL);
|
||||
@ -611,6 +638,13 @@ int check_cracked (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
|
||||
if (CU_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_hip == true)
|
||||
{
|
||||
HIP_rc = hc_hipMemcpyHtoD (hashcat_ctx, device_param->hip_d_result, &num_cracked, sizeof (u32));
|
||||
|
||||
if (HIP_rc == -1) return -1;
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
{
|
||||
CL_rc = hc_clEnqueueWriteBuffer (hashcat_ctx, device_param->opencl_command_queue, device_param->opencl_d_result, CL_TRUE, 0, sizeof (u32), &num_cracked, 0, NULL, NULL);
|
||||
|
52
src/hwmon.c
52
src/hwmon.c
@ -95,11 +95,11 @@ int hm_get_threshold_slowdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -176,11 +176,11 @@ int hm_get_threshold_shutdown_with_devices_idx (hashcat_ctx_t *hashcat_ctx, cons
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -245,7 +245,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
@ -313,7 +313,7 @@ int hm_get_temperature_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
|
||||
}
|
||||
#endif
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -401,11 +401,11 @@ int hm_get_fanpolicy_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -499,11 +499,11 @@ int hm_get_fanspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -609,11 +609,11 @@ int hm_get_buslanes_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -696,11 +696,11 @@ int hm_get_utilization_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -800,11 +800,11 @@ int hm_get_memoryspeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int b
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -887,11 +887,11 @@ int hm_get_corespeed_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int bac
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
if (hwmon_ctx->hm_adl)
|
||||
{
|
||||
@ -1003,11 +1003,11 @@ int hm_get_throttle_with_devices_idx (hashcat_ctx_t *hashcat_ctx, const int back
|
||||
}
|
||||
}
|
||||
|
||||
if (backend_ctx->devices_param[backend_device_idx].is_opencl == true)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].is_opencl == true) || (backend_ctx->devices_param[backend_device_idx].is_hip == true))
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD) || (backend_ctx->devices_param[backend_device_idx].opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
}
|
||||
|
||||
@ -1382,11 +1382,11 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
|
||||
{
|
||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) == 0) continue;
|
||||
|
||||
if (device_param->opencl_device_vendor_id != VENDOR_ID_AMD) continue;
|
||||
if ((device_param->opencl_device_vendor_id != VENDOR_ID_AMD) && (device_param->opencl_device_vendor_id != VENDOR_ID_AMD_USE_HIP)) continue;
|
||||
|
||||
for (int i = 0; i < tmp_in; i++)
|
||||
{
|
||||
@ -1438,7 +1438,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
|
||||
{
|
||||
const u32 device_id = device_param->device_id;
|
||||
|
||||
@ -1485,7 +1485,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
|
||||
{
|
||||
const u32 device_id = device_param->device_id;
|
||||
|
||||
@ -1594,7 +1594,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
if (device_param->is_opencl == true)
|
||||
if ((device_param->is_opencl == true) || (device_param->is_hip == true))
|
||||
{
|
||||
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
@ -1655,7 +1655,7 @@ int hwmon_ctx_init (hashcat_ctx_t *hashcat_ctx)
|
||||
}
|
||||
#endif
|
||||
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) || (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP))
|
||||
{
|
||||
hwmon_ctx->hm_device[backend_devices_idx].adl = hm_adapters_adl[device_id].adl;
|
||||
hwmon_ctx->hm_device[backend_devices_idx].sysfs_amdgpu = hm_adapters_sysfs_amdgpu[device_id].sysfs_amdgpu;
|
||||
|
@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -179,6 +179,14 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
|
||||
}
|
||||
}
|
||||
// ROCM
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-DDESCRYPT_SALT=%u -D _unroll", hashes->salts_buf[0].salt_buf[0] & 0xfff);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((user_options->attack_mode == ATTACK_MODE_BF) && (hashes->salts_cnt == 1) && (user_options->slow_candidates == false))
|
||||
|
@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -58,6 +58,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -59,6 +59,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -438,6 +438,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-fno-unroll-loops");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -81,6 +81,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
|
||||
return false;
|
||||
}
|
||||
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
// Extra treatment for Apple systems
|
||||
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
|
||||
{
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const bool potfile_disable = true;
|
||||
@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
|
@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
|
||||
return false;
|
||||
}
|
||||
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
// Extra treatment for Apple systems
|
||||
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
|
||||
{
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const bool potfile_disable = true;
|
||||
@ -303,7 +284,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
|
@ -83,25 +83,6 @@ bool module_unstable_warning (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE
|
||||
return false;
|
||||
}
|
||||
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
// Extra treatment for Apple systems
|
||||
if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE)
|
||||
{
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
bool module_potfile_disable (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const bool potfile_disable = true;
|
||||
@ -301,7 +282,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
|
@ -78,6 +78,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -244,6 +244,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -245,6 +245,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -80,6 +80,17 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
native_threads = 64;
|
||||
}
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
if (device_param->device_local_mem_size < 49152)
|
||||
{
|
||||
native_threads = 32;
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 32;
|
||||
|
@ -79,6 +79,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -20,6 +20,7 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_PASSWORD_MANAGER;
|
||||
static const char *HASH_NAME = "1Password, cloudkeychain";
|
||||
static const u64 KERN_TYPE = 8200;
|
||||
static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE
|
||||
| OPTI_TYPE_USES_BITS_64
|
||||
| OPTI_TYPE_SLOW_HASH_SIMD_LOOP;
|
||||
static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE;
|
||||
static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED;
|
||||
|
@ -60,6 +60,19 @@ u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED con
|
||||
return pw_max;
|
||||
}
|
||||
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-fno-unroll-loops");
|
||||
}
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
|
||||
{
|
||||
u32 *digest = (u32 *) digest_buf;
|
||||
@ -179,7 +192,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
|
@ -85,6 +85,12 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// HIP
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
hc_asprintf (&jit_build_options, "-D _unroll");
|
||||
}
|
||||
|
||||
// ROCM
|
||||
if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true))
|
||||
{
|
||||
|
@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 32;
|
||||
|
@ -77,6 +77,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 32;
|
||||
|
@ -58,6 +58,41 @@ static const char *SIGNATURE_OLDOFFICE = "$oldoffice$";
|
||||
static const char *SIGNATURE_OLDOFFICE0 = "$oldoffice$0";
|
||||
static const char *SIGNATURE_OLDOFFICE1 = "$oldoffice$1";
|
||||
|
||||
char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param)
|
||||
{
|
||||
char *jit_build_options = NULL;
|
||||
|
||||
u32 native_threads = 0;
|
||||
|
||||
if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU)
|
||||
{
|
||||
native_threads = 1;
|
||||
}
|
||||
else if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||
{
|
||||
if (device_param->opencl_device_vendor_id == VENDOR_ID_INTEL_SDK)
|
||||
{
|
||||
native_threads = 8;
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD)
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 32;
|
||||
}
|
||||
}
|
||||
|
||||
hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D _unroll", native_threads);
|
||||
|
||||
return jit_build_options;
|
||||
}
|
||||
|
||||
u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
|
||||
{
|
||||
const u64 esalt_size = (const u64) sizeof (oldoffice01_t);
|
||||
@ -273,7 +308,7 @@ void module_init (module_ctx_t *module_ctx)
|
||||
module_ctx->module_hook23 = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_salt_size = MODULE_DEFAULT;
|
||||
module_ctx->module_hook_size = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = MODULE_DEFAULT;
|
||||
module_ctx->module_jit_build_options = module_jit_build_options;
|
||||
module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
|
||||
module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
|
||||
|
@ -79,6 +79,10 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP)
|
||||
{
|
||||
native_threads = 64;
|
||||
}
|
||||
else
|
||||
{
|
||||
native_threads = 32;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user