mirror of
https://github.com/hashcat/hashcat.git
synced 2025-03-24 19:35:41 +00:00
Add more inline assembly for AMD ROCm
This commit is contained in:
parent
6c619155c3
commit
48fbe81a09
@ -146,7 +146,8 @@ static float get_entropy (const u32 *buf, const int elems)
|
||||
|
||||
static void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x3727);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x1707);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x3727);
|
||||
@ -155,9 +156,20 @@ static void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = __byte_perm (in[1], 0, 0x1707);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x3727);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x1707);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x03070207);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x01070007);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x03070207);
|
||||
out2[0] = __byte_perm (in[2], 0, 0x01070007);
|
||||
out1[3] = __byte_perm (in[1], 0, 0x03070207);
|
||||
out1[2] = __byte_perm (in[1], 0, 0x01070007);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x03070207);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x01070007);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
|
||||
out2[2] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
|
||||
out2[1] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
|
||||
@ -166,12 +178,14 @@ static void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
|
||||
out1[1] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);
|
||||
out1[0] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x1707);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x3727);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x1707);
|
||||
@ -180,9 +194,20 @@ static void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = __byte_perm (in[1], 0, 0x3727);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x1707);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x3727);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x01070007);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x03070207);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x01070007);
|
||||
out2[0] = __byte_perm (in[2], 0, 0x03070207);
|
||||
out1[3] = __byte_perm (in[1], 0, 0x01070007);
|
||||
out1[2] = __byte_perm (in[1], 0, 0x03070207);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x01070007);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x03070207);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
|
||||
out2[2] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
|
||||
out2[1] = ((in[2] << 16) & 0xFF000000) | ((in[2] << 8) & 0x0000FF00);
|
||||
@ -191,12 +216,14 @@ static void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = ((in[1] >> 0) & 0xFF000000) | ((in[1] >> 8) & 0x0000FF00);
|
||||
out1[1] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);
|
||||
out1[0] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x7372);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x7170);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x7372);
|
||||
@ -205,9 +232,20 @@ static void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = __byte_perm (in[1], 0, 0x7170);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x7372);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x7170);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x07030702);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x07010700);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x07030702);
|
||||
out2[0] = __byte_perm (in[2], 0, 0x07010700);
|
||||
out1[3] = __byte_perm (in[1], 0, 0x07030702);
|
||||
out1[2] = __byte_perm (in[1], 0, 0x07010700);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x07030702);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x07010700);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
|
||||
out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
|
||||
out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
|
||||
@ -216,12 +254,14 @@ static void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
|
||||
out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
|
||||
out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void make_utf16leN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x7170);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x7372);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x7170);
|
||||
@ -230,9 +270,20 @@ static void make_utf16leN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = __byte_perm (in[1], 0, 0x7372);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x7170);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x7372);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm (in[3], 0, 0x07010700);
|
||||
out2[2] = __byte_perm (in[3], 0, 0x07030702);
|
||||
out2[1] = __byte_perm (in[2], 0, 0x07010700);
|
||||
out2[0] = __byte_perm (in[2], 0, 0x07030702);
|
||||
out1[3] = __byte_perm (in[1], 0, 0x07010700);
|
||||
out1[2] = __byte_perm (in[1], 0, 0x07030702);
|
||||
out1[1] = __byte_perm (in[0], 0, 0x07010700);
|
||||
out1[0] = __byte_perm (in[0], 0, 0x07030702);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
|
||||
out2[2] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
|
||||
out2[1] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
|
||||
@ -241,19 +292,28 @@ static void make_utf16leN (const u32x in[4], u32x out1[4], u32x out2[4])
|
||||
out1[2] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
|
||||
out1[1] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
|
||||
out1[0] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out[0] = __byte_perm (in1[0], in1[1], 0x4602);
|
||||
out[1] = __byte_perm (in1[2], in1[3], 0x4602);
|
||||
out[2] = __byte_perm (in2[0], in2[1], 0x4602);
|
||||
out[3] = __byte_perm (in2[2], in2[3], 0x4602);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out[0] = __byte_perm (in1[0], in1[1], 0x04060002);
|
||||
out[1] = __byte_perm (in1[2], in1[3], 0x04060002);
|
||||
out[2] = __byte_perm (in2[0], in2[1], 0x04060002);
|
||||
out[3] = __byte_perm (in2[2], in2[3], 0x04060002);
|
||||
|
||||
#else
|
||||
|
||||
out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
|
||||
| ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
|
||||
out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
|
||||
@ -262,19 +322,28 @@ static void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
|
||||
| ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
|
||||
out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
|
||||
| ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out[0] = __byte_perm (in1[0], in1[1], 0x6420);
|
||||
out[1] = __byte_perm (in1[2], in1[3], 0x6420);
|
||||
out[2] = __byte_perm (in2[0], in2[1], 0x6420);
|
||||
out[3] = __byte_perm (in2[2], in2[3], 0x6420);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out[0] = __byte_perm (in1[0], in1[1], 0x06040200);
|
||||
out[1] = __byte_perm (in1[2], in1[3], 0x06040200);
|
||||
out[2] = __byte_perm (in2[0], in2[1], 0x06040200);
|
||||
out[3] = __byte_perm (in2[2], in2[3], 0x06040200);
|
||||
|
||||
#else
|
||||
|
||||
out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
|
||||
| ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
|
||||
out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
|
||||
@ -283,6 +352,7 @@ static void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
|
||||
| ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
|
||||
out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
|
||||
| ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3149,7 +3219,8 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
|
||||
|
||||
static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -3472,10 +3543,18 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -3799,12 +3878,13 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -4265,8 +4345,15 @@ static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4]
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -4735,7 +4822,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
w0[0] = swap32 (w0[0]);
|
||||
w0[1] = swap32 (w0[1]);
|
||||
w0[2] = swap32 (w0[2]);
|
||||
@ -5958,8 +6045,15 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
||||
w7[3] = swap32 (w7[3]);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -6528,7 +6622,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
||||
|
||||
static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -7685,8 +7779,15 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -8847,7 +8948,7 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
|
||||
|
||||
static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -10532,8 +10633,15 @@ static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -12226,7 +12334,7 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
|
||||
@ -16591,8 +16699,15 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -20953,7 +21068,7 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
||||
|
||||
static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -25310,8 +25425,15 @@ static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -32104,7 +32226,8 @@ static void append_0x80_8x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w
|
||||
|
||||
static void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm_S (in[3], 0, 0x3727);
|
||||
out2[2] = __byte_perm_S (in[3], 0, 0x1707);
|
||||
out2[1] = __byte_perm_S (in[2], 0, 0x3727);
|
||||
@ -32113,9 +32236,20 @@ static void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
out1[2] = __byte_perm_S (in[1], 0, 0x1707);
|
||||
out1[1] = __byte_perm_S (in[0], 0, 0x3727);
|
||||
out1[0] = __byte_perm_S (in[0], 0, 0x1707);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm_S (in[3], 0, 0x03070207);
|
||||
out2[2] = __byte_perm_S (in[3], 0, 0x01070007);
|
||||
out2[1] = __byte_perm_S (in[2], 0, 0x03070207);
|
||||
out2[0] = __byte_perm_S (in[2], 0, 0x01070007);
|
||||
out1[3] = __byte_perm_S (in[1], 0, 0x03070207);
|
||||
out1[2] = __byte_perm_S (in[1], 0, 0x01070007);
|
||||
out1[1] = __byte_perm_S (in[0], 0, 0x03070207);
|
||||
out1[0] = __byte_perm_S (in[0], 0, 0x01070007);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
|
||||
out2[2] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
|
||||
out2[1] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
|
||||
@ -32124,12 +32258,14 @@ static void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
out1[2] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
|
||||
out1[1] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);
|
||||
out1[0] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out2[3] = __byte_perm_S (in[3], 0, 0x7372);
|
||||
out2[2] = __byte_perm_S (in[3], 0, 0x7170);
|
||||
out2[1] = __byte_perm_S (in[2], 0, 0x7372);
|
||||
@ -32138,9 +32274,20 @@ static void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
out1[2] = __byte_perm_S (in[1], 0, 0x7170);
|
||||
out1[1] = __byte_perm_S (in[0], 0, 0x7372);
|
||||
out1[0] = __byte_perm_S (in[0], 0, 0x7170);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out2[3] = __byte_perm_S (in[3], 0, 0x07030702);
|
||||
out2[2] = __byte_perm_S (in[3], 0, 0x07010700);
|
||||
out2[1] = __byte_perm_S (in[2], 0, 0x07030702);
|
||||
out2[0] = __byte_perm_S (in[2], 0, 0x07010700);
|
||||
out1[3] = __byte_perm_S (in[1], 0, 0x07030702);
|
||||
out1[2] = __byte_perm_S (in[1], 0, 0x07010700);
|
||||
out1[1] = __byte_perm_S (in[0], 0, 0x07030702);
|
||||
out1[0] = __byte_perm_S (in[0], 0, 0x07010700);
|
||||
|
||||
#else
|
||||
|
||||
out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
|
||||
out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
|
||||
out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
|
||||
@ -32149,19 +32296,28 @@ static void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
|
||||
out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
|
||||
out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
|
||||
out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out[0] = __byte_perm_S (in1[0], in1[1], 0x4602);
|
||||
out[1] = __byte_perm_S (in1[2], in1[3], 0x4602);
|
||||
out[2] = __byte_perm_S (in2[0], in2[1], 0x4602);
|
||||
out[3] = __byte_perm_S (in2[2], in2[3], 0x4602);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out[0] = __byte_perm_S (in1[0], in1[1], 0x04060002);
|
||||
out[1] = __byte_perm_S (in1[2], in1[3], 0x04060002);
|
||||
out[2] = __byte_perm_S (in2[0], in2[1], 0x04060002);
|
||||
out[3] = __byte_perm_S (in2[2], in2[3], 0x04060002);
|
||||
|
||||
#else
|
||||
|
||||
out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
|
||||
| ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
|
||||
out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
|
||||
@ -32170,19 +32326,28 @@ static void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
|
||||
| ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
|
||||
out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
|
||||
| ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
static void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
|
||||
{
|
||||
#ifdef IS_NV
|
||||
#if defined IS_NV
|
||||
|
||||
out[0] = __byte_perm_S (in1[0], in1[1], 0x6420);
|
||||
out[1] = __byte_perm_S (in1[2], in1[3], 0x6420);
|
||||
out[2] = __byte_perm_S (in2[0], in2[1], 0x6420);
|
||||
out[3] = __byte_perm_S (in2[2], in2[3], 0x6420);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#elif defined IS_AMD_ROCM
|
||||
|
||||
out[0] = __byte_perm_S (in1[0], in1[1], 0x06040200);
|
||||
out[1] = __byte_perm_S (in1[2], in1[3], 0x06040200);
|
||||
out[2] = __byte_perm_S (in2[0], in2[1], 0x06040200);
|
||||
out[3] = __byte_perm_S (in2[2], in2[3], 0x06040200);
|
||||
|
||||
#else
|
||||
|
||||
out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
|
||||
| ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
|
||||
out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
|
||||
@ -32191,6 +32356,7 @@ static void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
|
||||
| ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
|
||||
out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
|
||||
| ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -34212,7 +34378,7 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
||||
|
||||
static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -34537,8 +34703,15 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -34867,7 +35040,7 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
||||
|
||||
static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -35328,8 +35501,15 @@ static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -35798,7 +35978,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
w0[0] = swap32_S (w0[0]);
|
||||
w0[1] = swap32_S (w0[1]);
|
||||
w0[2] = swap32_S (w0[2]);
|
||||
@ -37021,8 +37201,15 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
||||
w7[3] = swap32_S (w7[3]);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -37591,7 +37778,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
||||
|
||||
static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -38748,8 +38935,15 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -39910,7 +40104,7 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
||||
|
||||
static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -41595,8 +41789,15 @@ static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -43289,7 +43490,7 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
||||
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
|
||||
@ -47654,8 +47855,15 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
@ -52016,7 +52224,7 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
||||
|
||||
static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
|
||||
{
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
||||
switch (offset / 4)
|
||||
{
|
||||
case 0:
|
||||
@ -56373,8 +56581,15 @@ static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
#if defined IS_AMD_ROCM || defined IS_NV
|
||||
|
||||
#if defined IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD_ROCM
|
||||
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user