|
|
|
@ -1266,6 +1266,12 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32 (w0[0]);
|
|
|
|
|
w0[1] = swap32 (w0[1]);
|
|
|
|
@ -1284,7 +1290,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
|
|
|
|
w3[2] = swap32 (w3[2]);
|
|
|
|
|
w3[3] = swap32 (w3[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
|
|
@ -1635,7 +1641,7 @@ static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = __byte_perm (w3[2], w3[3], selector);
|
|
|
|
@ -1967,6 +1973,12 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if defined IS_AMD || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32 (w0[0]);
|
|
|
|
|
w0[1] = swap32 (w0[1]);
|
|
|
|
@ -1985,7 +1997,7 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
|
|
|
|
|
w3[2] = swap32 (w3[2]);
|
|
|
|
|
w3[3] = swap32 (w3[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign (w3[3], 0, offset);
|
|
|
|
@ -2480,7 +2492,7 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
|
|
|
|
|
|
|
|
|
|
#ifdef IS_NV
|
|
|
|
|
// todo
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
|
|
@ -3279,9 +3291,15 @@ static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4]
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
|
|
@ -3616,7 +3634,7 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = __byte_perm (w3[3], w3[2], selector);
|
|
|
|
@ -3944,8 +3962,14 @@ static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign (w3[3], 0, offset);
|
|
|
|
@ -4415,7 +4439,7 @@ static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4]
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = __byte_perm ( 0, w3[3], selector);
|
|
|
|
@ -4882,6 +4906,12 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32 (w0[0]);
|
|
|
|
|
w0[1] = swap32 (w0[1]);
|
|
|
|
@ -4916,7 +4946,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
w7[2] = swap32 (w7[2]);
|
|
|
|
|
w7[3] = swap32 (w7[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
|
|
@ -6115,7 +6145,7 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = __byte_perm (w7[2], w7[3], selector);
|
|
|
|
@ -6682,8 +6712,14 @@ static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
|
|
@ -7849,7 +7885,7 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = __byte_perm (w7[3], w7[2], selector);
|
|
|
|
@ -9008,8 +9044,14 @@ static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4],
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign (w7[3], 0, offset);
|
|
|
|
@ -10703,7 +10745,7 @@ static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = __byte_perm ( 0, w7[3], selector);
|
|
|
|
@ -12394,12 +12436,18 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
|
|
|
|
|
#pragma unroll
|
|
|
|
|
for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = amd_bytealign (w[62], w[63], offset);
|
|
|
|
@ -16769,7 +16817,7 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = __byte_perm (w[62], w[63], selector);
|
|
|
|
@ -21128,8 +21176,14 @@ static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = amd_bytealign (w[62], w[63], offset);
|
|
|
|
@ -25495,7 +25549,7 @@ static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = __byte_perm (w[63], w[62], selector);
|
|
|
|
@ -32416,6 +32470,12 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
|
|
|
w0[1] = swap32_S (w0[1]);
|
|
|
|
@ -32434,7 +32494,7 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
|
|
|
|
w3[2] = swap32_S (w3[2]);
|
|
|
|
|
w3[3] = swap32_S (w3[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
|
|
@ -32785,7 +32845,7 @@ static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
|
|
@ -33116,6 +33176,12 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if defined IS_AMD || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
|
|
|
w0[1] = swap32_S (w0[1]);
|
|
|
|
@ -33134,7 +33200,7 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
|
|
|
|
w3[2] = swap32_S (w3[2]);
|
|
|
|
|
w3[3] = swap32_S (w3[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
|
|
@ -33629,7 +33695,7 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
|
|
|
|
|
|
|
|
|
#ifdef IS_NV
|
|
|
|
|
// todo
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
|
|
@ -34428,8 +34494,14 @@ static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
|
|
@ -34763,7 +34835,7 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w3[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
|
|
@ -35090,8 +35162,14 @@ static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
|
|
@ -35561,7 +35639,7 @@ static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4],
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = __byte_perm_S ( 0, w3[3], selector);
|
|
|
|
@ -36028,6 +36106,12 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
|
|
|
w0[1] = swap32_S (w0[1]);
|
|
|
|
@ -36062,7 +36146,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
w7[2] = swap32_S (w7[2]);
|
|
|
|
|
w7[3] = swap32_S (w7[3]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
|
|
@ -37261,7 +37345,7 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = __byte_perm_S (w7[2], w7[3], selector);
|
|
|
|
@ -37828,8 +37912,14 @@ static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
|
|
@ -38995,7 +39085,7 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w7[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
|
|
@ -40154,8 +40244,14 @@ static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
|
|
@ -41849,7 +41945,7 @@ static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
c0[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
|
|
@ -43540,12 +43636,18 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
|
|
|
|
|
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
|
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
|
|
|
|
|
#pragma unroll
|
|
|
|
|
for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = amd_bytealign_S (w[62], w[63], offset);
|
|
|
|
@ -47915,7 +48017,7 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
|
|
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = __byte_perm_S (w[62], w[63], selector);
|
|
|
|
@ -52274,8 +52376,14 @@ static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
|
|
|
|
|
|
|
|
|
static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
|
|
|
|
|
{
|
|
|
|
|
#ifdef IS_AMD
|
|
|
|
|
volatile const int offset_switch = offset / 4;
|
|
|
|
|
#else
|
|
|
|
|
const int offset_switch = offset / 4;
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#if (defined IS_AMD && AMD_GCN < 3) || defined IS_GENERIC
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = amd_bytealign_S (w[62], w[63], offset);
|
|
|
|
@ -56641,7 +56749,7 @@ static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
|
|
|
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
switch (offset / 4)
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
w[63] = __byte_perm_S (w[63], w[62], selector);
|
|
|
|
|