|
|
|
@ -136,26 +136,26 @@ void truncate_left (u32 *buf0, u32 *buf1, const u32 offset)
|
|
|
|
|
|
|
|
|
|
/**
 * Build an 8-word output block (out0[0..3], out1[0..3]) from the 8-word
 * input block (in0[0..3], in1[0..3]) by combining each input word with its
 * successor through hc_bytealign_S using selector 1.
 *
 * The input is treated as one contiguous run in0[0..3] followed by
 * in1[0..3]: out0[3] straddles the boundary (in1[0] paired with in0[3]),
 * and the last word out1[3] is paired with a literal 0 because there is no
 * successor word.
 *
 * NOTE(review): hc_bytealign_S is defined outside this block; it is
 * presumably the drop-in replacement for amd_bytealign_S with the same
 * argument order (high word, low word, byte selector) - confirm.
 *
 * Words are written in ascending order and every statement reads only
 * words at the same or a higher index than the one it writes, so no input
 * word is overwritten before it is read when out0/out1 are the same arrays
 * as in0/in1.
 *
 * @param in0   first four input words (read-only)
 * @param in1   last four input words (read-only)
 * @param out0  receives the first four output words
 * @param out1  receives the last four output words
 */
void lshift_block (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1)
{
  out0[0] = hc_bytealign_S (in0[1], in0[0], 1);
  out0[1] = hc_bytealign_S (in0[2], in0[1], 1);
  out0[2] = hc_bytealign_S (in0[3], in0[2], 1);
  out0[3] = hc_bytealign_S (in1[0], in0[3], 1); // crosses the in0/in1 boundary
  out1[0] = hc_bytealign_S (in1[1], in1[0], 1);
  out1[1] = hc_bytealign_S (in1[2], in1[1], 1);
  out1[2] = hc_bytealign_S (in1[3], in1[2], 1);
  out1[3] = hc_bytealign_S (0, in1[3], 1);      // nothing follows: pad with 0
}
|
|
|
|
|
|
|
|
|
|
/**
 * Build an 8-word output block (out0[0..3], out1[0..3]) from the 8-word
 * input block (in0[0..3], in1[0..3]) by combining each input word with its
 * predecessor through hc_bytealign_S using selector 3.
 *
 * The input is treated as one contiguous run in0[0..3] followed by
 * in1[0..3]: out1[0] straddles the boundary (in1[0] paired with in0[3]),
 * and the first word out0[0] is paired with a literal 0 because there is no
 * predecessor word.
 *
 * NOTE(review): hc_bytealign_S is defined outside this block; it is
 * presumably the drop-in replacement for amd_bytealign_S with the same
 * argument order (high word, low word, byte selector) - confirm.
 *
 * Words are written in descending order and every statement reads only
 * words at the same or a lower index than the one it writes, so no input
 * word is overwritten before it is read when out0/out1 are the same arrays
 * as in0/in1.
 *
 * @param in0   first four input words (read-only)
 * @param in1   last four input words (read-only)
 * @param out0  receives the first four output words
 * @param out1  receives the last four output words
 */
void rshift_block (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1)
{
  out1[3] = hc_bytealign_S (in1[3], in1[2], 3);
  out1[2] = hc_bytealign_S (in1[2], in1[1], 3);
  out1[1] = hc_bytealign_S (in1[1], in1[0], 3);
  out1[0] = hc_bytealign_S (in1[0], in0[3], 3); // crosses the in0/in1 boundary
  out0[3] = hc_bytealign_S (in0[3], in0[2], 3);
  out0[2] = hc_bytealign_S (in0[2], in0[1], 3);
  out0[1] = hc_bytealign_S (in0[1], in0[0], 3);
  out0[0] = hc_bytealign_S (in0[0], 0, 3);      // nothing precedes: pad with 0
}
|
|
|
|
|
|
|
|
|
|
void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const u32 num)
|
|
|
|
@ -171,32 +171,32 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = in1[2];
|
|
|
|
|
out1[3] = in1[3];
|
|
|
|
|
break;
|
|
|
|
|
case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[3] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
break;
|
|
|
|
|
case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[3] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
break;
|
|
|
|
|
case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[3] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 1: out0[0] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[3] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
break;
|
|
|
|
|
case 2: out0[0] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[3] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
break;
|
|
|
|
|
case 3: out0[0] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[3] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
break;
|
|
|
|
|
case 4: out0[0] = in0[1];
|
|
|
|
|
out0[1] = in0[2];
|
|
|
|
@ -207,31 +207,31 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = in1[3];
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 5: out0[0] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 6: out0[0] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 7: out0[0] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 8: out0[0] = in0[2];
|
|
|
|
@ -243,30 +243,30 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 9: out0[0] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 10: out0[0] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 11: out0[0] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
@ -279,29 +279,29 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 13: out0[0] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 14: out0[0] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 15: out0[0] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
@ -315,28 +315,28 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 17: out0[0] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 18: out0[0] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 19: out0[0] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
@ -351,27 +351,27 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 21: out0[0] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 22: out0[0] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 23: out0[0] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
@ -387,8 +387,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 25: out0[0] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -396,8 +396,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 26: out0[0] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -405,8 +405,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 27: out0[0] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -423,7 +423,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
case 29: out0[0] = hc_bytealign_S ( 0, in1[3], 1);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -432,7 +432,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
case 30: out0[0] = hc_bytealign_S ( 0, in1[3], 2);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -441,7 +441,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[3] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
case 31: out0[0] = hc_bytealign_S ( 0, in1[3], 3);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -466,32 +466,32 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = in0[1];
|
|
|
|
|
out0[0] = in0[0];
|
|
|
|
|
break;
|
|
|
|
|
case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[0] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
break;
|
|
|
|
|
case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[0] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
break;
|
|
|
|
|
case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[0] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 1: out1[3] = hc_bytealign_S (in1[3], in1[2], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[0] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
break;
|
|
|
|
|
case 2: out1[3] = hc_bytealign_S (in1[3], in1[2], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[0] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
break;
|
|
|
|
|
case 3: out1[3] = hc_bytealign_S (in1[3], in1[2], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[0] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
break;
|
|
|
|
|
case 4: out1[3] = in1[2];
|
|
|
|
|
out1[2] = in1[1];
|
|
|
|
@ -502,31 +502,31 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = in0[0];
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 5: out1[3] = hc_bytealign_S (in1[2], in1[1], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 6: out1[3] = hc_bytealign_S (in1[2], in1[1], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[1] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 7: out1[3] = hc_bytealign_S (in1[2], in1[1], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[1] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 8: out1[3] = in1[1];
|
|
|
|
@ -538,30 +538,30 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 9: out1[3] = hc_bytealign_S (in1[1], in1[0], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 10: out1[3] = hc_bytealign_S (in1[1], in1[0], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[2] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 11: out1[3] = hc_bytealign_S (in1[1], in1[0], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[2] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
@ -574,29 +574,29 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 13: out1[3] = hc_bytealign_S (in1[0], in0[3], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 14: out1[3] = hc_bytealign_S (in1[0], in0[3], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[3] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 15: out1[3] = hc_bytealign_S (in1[0], in0[3], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out0[3] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
@ -610,28 +610,28 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 17: out1[3] = hc_bytealign_S (in0[3], in0[2], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 18: out1[3] = hc_bytealign_S (in0[3], in0[2], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[0] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 19: out1[3] = hc_bytealign_S (in0[3], in0[2], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[0] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
@ -646,27 +646,27 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 21: out1[3] = hc_bytealign_S (in0[2], in0[1], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 22: out1[3] = hc_bytealign_S (in0[2], in0[1], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[1] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 23: out1[3] = hc_bytealign_S (in0[2], in0[1], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[1] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
|
out0[2] = 0;
|
|
|
|
@ -682,8 +682,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 25: out1[3] = hc_bytealign_S (in0[1], in0[0], 3);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -691,8 +691,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 26: out1[3] = hc_bytealign_S (in0[1], in0[0], 2);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -700,8 +700,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[2] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 27: out1[3] = hc_bytealign_S (in0[1], in0[0], 1);
|
|
|
|
|
out1[2] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
|
out0[3] = 0;
|
|
|
|
@ -718,7 +718,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
case 29: out1[3] = hc_bytealign_S (in0[0], 0, 3);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -727,7 +727,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
case 30: out1[3] = hc_bytealign_S (in0[0], 0, 2);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -736,7 +736,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
|
|
|
|
out0[1] = 0;
|
|
|
|
|
out0[0] = 0;
|
|
|
|
|
break;
|
|
|
|
|
case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
case 31: out1[3] = hc_bytealign_S (in0[0], 0, 1);
|
|
|
|
|
out1[2] = 0;
|
|
|
|
|
out1[1] = 0;
|
|
|
|
|
out1[0] = 0;
|
|
|
|
@ -803,44 +803,44 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
s7 = amd_bytealign_S (src_r12, src_r13, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r11, src_r12, offset);
|
|
|
|
|
s5 = amd_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s4 = amd_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s3 = amd_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s2 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s1 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s0 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r12, src_r13, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r11, src_r12, offset);
|
|
|
|
|
s5 = hc_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s4 = hc_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s3 = hc_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s2 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s1 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s0 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
s7 = amd_bytealign_S (src_r11, src_r12, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s5 = amd_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s4 = amd_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s3 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s2 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s1 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r11, src_r12, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s5 = hc_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s4 = hc_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s3 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s2 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s1 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s0 = 0;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
s7 = amd_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s5 = amd_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s4 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s3 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s2 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r10, src_r11, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s5 = hc_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s4 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s3 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s2 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s1 = 0;
|
|
|
|
|
s0 = 0;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
s7 = amd_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s5 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s4 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s3 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r03, src_r10, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s5 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s4 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s3 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s2 = 0;
|
|
|
|
|
s1 = 0;
|
|
|
|
|
s0 = 0;
|
|
|
|
@ -848,10 +848,10 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 4:
|
|
|
|
|
s7 = amd_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s5 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s4 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r02, src_r03, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s5 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s4 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s3 = 0;
|
|
|
|
|
s2 = 0;
|
|
|
|
|
s1 = 0;
|
|
|
|
@ -859,9 +859,9 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 5:
|
|
|
|
|
s7 = amd_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s6 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s5 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r01, src_r02, offset);
|
|
|
|
|
s6 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s5 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s4 = 0;
|
|
|
|
|
s3 = 0;
|
|
|
|
|
s2 = 0;
|
|
|
|
@ -870,8 +870,8 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 6:
|
|
|
|
|
s7 = amd_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s6 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S (src_r00, src_r01, offset);
|
|
|
|
|
s6 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s5 = 0;
|
|
|
|
|
s4 = 0;
|
|
|
|
|
s3 = 0;
|
|
|
|
@ -881,7 +881,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 7:
|
|
|
|
|
s7 = amd_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s7 = hc_bytealign_S ( 0, src_r00, offset);
|
|
|
|
|
s6 = 0;
|
|
|
|
|
s5 = 0;
|
|
|
|
|
s4 = 0;
|
|
|
|
@ -928,44 +928,44 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
switch (offset_switch)
|
|
|
|
|
{
|
|
|
|
|
case 0:
|
|
|
|
|
s7 = __byte_perm_S (src_r12, src_r13, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r11, src_r12, selector);
|
|
|
|
|
s5 = __byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s4 = __byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s3 = __byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s2 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s1 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s0 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r12, src_r13, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r11, src_r12, selector);
|
|
|
|
|
s5 = hc_byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s4 = hc_byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s3 = hc_byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s2 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s1 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s0 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
s7 = __byte_perm_S (src_r11, src_r12, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s5 = __byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s4 = __byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s3 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s2 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s1 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r11, src_r12, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s5 = hc_byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s4 = hc_byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s3 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s2 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s1 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s0 = 0;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
s7 = __byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s5 = __byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s4 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s3 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s2 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r10, src_r11, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s5 = hc_byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s4 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s3 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s2 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s1 = 0;
|
|
|
|
|
s0 = 0;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
s7 = __byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s5 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s4 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s3 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r03, src_r10, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s5 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s4 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s3 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s2 = 0;
|
|
|
|
|
s1 = 0;
|
|
|
|
|
s0 = 0;
|
|
|
|
@ -973,10 +973,10 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 4:
|
|
|
|
|
s7 = __byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s5 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s4 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r02, src_r03, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s5 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s4 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s3 = 0;
|
|
|
|
|
s2 = 0;
|
|
|
|
|
s1 = 0;
|
|
|
|
@ -984,9 +984,9 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 5:
|
|
|
|
|
s7 = __byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s6 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s5 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r01, src_r02, selector);
|
|
|
|
|
s6 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s5 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s4 = 0;
|
|
|
|
|
s3 = 0;
|
|
|
|
|
s2 = 0;
|
|
|
|
@ -995,8 +995,8 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 6:
|
|
|
|
|
s7 = __byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s6 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S (src_r00, src_r01, selector);
|
|
|
|
|
s6 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s5 = 0;
|
|
|
|
|
s4 = 0;
|
|
|
|
|
s3 = 0;
|
|
|
|
@ -1006,7 +1006,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 7:
|
|
|
|
|
s7 = __byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s7 = hc_byte_perm_S ( 0, src_r00, selector);
|
|
|
|
|
s6 = 0;
|
|
|
|
|
s5 = 0;
|
|
|
|
|
s4 = 0;
|
|
|
|
@ -1681,10 +1681,10 @@ u32 search_on_register (const u32 in, const u32 p0)
|
|
|
|
|
{
|
|
|
|
|
u32 r = 0;
|
|
|
|
|
|
|
|
|
|
if (__bfe_S (in, 0, 8) == p0) r |= 1;
|
|
|
|
|
if (__bfe_S (in, 8, 8) == p0) r |= 2;
|
|
|
|
|
if (__bfe_S (in, 16, 8) == p0) r |= 4;
|
|
|
|
|
if (__bfe_S (in, 24, 8) == p0) r |= 8;
|
|
|
|
|
if (hc_bfe_S (in, 0, 8) == p0) r |= 1;
|
|
|
|
|
if (hc_bfe_S (in, 8, 8) == p0) r |= 2;
|
|
|
|
|
if (hc_bfe_S (in, 16, 8) == p0) r |= 4;
|
|
|
|
|
if (hc_bfe_S (in, 24, 8) == p0) r |= 8;
|
|
|
|
|
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|