mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-12 18:59:03 +00:00
61489 lines
2.0 MiB
61489 lines
2.0 MiB
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */
|
|
|
|
/**
 * Byte-select masks, one row per byte offset 0..63 of a 16-word buffer.
 *
 * Row n holds 0xff in byte (n % 4) of word (n / 4) and zero everywhere else.
 * Elements that are not listed in a row initializer are zero-initialized, so
 * each row only spells out the words up to and including the non-zero one.
 */
__constant u32 c_append_helper[64][16] =
{
  // word 0
  { 0x000000ff },
  { 0x0000ff00 },
  { 0x00ff0000 },
  { 0xff000000 },
  // word 1
  { 0, 0x000000ff },
  { 0, 0x0000ff00 },
  { 0, 0x00ff0000 },
  { 0, 0xff000000 },
  // word 2
  { 0, 0, 0x000000ff },
  { 0, 0, 0x0000ff00 },
  { 0, 0, 0x00ff0000 },
  { 0, 0, 0xff000000 },
  // word 3
  { 0, 0, 0, 0x000000ff },
  { 0, 0, 0, 0x0000ff00 },
  { 0, 0, 0, 0x00ff0000 },
  { 0, 0, 0, 0xff000000 },
  // word 4
  { 0, 0, 0, 0,  0x000000ff },
  { 0, 0, 0, 0,  0x0000ff00 },
  { 0, 0, 0, 0,  0x00ff0000 },
  { 0, 0, 0, 0,  0xff000000 },
  // word 5
  { 0, 0, 0, 0,  0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0xff000000 },
  // word 6
  { 0, 0, 0, 0,  0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0xff000000 },
  // word 7
  { 0, 0, 0, 0,  0, 0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0xff000000 },
  // word 8
  { 0, 0, 0, 0,  0, 0, 0, 0,  0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0xff000000 },
  // word 9
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0xff000000 },
  // word 10
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0xff000000 },
  // word 11
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0xff000000 },
  // word 12
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0xff000000 },
  // word 13
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0xff000000 },
  // word 14
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0xff000000 },
  // word 15
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x000000ff },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x0000ff00 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0x00ff0000 },
  { 0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0xff000000 },
};
|
|
|
|
/**
 * pure scalar functions
 */
|
|
|
|
/**
 * Find the first zero bit of `v`.
 *
 * Bit positions are scanned in ascending order starting at bit 0 (least
 * significant). Returns the index (0..31) of the first clear bit, or -1 when
 * all 32 bits are set.
 */
static int ffz (const u32 v)
{
  #ifdef _unroll
  #pragma unroll
  #endif
  for (int bit = 0; bit < 32; bit++)
  {
    if (((v >> bit) & 1) == 0) return bit;
  }

  return -1;
}
|
|
|
|
/**
 * Three-way comparison of a computed digest against a stored one.
 *
 * Words are compared from d1[3] down to d1[0]; the matching word of `d2` is
 * addressed through the DGST_R3..DGST_R0 index macros (defined outside this
 * block). The first differing word decides the result.
 *
 * Returns 1 if d1 is greater, -1 if d1 is smaller, 0 if all four words match.
 */
static int hash_comp (const u32 d1[4], __global const u32 *d2)
{
  if (d1[3] != d2[DGST_R3]) return (d1[3] > d2[DGST_R3]) ? 1 : -1;
  if (d1[2] != d2[DGST_R2]) return (d1[2] > d2[DGST_R2]) ? 1 : -1;
  if (d1[1] != d2[DGST_R1]) return (d1[1] > d2[DGST_R1]) ? 1 : -1;
  if (d1[0] != d2[DGST_R0]) return (d1[0] > d2[DGST_R0]) ? 1 : -1;

  return 0;
}
|
|
|
|
/**
 * Binary search for `digest` in `digests_buf`.
 *
 * Each step probes the middle of the remaining window with hash_comp () and
 * either returns the probe index on a match or continues in the lower/upper
 * half.
 * NOTE(review): this only works if digests_buf is sorted in the order defined
 * by hash_comp () - the sorting is not visible here, confirm against the host
 * code.
 *
 * Returns the index of the matching entry, or -1 if none of the probed
 * entries match.
 */
static int find_hash (const u32 digest[4], const u32 digests_cnt, __global const digest_t *digests_buf)
{
  u32 base = 0;
  u32 span = digests_cnt;

  while (span)
  {
    const u32 half  = span >> 1;
    const u32 probe = base + half;

    const int order = hash_comp (digest, digests_buf[probe].digest_buf);

    if (order == 0) return (probe);

    if (order > 0)
    {
      // continue right of the probe; the probe itself is excluded
      base += half + 1;

      span--;
    }

    span >>= 1;
  }

  return (-1);
}
|
|
|
|
/**
 * Test one bit of a bitmap for a digest word.
 *
 * The bitmap word is selected by (digest >> bitmap_shift) & bitmap_mask and
 * the bit inside that word by the low five bits of `digest`.
 *
 * Returns the masked bit (non-zero if set, 0 if clear).
 */
static u32 check_bitmap (__global const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest)
{
  const u32 word = bitmap[(digest >> bitmap_shift) & bitmap_mask];

  // unsigned literal: bit 31 must not be produced by shifting a signed int
  const u32 bit = 1u << (digest & 0x1f);

  return (word & bit);
}
|
|
|
|
/**
 * Bitmap pre-filter for a 4-word digest.
 *
 * digest[0..3] are looked up in bitmaps a..d respectively, first in the
 * s1 set (using bitmap_shift1) and then in the s2 set (using bitmap_shift2).
 * Evaluation stops at the first lookup that yields 0.
 *
 * Returns 1 if all eight lookups hit, 0 otherwise.
 */
static u32 check (const u32 digest[4], __global const u32 *bitmap_s1_a, __global const u32 *bitmap_s1_b, __global const u32 *bitmap_s1_c, __global const u32 *bitmap_s1_d, __global const u32 *bitmap_s2_a, __global const u32 *bitmap_s2_b, __global const u32 *bitmap_s2_c, __global const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2)
{
  // first bitmap set
  if ((check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0)
   || (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0)
   || (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0)
   || (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0))
  {
    return (0);
  }

  // second bitmap set
  if ((check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0)
   || (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0)
   || (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0)
   || (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0))
  {
    return (0);
  }

  return (1);
}
|
|
|
|
/**
 * Record a cracked hash in the result buffer.
 *
 * A slot index is taken from the counter `d_result` with atomic_inc (). If
 * that index is not below `digests_cnt` the increment is reverted and nothing
 * is stored; otherwise the positions are written to plains_buf[index].
 */
static void mark_hash (__global plain_t *plains_buf, __global u32 *d_result, const u32 salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u32 gid, const u32 il_pos)
{
  const u32 slot = atomic_inc (d_result);

  if (slot < digests_cnt)
  {
    plains_buf[slot].salt_pos    = salt_pos;
    plains_buf[slot].digest_pos  = digest_pos;  // relative
    plains_buf[slot].hash_pos    = hash_pos;    // absolute
    plains_buf[slot].gidvid      = gid;
    plains_buf[slot].il_pos      = il_pos;
  }
  else
  {
    // atomic_inc () had to be called to learn the current value in a
    // multi-threaded context, but it pushed the counter past the end of the
    // buffer - take the increment back

    atomic_dec (d_result);
  }
}
|
|
|
|
/**
 * Count how many bytes of buf[0..elems-1] are equal to `c`.
 *
 * Every 32-bit word contributes its four bytes (bits 0-7, 8-15, 16-23 and
 * 24-31). `c` is compared against the full byte value, so values above 0xff
 * never match.
 */
static int count_char (const u32 *buf, const int elems, const u32 c)
{
  int hits = 0;

  for (int pos = 0; pos < elems; pos++)
  {
    const u32 word = buf[pos];

    // each comparison yields 0 or 1
    hits += (((word >>  0) & 0xff) == c);
    hits += (((word >>  8) & 0xff) == c);
    hits += (((word >> 16) & 0xff) == c);
    hits += (((word >> 24) & 0xff) == c);
  }

  return hits;
}
|
|
|
|
/**
 * Shannon entropy (base 2) of the bytes in buf[0..elems-1].
 *
 * For every byte value 0..255 the relative frequency w = count / (elems * 4)
 * is obtained via count_char () and -w * log2 (w) is accumulated. Byte values
 * that do not occur are skipped, which also avoids log2 (0).
 *
 * Returns the accumulated entropy; 0 when elems is 0.
 */
static float get_entropy (const u32 *buf, const int elems)
{
  const int length = elems * 4;

  // single-precision literal: the whole computation is float
  float entropy = 0.0f;

  #ifdef _unroll
  #pragma unroll
  #endif
  for (u32 c = 0; c < 256; c++)
  {
    const int r = count_char (buf, elems, c);

    if (r == 0) continue;

    const float w = (float) r / length;

    entropy += -w * log2 (w);
  }

  return entropy;
}
|
|
|
|
/**
 * vector functions
 */
|
|
|
|
/**
 * Spread the 16 bytes of in[0..3] over the 8 words out1[0..3], out2[0..3],
 * two input bytes per output word.
 *
 * In the portable branch each input byte ends up in bits 8-15 or 24-31 of an
 * output word and the remaining bits are zero. in[0] feeds out1[0..1], in[1]
 * feeds out1[2..3], in[2] feeds out2[0..1] and in[3] feeds out2[2..3]; the
 * lower two input bytes go to the lower-indexed word of each pair.
 * NOTE(review): the IS_NV / IS_AMD_ROCM branches use __byte_perm () (defined
 * outside this block) and are assumed to produce the same result.
 *
 * The stores run from out2[3] down to out1[0] so that every in[] word is read
 * before a store to the same index of out1 happens; do not reorder them, this
 * is what keeps a call with in == out1 working.
 */
static void make_utf16be (const u32x in[4], u32x out1[4], u32x out2[4])
{
  // _LO spreads input bytes 0 and 1, _HI spreads input bytes 2 and 3
  #if defined IS_NV
  #define MAKE_UTF16BE_LO(x) __byte_perm ((x), 0, 0x1707)
  #define MAKE_UTF16BE_HI(x) __byte_perm ((x), 0, 0x3727)
  #elif defined IS_AMD_ROCM
  #define MAKE_UTF16BE_LO(x) __byte_perm ((x), 0, 0x01070007)
  #define MAKE_UTF16BE_HI(x) __byte_perm ((x), 0, 0x03070207)
  #else
  #define MAKE_UTF16BE_LO(x) ((((x) << 16) & 0xFF000000) | (((x) <<  8) & 0x0000FF00))
  #define MAKE_UTF16BE_HI(x) ((((x) >>  0) & 0xFF000000) | (((x) >>  8) & 0x0000FF00))
  #endif

  out2[3] = MAKE_UTF16BE_HI (in[3]);
  out2[2] = MAKE_UTF16BE_LO (in[3]);
  out2[1] = MAKE_UTF16BE_HI (in[2]);
  out2[0] = MAKE_UTF16BE_LO (in[2]);
  out1[3] = MAKE_UTF16BE_HI (in[1]);
  out1[2] = MAKE_UTF16BE_LO (in[1]);
  out1[1] = MAKE_UTF16BE_HI (in[0]);
  out1[0] = MAKE_UTF16BE_LO (in[0]);

  #undef MAKE_UTF16BE_LO
  #undef MAKE_UTF16BE_HI
}
|
|
|
|
/**
 * Variant of the UTF-16BE expansion in which the two output words derived
 * from one input word are stored in swapped order.
 *
 * In the portable branch each input byte ends up in bits 8-15 or 24-31 of an
 * output word and the remaining bits are zero. in[0] feeds out1[0..1], in[1]
 * feeds out1[2..3], in[2] feeds out2[0..1] and in[3] feeds out2[2..3]; here
 * the upper two input bytes go to the lower-indexed word of each pair.
 * NOTE(review): the IS_NV / IS_AMD_ROCM branches use __byte_perm () (defined
 * outside this block) and are assumed to produce the same result.
 *
 * The stores run from out2[3] down to out1[0] so that every in[] word is read
 * before a store to the same index of out1 happens; do not reorder them, this
 * is what keeps a call with in == out1 working.
 */
static void make_utf16beN (const u32x in[4], u32x out1[4], u32x out2[4])
{
  // _LO spreads input bytes 0 and 1, _HI spreads input bytes 2 and 3
  #if defined IS_NV
  #define MAKE_UTF16BEN_LO(x) __byte_perm ((x), 0, 0x1707)
  #define MAKE_UTF16BEN_HI(x) __byte_perm ((x), 0, 0x3727)
  #elif defined IS_AMD_ROCM
  #define MAKE_UTF16BEN_LO(x) __byte_perm ((x), 0, 0x01070007)
  #define MAKE_UTF16BEN_HI(x) __byte_perm ((x), 0, 0x03070207)
  #else
  #define MAKE_UTF16BEN_LO(x) ((((x) << 16) & 0xFF000000) | (((x) <<  8) & 0x0000FF00))
  #define MAKE_UTF16BEN_HI(x) ((((x) >>  0) & 0xFF000000) | (((x) >>  8) & 0x0000FF00))
  #endif

  out2[3] = MAKE_UTF16BEN_LO (in[3]);
  out2[2] = MAKE_UTF16BEN_HI (in[3]);
  out2[1] = MAKE_UTF16BEN_LO (in[2]);
  out2[0] = MAKE_UTF16BEN_HI (in[2]);
  out1[3] = MAKE_UTF16BEN_LO (in[1]);
  out1[2] = MAKE_UTF16BEN_HI (in[1]);
  out1[1] = MAKE_UTF16BEN_LO (in[0]);
  out1[0] = MAKE_UTF16BEN_HI (in[0]);

  #undef MAKE_UTF16BEN_LO
  #undef MAKE_UTF16BEN_HI
}
|
|
|
|
/**
 * Spread the 16 bytes of in[0..3] over the 8 words out1[0..3], out2[0..3],
 * two input bytes per output word.
 *
 * In the portable branch each input byte ends up in bits 0-7 or 16-23 of an
 * output word and the remaining bits are zero. in[0] feeds out1[0..1], in[1]
 * feeds out1[2..3], in[2] feeds out2[0..1] and in[3] feeds out2[2..3]; the
 * lower two input bytes go to the lower-indexed word of each pair.
 * NOTE(review): the IS_NV / IS_AMD_ROCM branches use __byte_perm () (defined
 * outside this block) and are assumed to produce the same result.
 *
 * The stores run from out2[3] down to out1[0] so that every in[] word is read
 * before a store to the same index of out1 happens; do not reorder them, this
 * is what keeps a call with in == out1 working.
 */
static void make_utf16le (const u32x in[4], u32x out1[4], u32x out2[4])
{
  // _LO spreads input bytes 0 and 1, _HI spreads input bytes 2 and 3
  #if defined IS_NV
  #define MAKE_UTF16LE_LO(x) __byte_perm ((x), 0, 0x7170)
  #define MAKE_UTF16LE_HI(x) __byte_perm ((x), 0, 0x7372)
  #elif defined IS_AMD_ROCM
  #define MAKE_UTF16LE_LO(x) __byte_perm ((x), 0, 0x07010700)
  #define MAKE_UTF16LE_HI(x) __byte_perm ((x), 0, 0x07030702)
  #else
  #define MAKE_UTF16LE_LO(x) ((((x) <<  8) & 0x00FF0000) | (((x) >>  0) & 0x000000FF))
  #define MAKE_UTF16LE_HI(x) ((((x) >>  8) & 0x00FF0000) | (((x) >> 16) & 0x000000FF))
  #endif

  out2[3] = MAKE_UTF16LE_HI (in[3]);
  out2[2] = MAKE_UTF16LE_LO (in[3]);
  out2[1] = MAKE_UTF16LE_HI (in[2]);
  out2[0] = MAKE_UTF16LE_LO (in[2]);
  out1[3] = MAKE_UTF16LE_HI (in[1]);
  out1[2] = MAKE_UTF16LE_LO (in[1]);
  out1[1] = MAKE_UTF16LE_HI (in[0]);
  out1[0] = MAKE_UTF16LE_LO (in[0]);

  #undef MAKE_UTF16LE_LO
  #undef MAKE_UTF16LE_HI
}
|
|
|
|
/**
 * Variant of the UTF-16LE expansion in which the two output words derived
 * from one input word are stored in swapped order.
 *
 * In the portable branch each input byte ends up in bits 0-7 or 16-23 of an
 * output word and the remaining bits are zero. in[0] feeds out1[0..1], in[1]
 * feeds out1[2..3], in[2] feeds out2[0..1] and in[3] feeds out2[2..3]; here
 * the upper two input bytes go to the lower-indexed word of each pair.
 * NOTE(review): the IS_NV / IS_AMD_ROCM branches use __byte_perm () (defined
 * outside this block) and are assumed to produce the same result.
 *
 * The stores run from out2[3] down to out1[0] so that every in[] word is read
 * before a store to the same index of out1 happens; do not reorder them, this
 * is what keeps a call with in == out1 working.
 */
static void make_utf16leN (const u32x in[4], u32x out1[4], u32x out2[4])
{
  // _LO spreads input bytes 0 and 1, _HI spreads input bytes 2 and 3
  #if defined IS_NV
  #define MAKE_UTF16LEN_LO(x) __byte_perm ((x), 0, 0x7170)
  #define MAKE_UTF16LEN_HI(x) __byte_perm ((x), 0, 0x7372)
  #elif defined IS_AMD_ROCM
  #define MAKE_UTF16LEN_LO(x) __byte_perm ((x), 0, 0x07010700)
  #define MAKE_UTF16LEN_HI(x) __byte_perm ((x), 0, 0x07030702)
  #else
  #define MAKE_UTF16LEN_LO(x) ((((x) <<  8) & 0x00FF0000) | (((x) >>  0) & 0x000000FF))
  #define MAKE_UTF16LEN_HI(x) ((((x) >>  8) & 0x00FF0000) | (((x) >> 16) & 0x000000FF))
  #endif

  out2[3] = MAKE_UTF16LEN_LO (in[3]);
  out2[2] = MAKE_UTF16LEN_HI (in[3]);
  out2[1] = MAKE_UTF16LEN_LO (in[2]);
  out2[0] = MAKE_UTF16LEN_HI (in[2]);
  out1[3] = MAKE_UTF16LEN_LO (in[1]);
  out1[2] = MAKE_UTF16LEN_HI (in[1]);
  out1[1] = MAKE_UTF16LEN_LO (in[0]);
  out1[0] = MAKE_UTF16LEN_HI (in[0]);

  #undef MAKE_UTF16LEN_LO
  #undef MAKE_UTF16LEN_HI
}
|
|
|
|
/**
 * Pack the 8 words in1[0..3], in2[0..3] back into the 4 words out[0..3],
 * taking two bytes from each input word.
 *
 * out[0] is built from in1[0], in1[1]; out[1] from in1[2], in1[3]; out[2] from
 * in2[0], in2[1]; out[3] from in2[2], in2[3]. In the portable branch the bytes
 * taken are bits 8-15 and 24-31 of each input word, in that order.
 *
 * NOTE(review): assuming CUDA __byte_perm () selector semantics, 0x4602 (and
 * its ROCm counterpart 0x04060002) picks bytes 2 and 0 of each input word,
 * whereas the portable branch picks bytes 1 and 3. The branches therefore do
 * not look equivalent - confirm which one is intended before relying on this
 * function on IS_NV / IS_AMD_ROCM.
 *
 * The stores run from out[0] up to out[3]; with out == in1 every in1[] word is
 * read before it is overwritten, so do not reorder them.
 */
static void undo_utf16be (const u32x in1[4], const u32x in2[4], u32x out[4])
{
  #if defined IS_NV
  #define UNDO_UTF16BE_PACK(a,b) __byte_perm ((a), (b), 0x4602)
  #elif defined IS_AMD_ROCM
  #define UNDO_UTF16BE_PACK(a,b) __byte_perm ((a), (b), 0x04060002)
  #else
  #define UNDO_UTF16BE_PACK(a,b)                                        \
    ((((a) & 0x0000ff00) >>  8) | (((a) & 0xff000000) >> 16)            \
   | (((b) & 0x0000ff00) <<  8) | (((b) & 0xff000000) <<  0))
  #endif

  out[0] = UNDO_UTF16BE_PACK (in1[0], in1[1]);
  out[1] = UNDO_UTF16BE_PACK (in1[2], in1[3]);
  out[2] = UNDO_UTF16BE_PACK (in2[0], in2[1]);
  out[3] = UNDO_UTF16BE_PACK (in2[2], in2[3]);

  #undef UNDO_UTF16BE_PACK
}
|
|
|
|
/**
 * Pack the 8 words in1[0..3], in2[0..3] back into the 4 words out[0..3],
 * taking two bytes from each input word.
 *
 * out[0] is built from in1[0], in1[1]; out[1] from in1[2], in1[3]; out[2] from
 * in2[0], in2[1]; out[3] from in2[2], in2[3]. In the portable branch the bytes
 * taken are bits 0-7 and 16-23 of each input word, in that order.
 * NOTE(review): the IS_NV / IS_AMD_ROCM branches use __byte_perm () (defined
 * outside this block) and are assumed to produce the same result.
 *
 * The stores run from out[0] up to out[3]; with out == in1 every in1[] word is
 * read before it is overwritten, so do not reorder them.
 */
static void undo_utf16le (const u32x in1[4], const u32x in2[4], u32x out[4])
{
  #if defined IS_NV
  #define UNDO_UTF16LE_PACK(a,b) __byte_perm ((a), (b), 0x6420)
  #elif defined IS_AMD_ROCM
  #define UNDO_UTF16LE_PACK(a,b) __byte_perm ((a), (b), 0x06040200)
  #else
  #define UNDO_UTF16LE_PACK(a,b)                                        \
    ((((a) & 0x000000ff) >>  0) | (((a) & 0x00ff0000) >>  8)            \
   | (((b) & 0x000000ff) << 16) | (((b) & 0x00ff0000) <<  8))
  #endif

  out[0] = UNDO_UTF16LE_PACK (in1[0], in1[1]);
  out[1] = UNDO_UTF16LE_PACK (in1[2], in1[3]);
  out[2] = UNDO_UTF16LE_PACK (in2[0], in2[1]);
  out[3] = UNDO_UTF16LE_PACK (in2[2], in2[3]);

  #undef UNDO_UTF16LE_PACK
}
|
|
|
|
/**
 * OR a 0x80 byte into the 4-word buffer w0 at byte position `offset`.
 *
 * The position is resolved through row `offset` of c_append_helper, of which
 * only columns 0..3 are used here; a row whose mask lies in a later column
 * leaves w0 unchanged.
 * NOTE(review): `offset` indexes the table directly and is not range-checked
 * here - callers are presumably expected to keep it below 64.
 */
static void append_0x80_1x4 (u32x w0[4], const u32 offset)
{
  const u32 marker = 0x80808080;

  w0[0] |= c_append_helper[offset][0] & marker;
  w0[1] |= c_append_helper[offset][1] & marker;
  w0[2] |= c_append_helper[offset][2] & marker;
  w0[3] |= c_append_helper[offset][3] & marker;
}
|
|
|
|
/**
 * OR a 0x80 byte into the 8-word buffer w0 | w1 at byte position `offset`.
 *
 * The position is resolved through row `offset` of c_append_helper, of which
 * only columns 0..7 are used here (0..3 for w0, 4..7 for w1); a row whose mask
 * lies in a later column leaves both buffers unchanged.
 * NOTE(review): `offset` indexes the table directly and is not range-checked
 * here - callers are presumably expected to keep it below 64.
 */
static void append_0x80_2x4 (u32x w0[4], u32x w1[4], const u32 offset)
{
  const u32 marker = 0x80808080;

  w0[0] |= c_append_helper[offset][0] & marker;
  w0[1] |= c_append_helper[offset][1] & marker;
  w0[2] |= c_append_helper[offset][2] & marker;
  w0[3] |= c_append_helper[offset][3] & marker;

  w1[0] |= c_append_helper[offset][4] & marker;
  w1[1] |= c_append_helper[offset][5] & marker;
  w1[2] |= c_append_helper[offset][6] & marker;
  w1[3] |= c_append_helper[offset][7] & marker;
}
|
|
|
|
/**
 * OR a 0x80 byte into the 12-word buffer w0 | w1 | w2 at byte position
 * `offset`.
 *
 * The position is resolved through row `offset` of c_append_helper, of which
 * only columns 0..11 are used here (0..3 for w0, 4..7 for w1, 8..11 for w2);
 * a row whose mask lies in a later column leaves all buffers unchanged.
 * NOTE(review): `offset` indexes the table directly and is not range-checked
 * here - callers are presumably expected to keep it below 64.
 */
static void append_0x80_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset)
{
  const u32 marker = 0x80808080;

  w0[0] |= c_append_helper[offset][ 0] & marker;
  w0[1] |= c_append_helper[offset][ 1] & marker;
  w0[2] |= c_append_helper[offset][ 2] & marker;
  w0[3] |= c_append_helper[offset][ 3] & marker;

  w1[0] |= c_append_helper[offset][ 4] & marker;
  w1[1] |= c_append_helper[offset][ 5] & marker;
  w1[2] |= c_append_helper[offset][ 6] & marker;
  w1[3] |= c_append_helper[offset][ 7] & marker;

  w2[0] |= c_append_helper[offset][ 8] & marker;
  w2[1] |= c_append_helper[offset][ 9] & marker;
  w2[2] |= c_append_helper[offset][10] & marker;
  w2[3] |= c_append_helper[offset][11] & marker;
}
|
|
|
|
/**
 * OR a 0x80 byte into the 16-word buffer w0 | w1 | w2 | w3 at byte position
 * `offset`.
 *
 * The position is resolved through row `offset` of c_append_helper; all 16
 * columns are used (0..3 for w0, 4..7 for w1, 8..11 for w2, 12..15 for w3).
 * NOTE(review): `offset` indexes the table directly and is not range-checked
 * here - callers are presumably expected to keep it below 64.
 */
static void append_0x80_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
  const u32 marker = 0x80808080;

  w0[0] |= c_append_helper[offset][ 0] & marker;
  w0[1] |= c_append_helper[offset][ 1] & marker;
  w0[2] |= c_append_helper[offset][ 2] & marker;
  w0[3] |= c_append_helper[offset][ 3] & marker;

  w1[0] |= c_append_helper[offset][ 4] & marker;
  w1[1] |= c_append_helper[offset][ 5] & marker;
  w1[2] |= c_append_helper[offset][ 6] & marker;
  w1[3] |= c_append_helper[offset][ 7] & marker;

  w2[0] |= c_append_helper[offset][ 8] & marker;
  w2[1] |= c_append_helper[offset][ 9] & marker;
  w2[2] |= c_append_helper[offset][10] & marker;
  w2[3] |= c_append_helper[offset][11] & marker;

  w3[0] |= c_append_helper[offset][12] & marker;
  w3[1] |= c_append_helper[offset][13] & marker;
  w3[2] |= c_append_helper[offset][14] & marker;
  w3[3] |= c_append_helper[offset][15] & marker;
}
|
|
|
|
static void append_0x80_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
|
|
{
|
|
switch (offset)
|
|
{
|
|
case 0:
|
|
w0[0] = w0[0] | 0x80;
|
|
break;
|
|
|
|
case 1:
|
|
w0[0] = w0[0] | 0x8000;
|
|
break;
|
|
|
|
case 2:
|
|
w0[0] = w0[0] | 0x800000;
|
|
break;
|
|
|
|
case 3:
|
|
w0[0] = w0[0] | 0x80000000;
|
|
break;
|
|
|
|
case 4:
|
|
w0[1] = w0[1] | 0x80;
|
|
break;
|
|
|
|
case 5:
|
|
w0[1] = w0[1] | 0x8000;
|
|
break;
|
|
|
|
case 6:
|
|
w0[1] = w0[1] | 0x800000;
|
|
break;
|
|
|
|
case 7:
|
|
w0[1] = w0[1] | 0x80000000;
|
|
break;
|
|
|
|
case 8:
|
|
w0[2] = w0[2] | 0x80;
|
|
break;
|
|
|
|
case 9:
|
|
w0[2] = w0[2] | 0x8000;
|
|
break;
|
|
|
|
case 10:
|
|
w0[2] = w0[2] | 0x800000;
|
|
break;
|
|
|
|
case 11:
|
|
w0[2] = w0[2] | 0x80000000;
|
|
break;
|
|
|
|
case 12:
|
|
w0[3] = w0[3] | 0x80;
|
|
break;
|
|
|
|
case 13:
|
|
w0[3] = w0[3] | 0x8000;
|
|
break;
|
|
|
|
case 14:
|
|
w0[3] = w0[3] | 0x800000;
|
|
break;
|
|
|
|
case 15:
|
|
w0[3] = w0[3] | 0x80000000;
|
|
break;
|
|
|
|
case 16:
|
|
w1[0] = w1[0] | 0x80;
|
|
break;
|
|
|
|
case 17:
|
|
w1[0] = w1[0] | 0x8000;
|
|
break;
|
|
|
|
case 18:
|
|
w1[0] = w1[0] | 0x800000;
|
|
break;
|
|
|
|
case 19:
|
|
w1[0] = w1[0] | 0x80000000;
|
|
break;
|
|
|
|
case 20:
|
|
w1[1] = w1[1] | 0x80;
|
|
break;
|
|
|
|
case 21:
|
|
w1[1] = w1[1] | 0x8000;
|
|
break;
|
|
|
|
case 22:
|
|
w1[1] = w1[1] | 0x800000;
|
|
break;
|
|
|
|
case 23:
|
|
w1[1] = w1[1] | 0x80000000;
|
|
break;
|
|
|
|
case 24:
|
|
w1[2] = w1[2] | 0x80;
|
|
break;
|
|
|
|
case 25:
|
|
w1[2] = w1[2] | 0x8000;
|
|
break;
|
|
|
|
case 26:
|
|
w1[2] = w1[2] | 0x800000;
|
|
break;
|
|
|
|
case 27:
|
|
w1[2] = w1[2] | 0x80000000;
|
|
break;
|
|
|
|
case 28:
|
|
w1[3] = w1[3] | 0x80;
|
|
break;
|
|
|
|
case 29:
|
|
w1[3] = w1[3] | 0x8000;
|
|
break;
|
|
|
|
case 30:
|
|
w1[3] = w1[3] | 0x800000;
|
|
break;
|
|
|
|
case 31:
|
|
w1[3] = w1[3] | 0x80000000;
|
|
break;
|
|
|
|
case 32:
|
|
w2[0] = w2[0] | 0x80;
|
|
break;
|
|
|
|
case 33:
|
|
w2[0] = w2[0] | 0x8000;
|
|
break;
|
|
|
|
case 34:
|
|
w2[0] = w2[0] | 0x800000;
|
|
break;
|
|
|
|
case 35:
|
|
w2[0] = w2[0] | 0x80000000;
|
|
break;
|
|
|
|
case 36:
|
|
w2[1] = w2[1] | 0x80;
|
|
break;
|
|
|
|
case 37:
|
|
w2[1] = w2[1] | 0x8000;
|
|
break;
|
|
|
|
case 38:
|
|
w2[1] = w2[1] | 0x800000;
|
|
break;
|
|
|
|
case 39:
|
|
w2[1] = w2[1] | 0x80000000;
|
|
break;
|
|
|
|
case 40:
|
|
w2[2] = w2[2] | 0x80;
|
|
break;
|
|
|
|
case 41:
|
|
w2[2] = w2[2] | 0x8000;
|
|
break;
|
|
|
|
case 42:
|
|
w2[2] = w2[2] | 0x800000;
|
|
break;
|
|
|
|
case 43:
|
|
w2[2] = w2[2] | 0x80000000;
|
|
break;
|
|
|
|
case 44:
|
|
w2[3] = w2[3] | 0x80;
|
|
break;
|
|
|
|
case 45:
|
|
w2[3] = w2[3] | 0x8000;
|
|
break;
|
|
|
|
case 46:
|
|
w2[3] = w2[3] | 0x800000;
|
|
break;
|
|
|
|
case 47:
|
|
w2[3] = w2[3] | 0x80000000;
|
|
break;
|
|
|
|
case 48:
|
|
w3[0] = w3[0] | 0x80;
|
|
break;
|
|
|
|
case 49:
|
|
w3[0] = w3[0] | 0x8000;
|
|
break;
|
|
|
|
case 50:
|
|
w3[0] = w3[0] | 0x800000;
|
|
break;
|
|
|
|
case 51:
|
|
w3[0] = w3[0] | 0x80000000;
|
|
break;
|
|
|
|
case 52:
|
|
w3[1] = w3[1] | 0x80;
|
|
break;
|
|
|
|
case 53:
|
|
w3[1] = w3[1] | 0x8000;
|
|
break;
|
|
|
|
case 54:
|
|
w3[1] = w3[1] | 0x800000;
|
|
break;
|
|
|
|
case 55:
|
|
w3[1] = w3[1] | 0x80000000;
|
|
break;
|
|
|
|
case 56:
|
|
w3[2] = w3[2] | 0x80;
|
|
break;
|
|
|
|
case 57:
|
|
w3[2] = w3[2] | 0x8000;
|
|
break;
|
|
|
|
case 58:
|
|
w3[2] = w3[2] | 0x800000;
|
|
break;
|
|
|
|
case 59:
|
|
w3[2] = w3[2] | 0x80000000;
|
|
break;
|
|
|
|
case 60:
|
|
w3[3] = w3[3] | 0x80;
|
|
break;
|
|
|
|
case 61:
|
|
w3[3] = w3[3] | 0x8000;
|
|
break;
|
|
|
|
case 62:
|
|
w3[3] = w3[3] | 0x800000;
|
|
break;
|
|
|
|
case 63:
|
|
w3[3] = w3[3] | 0x80000000;
|
|
break;
|
|
|
|
case 64:
|
|
w4[0] = w4[0] | 0x80;
|
|
break;
|
|
|
|
case 65:
|
|
w4[0] = w4[0] | 0x8000;
|
|
break;
|
|
|
|
case 66:
|
|
w4[0] = w4[0] | 0x800000;
|
|
break;
|
|
|
|
case 67:
|
|
w4[0] = w4[0] | 0x80000000;
|
|
break;
|
|
|
|
case 68:
|
|
w4[1] = w4[1] | 0x80;
|
|
break;
|
|
|
|
case 69:
|
|
w4[1] = w4[1] | 0x8000;
|
|
break;
|
|
|
|
case 70:
|
|
w4[1] = w4[1] | 0x800000;
|
|
break;
|
|
|
|
case 71:
|
|
w4[1] = w4[1] | 0x80000000;
|
|
break;
|
|
|
|
case 72:
|
|
w4[2] = w4[2] | 0x80;
|
|
break;
|
|
|
|
case 73:
|
|
w4[2] = w4[2] | 0x8000;
|
|
break;
|
|
|
|
case 74:
|
|
w4[2] = w4[2] | 0x800000;
|
|
break;
|
|
|
|
case 75:
|
|
w4[2] = w4[2] | 0x80000000;
|
|
break;
|
|
|
|
case 76:
|
|
w4[3] = w4[3] | 0x80;
|
|
break;
|
|
|
|
case 77:
|
|
w4[3] = w4[3] | 0x8000;
|
|
break;
|
|
|
|
case 78:
|
|
w4[3] = w4[3] | 0x800000;
|
|
break;
|
|
|
|
case 79:
|
|
w4[3] = w4[3] | 0x80000000;
|
|
break;
|
|
|
|
case 80:
|
|
w5[0] = w5[0] | 0x80;
|
|
break;
|
|
|
|
case 81:
|
|
w5[0] = w5[0] | 0x8000;
|
|
break;
|
|
|
|
case 82:
|
|
w5[0] = w5[0] | 0x800000;
|
|
break;
|
|
|
|
case 83:
|
|
w5[0] = w5[0] | 0x80000000;
|
|
break;
|
|
|
|
case 84:
|
|
w5[1] = w5[1] | 0x80;
|
|
break;
|
|
|
|
case 85:
|
|
w5[1] = w5[1] | 0x8000;
|
|
break;
|
|
|
|
case 86:
|
|
w5[1] = w5[1] | 0x800000;
|
|
break;
|
|
|
|
case 87:
|
|
w5[1] = w5[1] | 0x80000000;
|
|
break;
|
|
|
|
case 88:
|
|
w5[2] = w5[2] | 0x80;
|
|
break;
|
|
|
|
case 89:
|
|
w5[2] = w5[2] | 0x8000;
|
|
break;
|
|
|
|
case 90:
|
|
w5[2] = w5[2] | 0x800000;
|
|
break;
|
|
|
|
case 91:
|
|
w5[2] = w5[2] | 0x80000000;
|
|
break;
|
|
|
|
case 92:
|
|
w5[3] = w5[3] | 0x80;
|
|
break;
|
|
|
|
case 93:
|
|
w5[3] = w5[3] | 0x8000;
|
|
break;
|
|
|
|
case 94:
|
|
w5[3] = w5[3] | 0x800000;
|
|
break;
|
|
|
|
case 95:
|
|
w5[3] = w5[3] | 0x80000000;
|
|
break;
|
|
|
|
case 96:
|
|
w6[0] = w6[0] | 0x80;
|
|
break;
|
|
|
|
case 97:
|
|
w6[0] = w6[0] | 0x8000;
|
|
break;
|
|
|
|
case 98:
|
|
w6[0] = w6[0] | 0x800000;
|
|
break;
|
|
|
|
case 99:
|
|
w6[0] = w6[0] | 0x80000000;
|
|
break;
|
|
|
|
case 100:
|
|
w6[1] = w6[1] | 0x80;
|
|
break;
|
|
|
|
case 101:
|
|
w6[1] = w6[1] | 0x8000;
|
|
break;
|
|
|
|
case 102:
|
|
w6[1] = w6[1] | 0x800000;
|
|
break;
|
|
|
|
case 103:
|
|
w6[1] = w6[1] | 0x80000000;
|
|
break;
|
|
|
|
case 104:
|
|
w6[2] = w6[2] | 0x80;
|
|
break;
|
|
|
|
case 105:
|
|
w6[2] = w6[2] | 0x8000;
|
|
break;
|
|
|
|
case 106:
|
|
w6[2] = w6[2] | 0x800000;
|
|
break;
|
|
|
|
case 107:
|
|
w6[2] = w6[2] | 0x80000000;
|
|
break;
|
|
|
|
case 108:
|
|
w6[3] = w6[3] | 0x80;
|
|
break;
|
|
|
|
case 109:
|
|
w6[3] = w6[3] | 0x8000;
|
|
break;
|
|
|
|
case 110:
|
|
w6[3] = w6[3] | 0x800000;
|
|
break;
|
|
|
|
case 111:
|
|
w6[3] = w6[3] | 0x80000000;
|
|
break;
|
|
|
|
case 112:
|
|
w7[0] = w7[0] | 0x80;
|
|
break;
|
|
|
|
case 113:
|
|
w7[0] = w7[0] | 0x8000;
|
|
break;
|
|
|
|
case 114:
|
|
w7[0] = w7[0] | 0x800000;
|
|
break;
|
|
|
|
case 115:
|
|
w7[0] = w7[0] | 0x80000000;
|
|
break;
|
|
|
|
case 116:
|
|
w7[1] = w7[1] | 0x80;
|
|
break;
|
|
|
|
case 117:
|
|
w7[1] = w7[1] | 0x8000;
|
|
break;
|
|
|
|
case 118:
|
|
w7[1] = w7[1] | 0x800000;
|
|
break;
|
|
|
|
case 119:
|
|
w7[1] = w7[1] | 0x80000000;
|
|
break;
|
|
|
|
case 120:
|
|
w7[2] = w7[2] | 0x80;
|
|
break;
|
|
|
|
case 121:
|
|
w7[2] = w7[2] | 0x8000;
|
|
break;
|
|
|
|
case 122:
|
|
w7[2] = w7[2] | 0x800000;
|
|
break;
|
|
|
|
case 123:
|
|
w7[2] = w7[2] | 0x80000000;
|
|
break;
|
|
|
|
case 124:
|
|
w7[3] = w7[3] | 0x80;
|
|
break;
|
|
|
|
case 125:
|
|
w7[3] = w7[3] | 0x8000;
|
|
break;
|
|
|
|
case 126:
|
|
w7[3] = w7[3] | 0x800000;
|
|
break;
|
|
|
|
case 127:
|
|
w7[3] = w7[3] | 0x80000000;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
 * Places the 0x80 marker byte at byte position `offset` of the 16-word
 * buffer `w`. Offset n addresses word n / 4; within a word, byte 0 is the
 * least significant byte (mask 0x000000ff) and byte 3 the most significant.
 *
 * Note the asymmetry that callers may rely on:
 *  - offsets that are a multiple of 4 OVERWRITE the whole word with 0x80,
 *  - all other offsets OR the marker into the existing word contents.
 *
 * Offsets of 64 and above match no case and leave `w` untouched.
 */
static void append_0x80_1x16 (u32x w[16], const u32 offset)
{
  switch (offset)
  {
    case  0: w[ 0]  = 0x00000080; break;
    case  1: w[ 0] |= 0x00008000; break;
    case  2: w[ 0] |= 0x00800000; break;
    case  3: w[ 0] |= 0x80000000; break;

    case  4: w[ 1]  = 0x00000080; break;
    case  5: w[ 1] |= 0x00008000; break;
    case  6: w[ 1] |= 0x00800000; break;
    case  7: w[ 1] |= 0x80000000; break;

    case  8: w[ 2]  = 0x00000080; break;
    case  9: w[ 2] |= 0x00008000; break;
    case 10: w[ 2] |= 0x00800000; break;
    case 11: w[ 2] |= 0x80000000; break;

    case 12: w[ 3]  = 0x00000080; break;
    case 13: w[ 3] |= 0x00008000; break;
    case 14: w[ 3] |= 0x00800000; break;
    case 15: w[ 3] |= 0x80000000; break;

    case 16: w[ 4]  = 0x00000080; break;
    case 17: w[ 4] |= 0x00008000; break;
    case 18: w[ 4] |= 0x00800000; break;
    case 19: w[ 4] |= 0x80000000; break;

    case 20: w[ 5]  = 0x00000080; break;
    case 21: w[ 5] |= 0x00008000; break;
    case 22: w[ 5] |= 0x00800000; break;
    case 23: w[ 5] |= 0x80000000; break;

    case 24: w[ 6]  = 0x00000080; break;
    case 25: w[ 6] |= 0x00008000; break;
    case 26: w[ 6] |= 0x00800000; break;
    case 27: w[ 6] |= 0x80000000; break;

    case 28: w[ 7]  = 0x00000080; break;
    case 29: w[ 7] |= 0x00008000; break;
    case 30: w[ 7] |= 0x00800000; break;
    case 31: w[ 7] |= 0x80000000; break;

    case 32: w[ 8]  = 0x00000080; break;
    case 33: w[ 8] |= 0x00008000; break;
    case 34: w[ 8] |= 0x00800000; break;
    case 35: w[ 8] |= 0x80000000; break;

    case 36: w[ 9]  = 0x00000080; break;
    case 37: w[ 9] |= 0x00008000; break;
    case 38: w[ 9] |= 0x00800000; break;
    case 39: w[ 9] |= 0x80000000; break;

    case 40: w[10]  = 0x00000080; break;
    case 41: w[10] |= 0x00008000; break;
    case 42: w[10] |= 0x00800000; break;
    case 43: w[10] |= 0x80000000; break;

    case 44: w[11]  = 0x00000080; break;
    case 45: w[11] |= 0x00008000; break;
    case 46: w[11] |= 0x00800000; break;
    case 47: w[11] |= 0x80000000; break;

    case 48: w[12]  = 0x00000080; break;
    case 49: w[12] |= 0x00008000; break;
    case 50: w[12] |= 0x00800000; break;
    case 51: w[12] |= 0x80000000; break;

    case 52: w[13]  = 0x00000080; break;
    case 53: w[13] |= 0x00008000; break;
    case 54: w[13] |= 0x00800000; break;
    case 55: w[13] |= 0x80000000; break;

    case 56: w[14]  = 0x00000080; break;
    case 57: w[14] |= 0x00008000; break;
    case 58: w[14] |= 0x00800000; break;
    case 59: w[14] |= 0x80000000; break;

    case 60: w[15]  = 0x00000080; break;
    case 61: w[15] |= 0x00008000; break;
    case 62: w[15] |= 0x00800000; break;
    case 63: w[15] |= 0x80000000; break;
  }
}
|
|
|
|
/**
 * Helper for switch_buffer_by_offset_le.
 *
 * Stores the 16 precomputed words t[0..15] into the buffer w0..w3, moved up
 * by `word_shift` whole words. With the buffer viewed as 16 flat words
 * (w0[0] = word 0 ... w3[3] = word 15), word i receives t[i - word_shift];
 * words below word_shift are set to 0 and entries of t that would land past
 * w3[3] are dropped. A word_shift of 16 or more matches no case and leaves
 * the buffer untouched.
 *
 * All reads come from t, so the write order inside a case is irrelevant.
 */
static void place_shifted_words_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x t[16], const u32 word_shift)
{
  switch (word_shift)
  {
    case  0:
      w0[0] = t[ 0]; w0[1] = t[ 1]; w0[2] = t[ 2]; w0[3] = t[ 3];
      w1[0] = t[ 4]; w1[1] = t[ 5]; w1[2] = t[ 6]; w1[3] = t[ 7];
      w2[0] = t[ 8]; w2[1] = t[ 9]; w2[2] = t[10]; w2[3] = t[11];
      w3[0] = t[12]; w3[1] = t[13]; w3[2] = t[14]; w3[3] = t[15];
      break;

    case  1:
      w0[0] = 0;     w0[1] = t[ 0]; w0[2] = t[ 1]; w0[3] = t[ 2];
      w1[0] = t[ 3]; w1[1] = t[ 4]; w1[2] = t[ 5]; w1[3] = t[ 6];
      w2[0] = t[ 7]; w2[1] = t[ 8]; w2[2] = t[ 9]; w2[3] = t[10];
      w3[0] = t[11]; w3[1] = t[12]; w3[2] = t[13]; w3[3] = t[14];
      break;

    case  2:
      w0[0] = 0;     w0[1] = 0;     w0[2] = t[ 0]; w0[3] = t[ 1];
      w1[0] = t[ 2]; w1[1] = t[ 3]; w1[2] = t[ 4]; w1[3] = t[ 5];
      w2[0] = t[ 6]; w2[1] = t[ 7]; w2[2] = t[ 8]; w2[3] = t[ 9];
      w3[0] = t[10]; w3[1] = t[11]; w3[2] = t[12]; w3[3] = t[13];
      break;

    case  3:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = t[ 0];
      w1[0] = t[ 1]; w1[1] = t[ 2]; w1[2] = t[ 3]; w1[3] = t[ 4];
      w2[0] = t[ 5]; w2[1] = t[ 6]; w2[2] = t[ 7]; w2[3] = t[ 8];
      w3[0] = t[ 9]; w3[1] = t[10]; w3[2] = t[11]; w3[3] = t[12];
      break;

    case  4:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = t[ 0]; w1[1] = t[ 1]; w1[2] = t[ 2]; w1[3] = t[ 3];
      w2[0] = t[ 4]; w2[1] = t[ 5]; w2[2] = t[ 6]; w2[3] = t[ 7];
      w3[0] = t[ 8]; w3[1] = t[ 9]; w3[2] = t[10]; w3[3] = t[11];
      break;

    case  5:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = t[ 0]; w1[2] = t[ 1]; w1[3] = t[ 2];
      w2[0] = t[ 3]; w2[1] = t[ 4]; w2[2] = t[ 5]; w2[3] = t[ 6];
      w3[0] = t[ 7]; w3[1] = t[ 8]; w3[2] = t[ 9]; w3[3] = t[10];
      break;

    case  6:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = t[ 0]; w1[3] = t[ 1];
      w2[0] = t[ 2]; w2[1] = t[ 3]; w2[2] = t[ 4]; w2[3] = t[ 5];
      w3[0] = t[ 6]; w3[1] = t[ 7]; w3[2] = t[ 8]; w3[3] = t[ 9];
      break;

    case  7:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = t[ 0];
      w2[0] = t[ 1]; w2[1] = t[ 2]; w2[2] = t[ 3]; w2[3] = t[ 4];
      w3[0] = t[ 5]; w3[1] = t[ 6]; w3[2] = t[ 7]; w3[3] = t[ 8];
      break;

    case  8:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = t[ 0]; w2[1] = t[ 1]; w2[2] = t[ 2]; w2[3] = t[ 3];
      w3[0] = t[ 4]; w3[1] = t[ 5]; w3[2] = t[ 6]; w3[3] = t[ 7];
      break;

    case  9:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = t[ 0]; w2[2] = t[ 1]; w2[3] = t[ 2];
      w3[0] = t[ 3]; w3[1] = t[ 4]; w3[2] = t[ 5]; w3[3] = t[ 6];
      break;

    case 10:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = t[ 0]; w2[3] = t[ 1];
      w3[0] = t[ 2]; w3[1] = t[ 3]; w3[2] = t[ 4]; w3[3] = t[ 5];
      break;

    case 11:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = 0;     w2[3] = t[ 0];
      w3[0] = t[ 1]; w3[1] = t[ 2]; w3[2] = t[ 3]; w3[3] = t[ 4];
      break;

    case 12:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = 0;     w2[3] = 0;
      w3[0] = t[ 0]; w3[1] = t[ 1]; w3[2] = t[ 2]; w3[3] = t[ 3];
      break;

    case 13:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = 0;     w2[3] = 0;
      w3[0] = 0;     w3[1] = t[ 0]; w3[2] = t[ 1]; w3[3] = t[ 2];
      break;

    case 14:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = 0;     w2[3] = 0;
      w3[0] = 0;     w3[1] = 0;     w3[2] = t[ 0]; w3[3] = t[ 1];
      break;

    case 15:
      w0[0] = 0;     w0[1] = 0;     w0[2] = 0;     w0[3] = 0;
      w1[0] = 0;     w1[1] = 0;     w1[2] = 0;     w1[3] = 0;
      w2[0] = 0;     w2[1] = 0;     w2[2] = 0;     w2[3] = 0;
      w3[0] = 0;     w3[1] = 0;     w3[2] = 0;     w3[3] = t[ 0];
      break;
  }
}

/**
 * Moves the contents of the 16-word buffer w0..w3 (w0[0] is the first word,
 * w3[3] the last) upwards by `offset` bytes, in place.
 *
 * The work is split into two independent steps:
 *  1. Sub-word part: every pair of neighbouring words (previous, current) is
 *     combined into one word by amd_bytealign / __byte_perm, with 0 standing
 *     in for the word before w0[0]. The exact byte selection is delegated to
 *     those helpers (defined elsewhere; presumably a byte-granular funnel
 *     shift by offset & 3 -- confirm against their definitions).
 *  2. Whole-word part: the 16 combined words are stored offset / 4 words
 *     further up; the words below are zeroed and whatever would land past
 *     w3[3] is discarded (see place_shifted_words_le).
 *
 * For offset >= 64 no word is moved and the buffer keeps its contents.
 *
 * Which implementation is compiled depends on the IS_* platform macros; if
 * none of them is defined the function does nothing.
 */
static void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
  #if defined IS_AMD_LEGACY || defined IS_GENERIC
  {
    // this path runs amd_bytealign on swap32'ed words and swaps back at the end

    w0[0] = swap32 (w0[0]);
    w0[1] = swap32 (w0[1]);
    w0[2] = swap32 (w0[2]);
    w0[3] = swap32 (w0[3]);
    w1[0] = swap32 (w1[0]);
    w1[1] = swap32 (w1[1]);
    w1[2] = swap32 (w1[2]);
    w1[3] = swap32 (w1[3]);
    w2[0] = swap32 (w2[0]);
    w2[1] = swap32 (w2[1]);
    w2[2] = swap32 (w2[2]);
    w2[3] = swap32 (w2[3]);
    w3[0] = swap32 (w3[0]);
    w3[1] = swap32 (w3[1]);
    w3[2] = swap32 (w3[2]);
    w3[3] = swap32 (w3[3]);

    // step 1: combine each word with its predecessor

    u32x t[16];

    t[ 0] = amd_bytealign (    0, w0[0], offset);
    t[ 1] = amd_bytealign (w0[0], w0[1], offset);
    t[ 2] = amd_bytealign (w0[1], w0[2], offset);
    t[ 3] = amd_bytealign (w0[2], w0[3], offset);
    t[ 4] = amd_bytealign (w0[3], w1[0], offset);
    t[ 5] = amd_bytealign (w1[0], w1[1], offset);
    t[ 6] = amd_bytealign (w1[1], w1[2], offset);
    t[ 7] = amd_bytealign (w1[2], w1[3], offset);
    t[ 8] = amd_bytealign (w1[3], w2[0], offset);
    t[ 9] = amd_bytealign (w2[0], w2[1], offset);
    t[10] = amd_bytealign (w2[1], w2[2], offset);
    t[11] = amd_bytealign (w2[2], w2[3], offset);
    t[12] = amd_bytealign (w2[3], w3[0], offset);
    t[13] = amd_bytealign (w3[0], w3[1], offset);
    t[14] = amd_bytealign (w3[1], w3[2], offset);
    t[15] = amd_bytealign (w3[2], w3[3], offset);

    // step 2: store them offset / 4 words further up

    place_shifted_words_le (w0, w1, w2, w3, t, offset / 4);

    // undo the initial swap (also restores the input when nothing was moved)

    w0[0] = swap32 (w0[0]);
    w0[1] = swap32 (w0[1]);
    w0[2] = swap32 (w0[2]);
    w0[3] = swap32 (w0[3]);
    w1[0] = swap32 (w1[0]);
    w1[1] = swap32 (w1[1]);
    w1[2] = swap32 (w1[2]);
    w1[3] = swap32 (w1[3]);
    w2[0] = swap32 (w2[0]);
    w2[1] = swap32 (w2[1]);
    w2[2] = swap32 (w2[2]);
    w2[3] = swap32 (w2[3]);
    w3[0] = swap32 (w3[0]);
    w3[1] = swap32 (w3[1]);
    w3[2] = swap32 (w3[2]);
    w3[3] = swap32 (w3[3]);
  }
  #endif

  #if defined IS_AMD_ROCM || defined IS_NV
  {
    const int byte_rem  = offset & 3;

    const int byte_fill = 4 - byte_rem; // 1..4

    // the selector is derived from byte_fill; its encoding differs per platform

    #if defined IS_NV
    const int selector = (0x76543210 >> (byte_fill * 4)) & 0xffff;
    #endif

    #if defined IS_AMD_ROCM
    const int selector = 0x0706050403020100 >> (byte_fill * 8);
    #endif

    // step 1: combine each word with its predecessor

    u32x t[16];

    t[ 0] = __byte_perm (    0, w0[0], selector);
    t[ 1] = __byte_perm (w0[0], w0[1], selector);
    t[ 2] = __byte_perm (w0[1], w0[2], selector);
    t[ 3] = __byte_perm (w0[2], w0[3], selector);
    t[ 4] = __byte_perm (w0[3], w1[0], selector);
    t[ 5] = __byte_perm (w1[0], w1[1], selector);
    t[ 6] = __byte_perm (w1[1], w1[2], selector);
    t[ 7] = __byte_perm (w1[2], w1[3], selector);
    t[ 8] = __byte_perm (w1[3], w2[0], selector);
    t[ 9] = __byte_perm (w2[0], w2[1], selector);
    t[10] = __byte_perm (w2[1], w2[2], selector);
    t[11] = __byte_perm (w2[2], w2[3], selector);
    t[12] = __byte_perm (w2[3], w3[0], selector);
    t[13] = __byte_perm (w3[0], w3[1], selector);
    t[14] = __byte_perm (w3[1], w3[2], selector);
    t[15] = __byte_perm (w3[2], w3[3], selector);

    // step 2: store them offset / 4 words further up

    place_shifted_words_le (w0, w1, w2, w3, t, offset / 4);
  }
  #endif
}
|
|
|
|
static void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD || defined IS_GENERIC
|
|
w0[0] = swap32 (w0[0]);
|
|
w0[1] = swap32 (w0[1]);
|
|
w0[2] = swap32 (w0[2]);
|
|
w0[3] = swap32 (w0[3]);
|
|
w1[0] = swap32 (w1[0]);
|
|
w1[1] = swap32 (w1[1]);
|
|
w1[2] = swap32 (w1[2]);
|
|
w1[3] = swap32 (w1[3]);
|
|
w2[0] = swap32 (w2[0]);
|
|
w2[1] = swap32 (w2[1]);
|
|
w2[2] = swap32 (w2[2]);
|
|
w2[3] = swap32 (w2[3]);
|
|
w3[0] = swap32 (w3[0]);
|
|
w3[1] = swap32 (w3[1]);
|
|
w3[2] = swap32 (w3[2]);
|
|
w3[3] = swap32 (w3[3]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign (w3[3], 0, offset);
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign (w3[3], 0, offset);
|
|
c0[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign (w3[3], 0, offset);
|
|
c0[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign (w3[3], 0, offset);
|
|
c0[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign (w3[3], 0, offset);
|
|
c0[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign (w3[3], 0, offset);
|
|
c1[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign (w3[3], 0, offset);
|
|
c1[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign (w3[3], 0, offset);
|
|
c1[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign (w3[3], 0, offset);
|
|
c1[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign (w3[3], 0, offset);
|
|
c2[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign (w3[3], 0, offset);
|
|
c2[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign (w3[3], 0, offset);
|
|
c2[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign (w3[3], 0, offset);
|
|
c2[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign (w3[3], 0, offset);
|
|
c3[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign (w3[3], 0, offset);
|
|
c3[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c3[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c2[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
c1[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
c0[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign (w3[3], 0, offset);
|
|
c3[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c3[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c3[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c2[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c2[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
c1[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
c1[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
c0[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
c0[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
w0[0] = swap32 (w0[0]);
|
|
w0[1] = swap32 (w0[1]);
|
|
w0[2] = swap32 (w0[2]);
|
|
w0[3] = swap32 (w0[3]);
|
|
w1[0] = swap32 (w1[0]);
|
|
w1[1] = swap32 (w1[1]);
|
|
w1[2] = swap32 (w1[2]);
|
|
w1[3] = swap32 (w1[3]);
|
|
w2[0] = swap32 (w2[0]);
|
|
w2[1] = swap32 (w2[1]);
|
|
w2[2] = swap32 (w2[2]);
|
|
w2[3] = swap32 (w2[3]);
|
|
w3[0] = swap32 (w3[0]);
|
|
w3[1] = swap32 (w3[1]);
|
|
w3[2] = swap32 (w3[2]);
|
|
w3[3] = swap32 (w3[3]);
|
|
c0[0] = swap32 (c0[0]);
|
|
c0[1] = swap32 (c0[1]);
|
|
c0[2] = swap32 (c0[2]);
|
|
c0[3] = swap32 (c0[3]);
|
|
c1[0] = swap32 (c1[0]);
|
|
c1[1] = swap32 (c1[1]);
|
|
c1[2] = swap32 (c1[2]);
|
|
c1[3] = swap32 (c1[3]);
|
|
c2[0] = swap32 (c2[0]);
|
|
c2[1] = swap32 (c2[1]);
|
|
c2[2] = swap32 (c2[2]);
|
|
c2[3] = swap32 (c2[3]);
|
|
c3[0] = swap32 (c3[0]);
|
|
c3[1] = swap32 (c3[1]);
|
|
c3[2] = swap32 (c3[2]);
|
|
c3[3] = swap32 (c3[3]);
|
|
#endif
|
|
|
|
#ifdef IS_NV
|
|
// todo
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
w3[3] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
w3[2] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
w3[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
w3[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
w2[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w2[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w2[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w2[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w1[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w1[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w1[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w1[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w0[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w0[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w0[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w0[0] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[0] = w0[1];
|
|
w0[1] = w0[2];
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c0[0] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
w3[3] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
w3[2] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
w3[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
w3[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w2[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w2[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w2[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w2[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w1[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w1[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w1[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w1[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w0[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w0[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w0[1] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[1] = w0[2];
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c0[1] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c0[0] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
w3[3] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
w3[2] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
w3[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w3[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w2[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w2[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w2[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w2[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w1[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w1[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w1[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w1[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w0[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w0[2] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c0[2] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c0[1] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c0[0] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
w3[3] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
w3[2] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w3[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w3[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w2[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w2[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w2[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w2[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w1[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w1[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w1[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w1[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w0[3] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c0[3] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c0[2] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c0[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c0[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
w3[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w3[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w3[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w3[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w2[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w2[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w2[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w2[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w1[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w1[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w1[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w1[0] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c1[0] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c0[3] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c0[2] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c0[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c0[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
w3[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w3[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w3[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w3[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w2[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w2[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w2[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w2[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w1[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w1[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w1[1] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c1[1] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c1[0] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c0[3] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c0[2] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c0[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c0[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
w3[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w3[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w3[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w3[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w2[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w2[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w2[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w2[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w1[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w1[2] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c1[2] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c1[1] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c1[0] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c0[3] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c0[2] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c0[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c0[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
w3[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w3[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w3[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w3[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w2[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w2[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w2[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w2[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w1[3] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c1[3] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c1[2] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c1[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c1[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c0[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c0[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c0[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c0[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
w3[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w3[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w3[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w3[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w2[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w2[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w2[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w2[0] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c2[0] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c1[3] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c1[2] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c1[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c1[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c0[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c0[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c0[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c0[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
w3[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w3[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w3[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w3[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w2[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w2[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w2[1] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c2[1] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c2[0] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c1[3] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c1[2] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c1[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c1[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c0[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c0[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c0[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c0[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
w3[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w3[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w3[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w3[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w2[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w2[2] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c2[2] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c2[1] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c2[0] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c1[3] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c1[2] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c1[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c1[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c0[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c0[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c0[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
c0[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
w3[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w3[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w3[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w3[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w2[3] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c2[3] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c2[2] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c2[1] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c2[0] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c1[3] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c1[2] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c1[1] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c1[0] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c0[3] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c0[2] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
c0[1] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
c0[0] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
w3[3] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w3[2] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w3[1] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w3[0] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c3[0] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c2[3] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c2[2] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c2[1] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c2[0] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c1[3] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c1[2] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c1[1] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c1[0] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c0[3] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
c0[2] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
c0[1] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
c0[0] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
w3[3] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w3[2] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w3[1] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c3[1] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c3[0] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c2[3] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c2[2] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c2[1] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c2[0] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c1[3] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c1[2] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c1[1] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c1[0] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
c0[3] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
c0[2] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
c0[1] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
c0[0] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
w3[3] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w3[2] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = c3[2];
|
|
c3[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign ( 0, w3[3], offset_minus_4);
|
|
c3[2] = amd_bytealign (w3[3], w3[2], offset_minus_4);
|
|
c3[1] = amd_bytealign (w3[2], w3[1], offset_minus_4);
|
|
c3[0] = amd_bytealign (w3[1], w3[0], offset_minus_4);
|
|
c2[3] = amd_bytealign (w3[0], w2[3], offset_minus_4);
|
|
c2[2] = amd_bytealign (w2[3], w2[2], offset_minus_4);
|
|
c2[1] = amd_bytealign (w2[2], w2[1], offset_minus_4);
|
|
c2[0] = amd_bytealign (w2[1], w2[0], offset_minus_4);
|
|
c1[3] = amd_bytealign (w2[0], w1[3], offset_minus_4);
|
|
c1[2] = amd_bytealign (w1[3], w1[2], offset_minus_4);
|
|
c1[1] = amd_bytealign (w1[2], w1[1], offset_minus_4);
|
|
c1[0] = amd_bytealign (w1[1], w1[0], offset_minus_4);
|
|
c0[3] = amd_bytealign (w1[0], w0[3], offset_minus_4);
|
|
c0[2] = amd_bytealign (w0[3], w0[2], offset_minus_4);
|
|
c0[1] = amd_bytealign (w0[2], w0[1], offset_minus_4);
|
|
c0[0] = amd_bytealign (w0[1], w0[0], offset_minus_4);
|
|
w3[3] = amd_bytealign (w0[0], 0, offset_minus_4);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = c3[2];
|
|
c3[2] = c3[3];
|
|
c3[3] = 0;
|
|
}
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
 * Moves the contents of the 16-word buffer w0[0] .. w3[3] towards higher
 * word indices, in place.
 *
 * The buffer is treated as one flat sequence (w0[0] is word 0, w3[3] is
 * word 15).  The work is split in two stages:
 *
 *   1. byte stage: every word is recombined with its lower neighbour
 *      (word 0 is combined with a literal 0) through amd_bytealign () or
 *      __byte_perm (), depending on the platform macros.
 *      NOTE(review): both helpers are defined outside this function;
 *      presumably they funnel-shift by (offset & 3) bytes - confirm.
 *   2. word stage: the result is moved up by (offset / 4) whole words,
 *      the vacated low words are set to 0 and words pushed past w3[3]
 *      are dropped.
 *
 * When offset / 4 is greater than 15 the buffer is left unchanged.
 * When none of the platform macros is defined the function does nothing.
 *
 * w0, w1, w2, w3: the four 4-word quarters of the buffer (read and written)
 * offset:         shift distance; offset / 4 selects the word distance
 */
static void switch_buffer_by_offset_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
  const u32 word_shift = offset / 4;

  // distances beyond the last word are not handled: keep the buffer as is
  if (word_shift > 15) return;

  #if defined IS_AMD_LEGACY || defined IS_GENERIC
  {
    u32x t[16];

    for (int i = 0; i < 4; i++)
    {
      t[ 0 + i] = w0[i];
      t[ 4 + i] = w1[i];
      t[ 8 + i] = w2[i];
      t[12 + i] = w3[i];
    }

    // byte stage, highest word first so each lower neighbour is still unmodified
    for (int i = 15; i >= 1; i--)
    {
      t[i] = amd_bytealign (t[i - 1], t[i], offset);
    }

    t[0] = amd_bytealign (0, t[0], offset);

    // word stage: apply the word distance as fixed-size moves of 8, 4, 2 and 1
    for (int step = 8; step >= 1; step >>= 1)
    {
      if ((word_shift & (u32) step) == 0) continue;

      for (int i = 15; i >= step; i--) t[i] = t[i - step];

      for (int i = 0; i < step; i++) t[i] = 0;
    }

    for (int i = 0; i < 4; i++)
    {
      w0[i] = t[ 0 + i];
      w1[i] = t[ 4 + i];
      w2[i] = t[ 8 + i];
      w3[i] = t[12 + i];
    }
  }
  #endif

  #if defined IS_AMD_ROCM || defined IS_NV
  {
    // the permutation selector depends only on the low two bits of offset
    #if defined IS_NV
    const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
    #endif

    #if defined IS_AMD_ROCM
    const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
    #endif

    u32x t[16];

    for (int i = 0; i < 4; i++)
    {
      t[ 0 + i] = w0[i];
      t[ 4 + i] = w1[i];
      t[ 8 + i] = w2[i];
      t[12 + i] = w3[i];
    }

    // byte stage, highest word first so each lower neighbour is still unmodified
    for (int i = 15; i >= 1; i--)
    {
      t[i] = __byte_perm (t[i], t[i - 1], selector);
    }

    t[0] = __byte_perm (t[0], 0, selector);

    // word stage: apply the word distance as fixed-size moves of 8, 4, 2 and 1
    for (int step = 8; step >= 1; step >>= 1)
    {
      if ((word_shift & (u32) step) == 0) continue;

      for (int i = 15; i >= step; i--) t[i] = t[i - step];

      for (int i = 0; i < step; i++) t[i] = 0;
    }

    for (int i = 0; i < 4; i++)
    {
      w0[i] = t[ 0 + i];
      w1[i] = t[ 4 + i];
      w2[i] = t[ 8 + i];
      w3[i] = t[12 + i];
    }
  }
  #endif
}
|
|
|
|
/**
 * Copies the sixteen words held in w0..w3 into one flat array, w0[0] first
 * and w3[3] last, so that they can be addressed with a single running index.
 */
static void carry_be_flatten (u32x flat[16], const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4])
{
  for (u32 i = 0; i < 4; i++)
  {
    flat[ 0 + i] = w0[i];
    flat[ 4 + i] = w1[i];
    flat[ 8 + i] = w2[i];
    flat[12 + i] = w3[i];
  }
}

/**
 * Writes a shifted 32-slot image back to the caller's arrays.
 *
 * Slots 0..15 always go to w0..w3. Slots 16.. go to the carry arrays, but
 * only carry slots 0..words are stored (c0[0] is carry slot 0, c3[3] is carry
 * slot 15); every other carry word is left exactly as the caller passed it.
 */
static void carry_be_unflatten (const u32x shifted[32], const u32 words, u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4])
{
  for (u32 i = 0; i < 4; i++)
  {
    w0[i] = shifted[ 0 + i];
    w1[i] = shifted[ 4 + i];
    w2[i] = shifted[ 8 + i];
    w3[i] = shifted[12 + i];

    // slots above 16 + words were never produced, so they must not be read
    if (i +  0 <= words) c0[i] = shifted[16 + i];
    if (i +  4 <= words) c1[i] = shifted[20 + i];
    if (i +  8 <= words) c2[i] = shifted[24 + i];
    if (i + 12 <= words) c3[i] = shifted[28 + i];
  }
}

/**
 * Moves the 16-word buffer w0..w3 towards higher indices and collects the
 * words pushed past w3[3] in the carry arrays c0..c3.
 *
 * With words = offset / 4, and the buffer viewed as one array W[0..15]
 * (W[0] = w0[0], W[15] = w3[3]):
 *
 *   - result slot (p + words) = combine (W[p - 1], W[p]) for p = 0..16,
 *     where the missing neighbours W[-1] and W[16] are a literal 0
 *   - result slots below words are set to 0
 *   - result slots 0..15 are stored in w0..w3, slots 16..(16 + words) in the
 *     carry arrays; carry words beyond that are not written
 *
 * combine () is amd_bytealign (W[p - 1], W[p], offset) on the
 * IS_AMD_LEGACY / IS_GENERIC path and __byte_perm (W[p], W[p - 1], selector)
 * on the IS_AMD_ROCM / IS_NV path.
 * NOTE(review): both presumably shift the word pair by (offset & 3) bytes -
 * confirm against the definitions of these helpers.
 *
 * If offset / 4 is above 15, or none of the four platform macros is defined,
 * nothing is modified.
 *
 * w0..w3  buffer words, updated in place
 * c0..c3  carry words, partially written (see above)
 * offset  shift distance; offset / 4 selects the whole-word part
 */
static void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], const u32 offset)
{
  const u32 words = offset / 4;

  // only word shifts 0..15 are handled, everything else is a no-op
  if (words > 15) return;

  #if defined IS_AMD_LEGACY || defined IS_GENERIC
  {
    u32x src[16];
    u32x dst[32];

    // work from a snapshot so every combine () sees the unmodified input
    carry_be_flatten (src, w0, w1, w2, w3);

    for (u32 i = 0; i < words; i++)
    {
      dst[i] = 0;
    }

    // lowest produced word has no lower neighbour
    dst[words] = amd_bytealign (0, src[0], offset);

    for (u32 i = 1; i < 16; i++)
    {
      dst[words + i] = amd_bytealign (src[i - 1], src[i], offset);
    }

    // highest produced word has no upper neighbour
    dst[words + 16] = amd_bytealign (src[15], 0, offset);

    carry_be_unflatten (dst, words, w0, w1, w2, w3, c0, c1, c2, c3);
  }
  #endif

  #if defined IS_AMD_ROCM || defined IS_NV
  {
    // only the low two bits of offset enter the selector

    #if defined IS_NV
    const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
    #endif

    // NOTE(review): the shifted 64-bit constant is narrowed to int here;
    // presumably its low 32 bits are the intended selector - confirm
    #if defined IS_AMD_ROCM
    const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
    #endif

    u32x src[16];
    u32x dst[32];

    // work from a snapshot so every combine () sees the unmodified input
    carry_be_flatten (src, w0, w1, w2, w3);

    for (u32 i = 0; i < words; i++)
    {
      dst[i] = 0;
    }

    // lowest produced word has no lower neighbour
    dst[words] = __byte_perm (src[0], 0, selector);

    for (u32 i = 1; i < 16; i++)
    {
      dst[words + i] = __byte_perm (src[i], src[i - 1], selector);
    }

    // highest produced word has no upper neighbour
    dst[words + 16] = __byte_perm (0, src[15], selector);

    carry_be_unflatten (dst, words, w0, w1, w2, w3, c0, c1, c2, c3);
  }
  #endif
}
|
|
|
|
static void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
w0[0] = swap32 (w0[0]);
|
|
w0[1] = swap32 (w0[1]);
|
|
w0[2] = swap32 (w0[2]);
|
|
w0[3] = swap32 (w0[3]);
|
|
w1[0] = swap32 (w1[0]);
|
|
w1[1] = swap32 (w1[1]);
|
|
w1[2] = swap32 (w1[2]);
|
|
w1[3] = swap32 (w1[3]);
|
|
w2[0] = swap32 (w2[0]);
|
|
w2[1] = swap32 (w2[1]);
|
|
w2[2] = swap32 (w2[2]);
|
|
w2[3] = swap32 (w2[3]);
|
|
w3[0] = swap32 (w3[0]);
|
|
w3[1] = swap32 (w3[1]);
|
|
w3[2] = swap32 (w3[2]);
|
|
w3[3] = swap32 (w3[3]);
|
|
w4[0] = swap32 (w4[0]);
|
|
w4[1] = swap32 (w4[1]);
|
|
w4[2] = swap32 (w4[2]);
|
|
w4[3] = swap32 (w4[3]);
|
|
w5[0] = swap32 (w5[0]);
|
|
w5[1] = swap32 (w5[1]);
|
|
w5[2] = swap32 (w5[2]);
|
|
w5[3] = swap32 (w5[3]);
|
|
w6[0] = swap32 (w6[0]);
|
|
w6[1] = swap32 (w6[1]);
|
|
w6[2] = swap32 (w6[2]);
|
|
w6[3] = swap32 (w6[3]);
|
|
w7[0] = swap32 (w7[0]);
|
|
w7[1] = swap32 (w7[1]);
|
|
w7[2] = swap32 (w7[2]);
|
|
w7[3] = swap32 (w7[3]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
w0[0] = swap32 (w0[0]);
|
|
w0[1] = swap32 (w0[1]);
|
|
w0[2] = swap32 (w0[2]);
|
|
w0[3] = swap32 (w0[3]);
|
|
w1[0] = swap32 (w1[0]);
|
|
w1[1] = swap32 (w1[1]);
|
|
w1[2] = swap32 (w1[2]);
|
|
w1[3] = swap32 (w1[3]);
|
|
w2[0] = swap32 (w2[0]);
|
|
w2[1] = swap32 (w2[1]);
|
|
w2[2] = swap32 (w2[2]);
|
|
w2[3] = swap32 (w2[3]);
|
|
w3[0] = swap32 (w3[0]);
|
|
w3[1] = swap32 (w3[1]);
|
|
w3[2] = swap32 (w3[2]);
|
|
w3[3] = swap32 (w3[3]);
|
|
w4[0] = swap32 (w4[0]);
|
|
w4[1] = swap32 (w4[1]);
|
|
w4[2] = swap32 (w4[2]);
|
|
w4[3] = swap32 (w4[3]);
|
|
w5[0] = swap32 (w5[0]);
|
|
w5[1] = swap32 (w5[1]);
|
|
w5[2] = swap32 (w5[2]);
|
|
w5[3] = swap32 (w5[3]);
|
|
w6[0] = swap32 (w6[0]);
|
|
w6[1] = swap32 (w6[1]);
|
|
w6[2] = swap32 (w6[2]);
|
|
w6[3] = swap32 (w6[3]);
|
|
w7[0] = swap32 (w7[0]);
|
|
w7[1] = swap32 (w7[1]);
|
|
w7[2] = swap32 (w7[2]);
|
|
w7[3] = swap32 (w7[3]);
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = __byte_perm (w7[2], w7[3], selector);
|
|
w7[2] = __byte_perm (w7[1], w7[2], selector);
|
|
w7[1] = __byte_perm (w7[0], w7[1], selector);
|
|
w7[0] = __byte_perm (w6[3], w7[0], selector);
|
|
w6[3] = __byte_perm (w6[2], w6[3], selector);
|
|
w6[2] = __byte_perm (w6[1], w6[2], selector);
|
|
w6[1] = __byte_perm (w6[0], w6[1], selector);
|
|
w6[0] = __byte_perm (w5[3], w6[0], selector);
|
|
w5[3] = __byte_perm (w5[2], w5[3], selector);
|
|
w5[2] = __byte_perm (w5[1], w5[2], selector);
|
|
w5[1] = __byte_perm (w5[0], w5[1], selector);
|
|
w5[0] = __byte_perm (w4[3], w5[0], selector);
|
|
w4[3] = __byte_perm (w4[2], w4[3], selector);
|
|
w4[2] = __byte_perm (w4[1], w4[2], selector);
|
|
w4[1] = __byte_perm (w4[0], w4[1], selector);
|
|
w4[0] = __byte_perm (w3[3], w4[0], selector);
|
|
w3[3] = __byte_perm (w3[2], w3[3], selector);
|
|
w3[2] = __byte_perm (w3[1], w3[2], selector);
|
|
w3[1] = __byte_perm (w3[0], w3[1], selector);
|
|
w3[0] = __byte_perm (w2[3], w3[0], selector);
|
|
w2[3] = __byte_perm (w2[2], w2[3], selector);
|
|
w2[2] = __byte_perm (w2[1], w2[2], selector);
|
|
w2[1] = __byte_perm (w2[0], w2[1], selector);
|
|
w2[0] = __byte_perm (w1[3], w2[0], selector);
|
|
w1[3] = __byte_perm (w1[2], w1[3], selector);
|
|
w1[2] = __byte_perm (w1[1], w1[2], selector);
|
|
w1[1] = __byte_perm (w1[0], w1[1], selector);
|
|
w1[0] = __byte_perm (w0[3], w1[0], selector);
|
|
w0[3] = __byte_perm (w0[2], w0[3], selector);
|
|
w0[2] = __byte_perm (w0[1], w0[2], selector);
|
|
w0[1] = __byte_perm (w0[0], w0[1], selector);
|
|
w0[0] = __byte_perm ( 0, w0[0], selector);
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = __byte_perm (w7[1], w7[2], selector);
|
|
w7[2] = __byte_perm (w7[0], w7[1], selector);
|
|
w7[1] = __byte_perm (w6[3], w7[0], selector);
|
|
w7[0] = __byte_perm (w6[2], w6[3], selector);
|
|
w6[3] = __byte_perm (w6[1], w6[2], selector);
|
|
w6[2] = __byte_perm (w6[0], w6[1], selector);
|
|
w6[1] = __byte_perm (w5[3], w6[0], selector);
|
|
w6[0] = __byte_perm (w5[2], w5[3], selector);
|
|
w5[3] = __byte_perm (w5[1], w5[2], selector);
|
|
w5[2] = __byte_perm (w5[0], w5[1], selector);
|
|
w5[1] = __byte_perm (w4[3], w5[0], selector);
|
|
w5[0] = __byte_perm (w4[2], w4[3], selector);
|
|
w4[3] = __byte_perm (w4[1], w4[2], selector);
|
|
w4[2] = __byte_perm (w4[0], w4[1], selector);
|
|
w4[1] = __byte_perm (w3[3], w4[0], selector);
|
|
w4[0] = __byte_perm (w3[2], w3[3], selector);
|
|
w3[3] = __byte_perm (w3[1], w3[2], selector);
|
|
w3[2] = __byte_perm (w3[0], w3[1], selector);
|
|
w3[1] = __byte_perm (w2[3], w3[0], selector);
|
|
w3[0] = __byte_perm (w2[2], w2[3], selector);
|
|
w2[3] = __byte_perm (w2[1], w2[2], selector);
|
|
w2[2] = __byte_perm (w2[0], w2[1], selector);
|
|
w2[1] = __byte_perm (w1[3], w2[0], selector);
|
|
w2[0] = __byte_perm (w1[2], w1[3], selector);
|
|
w1[3] = __byte_perm (w1[1], w1[2], selector);
|
|
w1[2] = __byte_perm (w1[0], w1[1], selector);
|
|
w1[1] = __byte_perm (w0[3], w1[0], selector);
|
|
w1[0] = __byte_perm (w0[2], w0[3], selector);
|
|
w0[3] = __byte_perm (w0[1], w0[2], selector);
|
|
w0[2] = __byte_perm (w0[0], w0[1], selector);
|
|
w0[1] = __byte_perm ( 0, w0[0], selector);
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = __byte_perm (w7[0], w7[1], selector);
|
|
w7[2] = __byte_perm (w6[3], w7[0], selector);
|
|
w7[1] = __byte_perm (w6[2], w6[3], selector);
|
|
w7[0] = __byte_perm (w6[1], w6[2], selector);
|
|
w6[3] = __byte_perm (w6[0], w6[1], selector);
|
|
w6[2] = __byte_perm (w5[3], w6[0], selector);
|
|
w6[1] = __byte_perm (w5[2], w5[3], selector);
|
|
w6[0] = __byte_perm (w5[1], w5[2], selector);
|
|
w5[3] = __byte_perm (w5[0], w5[1], selector);
|
|
w5[2] = __byte_perm (w4[3], w5[0], selector);
|
|
w5[1] = __byte_perm (w4[2], w4[3], selector);
|
|
w5[0] = __byte_perm (w4[1], w4[2], selector);
|
|
w4[3] = __byte_perm (w4[0], w4[1], selector);
|
|
w4[2] = __byte_perm (w3[3], w4[0], selector);
|
|
w4[1] = __byte_perm (w3[2], w3[3], selector);
|
|
w4[0] = __byte_perm (w3[1], w3[2], selector);
|
|
w3[3] = __byte_perm (w3[0], w3[1], selector);
|
|
w3[2] = __byte_perm (w2[3], w3[0], selector);
|
|
w3[1] = __byte_perm (w2[2], w2[3], selector);
|
|
w3[0] = __byte_perm (w2[1], w2[2], selector);
|
|
w2[3] = __byte_perm (w2[0], w2[1], selector);
|
|
w2[2] = __byte_perm (w1[3], w2[0], selector);
|
|
w2[1] = __byte_perm (w1[2], w1[3], selector);
|
|
w2[0] = __byte_perm (w1[1], w1[2], selector);
|
|
w1[3] = __byte_perm (w1[0], w1[1], selector);
|
|
w1[2] = __byte_perm (w0[3], w1[0], selector);
|
|
w1[1] = __byte_perm (w0[2], w0[3], selector);
|
|
w1[0] = __byte_perm (w0[1], w0[2], selector);
|
|
w0[3] = __byte_perm (w0[0], w0[1], selector);
|
|
w0[2] = __byte_perm ( 0, w0[0], selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = __byte_perm (w6[3], w7[0], selector);
|
|
w7[2] = __byte_perm (w6[2], w6[3], selector);
|
|
w7[1] = __byte_perm (w6[1], w6[2], selector);
|
|
w7[0] = __byte_perm (w6[0], w6[1], selector);
|
|
w6[3] = __byte_perm (w5[3], w6[0], selector);
|
|
w6[2] = __byte_perm (w5[2], w5[3], selector);
|
|
w6[1] = __byte_perm (w5[1], w5[2], selector);
|
|
w6[0] = __byte_perm (w5[0], w5[1], selector);
|
|
w5[3] = __byte_perm (w4[3], w5[0], selector);
|
|
w5[2] = __byte_perm (w4[2], w4[3], selector);
|
|
w5[1] = __byte_perm (w4[1], w4[2], selector);
|
|
w5[0] = __byte_perm (w4[0], w4[1], selector);
|
|
w4[3] = __byte_perm (w3[3], w4[0], selector);
|
|
w4[2] = __byte_perm (w3[2], w3[3], selector);
|
|
w4[1] = __byte_perm (w3[1], w3[2], selector);
|
|
w4[0] = __byte_perm (w3[0], w3[1], selector);
|
|
w3[3] = __byte_perm (w2[3], w3[0], selector);
|
|
w3[2] = __byte_perm (w2[2], w2[3], selector);
|
|
w3[1] = __byte_perm (w2[1], w2[2], selector);
|
|
w3[0] = __byte_perm (w2[0], w2[1], selector);
|
|
w2[3] = __byte_perm (w1[3], w2[0], selector);
|
|
w2[2] = __byte_perm (w1[2], w1[3], selector);
|
|
w2[1] = __byte_perm (w1[1], w1[2], selector);
|
|
w2[0] = __byte_perm (w1[0], w1[1], selector);
|
|
w1[3] = __byte_perm (w0[3], w1[0], selector);
|
|
w1[2] = __byte_perm (w0[2], w0[3], selector);
|
|
w1[1] = __byte_perm (w0[1], w0[2], selector);
|
|
w1[0] = __byte_perm (w0[0], w0[1], selector);
|
|
w0[3] = __byte_perm ( 0, w0[0], selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = __byte_perm (w6[2], w6[3], selector);
|
|
w7[2] = __byte_perm (w6[1], w6[2], selector);
|
|
w7[1] = __byte_perm (w6[0], w6[1], selector);
|
|
w7[0] = __byte_perm (w5[3], w6[0], selector);
|
|
w6[3] = __byte_perm (w5[2], w5[3], selector);
|
|
w6[2] = __byte_perm (w5[1], w5[2], selector);
|
|
w6[1] = __byte_perm (w5[0], w5[1], selector);
|
|
w6[0] = __byte_perm (w4[3], w5[0], selector);
|
|
w5[3] = __byte_perm (w4[2], w4[3], selector);
|
|
w5[2] = __byte_perm (w4[1], w4[2], selector);
|
|
w5[1] = __byte_perm (w4[0], w4[1], selector);
|
|
w5[0] = __byte_perm (w3[3], w4[0], selector);
|
|
w4[3] = __byte_perm (w3[2], w3[3], selector);
|
|
w4[2] = __byte_perm (w3[1], w3[2], selector);
|
|
w4[1] = __byte_perm (w3[0], w3[1], selector);
|
|
w4[0] = __byte_perm (w2[3], w3[0], selector);
|
|
w3[3] = __byte_perm (w2[2], w2[3], selector);
|
|
w3[2] = __byte_perm (w2[1], w2[2], selector);
|
|
w3[1] = __byte_perm (w2[0], w2[1], selector);
|
|
w3[0] = __byte_perm (w1[3], w2[0], selector);
|
|
w2[3] = __byte_perm (w1[2], w1[3], selector);
|
|
w2[2] = __byte_perm (w1[1], w1[2], selector);
|
|
w2[1] = __byte_perm (w1[0], w1[1], selector);
|
|
w2[0] = __byte_perm (w0[3], w1[0], selector);
|
|
w1[3] = __byte_perm (w0[2], w0[3], selector);
|
|
w1[2] = __byte_perm (w0[1], w0[2], selector);
|
|
w1[1] = __byte_perm (w0[0], w0[1], selector);
|
|
w1[0] = __byte_perm ( 0, w0[0], selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = __byte_perm (w6[1], w6[2], selector);
|
|
w7[2] = __byte_perm (w6[0], w6[1], selector);
|
|
w7[1] = __byte_perm (w5[3], w6[0], selector);
|
|
w7[0] = __byte_perm (w5[2], w5[3], selector);
|
|
w6[3] = __byte_perm (w5[1], w5[2], selector);
|
|
w6[2] = __byte_perm (w5[0], w5[1], selector);
|
|
w6[1] = __byte_perm (w4[3], w5[0], selector);
|
|
w6[0] = __byte_perm (w4[2], w4[3], selector);
|
|
w5[3] = __byte_perm (w4[1], w4[2], selector);
|
|
w5[2] = __byte_perm (w4[0], w4[1], selector);
|
|
w5[1] = __byte_perm (w3[3], w4[0], selector);
|
|
w5[0] = __byte_perm (w3[2], w3[3], selector);
|
|
w4[3] = __byte_perm (w3[1], w3[2], selector);
|
|
w4[2] = __byte_perm (w3[0], w3[1], selector);
|
|
w4[1] = __byte_perm (w2[3], w3[0], selector);
|
|
w4[0] = __byte_perm (w2[2], w2[3], selector);
|
|
w3[3] = __byte_perm (w2[1], w2[2], selector);
|
|
w3[2] = __byte_perm (w2[0], w2[1], selector);
|
|
w3[1] = __byte_perm (w1[3], w2[0], selector);
|
|
w3[0] = __byte_perm (w1[2], w1[3], selector);
|
|
w2[3] = __byte_perm (w1[1], w1[2], selector);
|
|
w2[2] = __byte_perm (w1[0], w1[1], selector);
|
|
w2[1] = __byte_perm (w0[3], w1[0], selector);
|
|
w2[0] = __byte_perm (w0[2], w0[3], selector);
|
|
w1[3] = __byte_perm (w0[1], w0[2], selector);
|
|
w1[2] = __byte_perm (w0[0], w0[1], selector);
|
|
w1[1] = __byte_perm ( 0, w0[0], selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = __byte_perm (w6[0], w6[1], selector);
|
|
w7[2] = __byte_perm (w5[3], w6[0], selector);
|
|
w7[1] = __byte_perm (w5[2], w5[3], selector);
|
|
w7[0] = __byte_perm (w5[1], w5[2], selector);
|
|
w6[3] = __byte_perm (w5[0], w5[1], selector);
|
|
w6[2] = __byte_perm (w4[3], w5[0], selector);
|
|
w6[1] = __byte_perm (w4[2], w4[3], selector);
|
|
w6[0] = __byte_perm (w4[1], w4[2], selector);
|
|
w5[3] = __byte_perm (w4[0], w4[1], selector);
|
|
w5[2] = __byte_perm (w3[3], w4[0], selector);
|
|
w5[1] = __byte_perm (w3[2], w3[3], selector);
|
|
w5[0] = __byte_perm (w3[1], w3[2], selector);
|
|
w4[3] = __byte_perm (w3[0], w3[1], selector);
|
|
w4[2] = __byte_perm (w2[3], w3[0], selector);
|
|
w4[1] = __byte_perm (w2[2], w2[3], selector);
|
|
w4[0] = __byte_perm (w2[1], w2[2], selector);
|
|
w3[3] = __byte_perm (w2[0], w2[1], selector);
|
|
w3[2] = __byte_perm (w1[3], w2[0], selector);
|
|
w3[1] = __byte_perm (w1[2], w1[3], selector);
|
|
w3[0] = __byte_perm (w1[1], w1[2], selector);
|
|
w2[3] = __byte_perm (w1[0], w1[1], selector);
|
|
w2[2] = __byte_perm (w0[3], w1[0], selector);
|
|
w2[1] = __byte_perm (w0[2], w0[3], selector);
|
|
w2[0] = __byte_perm (w0[1], w0[2], selector);
|
|
w1[3] = __byte_perm (w0[0], w0[1], selector);
|
|
w1[2] = __byte_perm ( 0, w0[0], selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = __byte_perm (w5[3], w6[0], selector);
|
|
w7[2] = __byte_perm (w5[2], w5[3], selector);
|
|
w7[1] = __byte_perm (w5[1], w5[2], selector);
|
|
w7[0] = __byte_perm (w5[0], w5[1], selector);
|
|
w6[3] = __byte_perm (w4[3], w5[0], selector);
|
|
w6[2] = __byte_perm (w4[2], w4[3], selector);
|
|
w6[1] = __byte_perm (w4[1], w4[2], selector);
|
|
w6[0] = __byte_perm (w4[0], w4[1], selector);
|
|
w5[3] = __byte_perm (w3[3], w4[0], selector);
|
|
w5[2] = __byte_perm (w3[2], w3[3], selector);
|
|
w5[1] = __byte_perm (w3[1], w3[2], selector);
|
|
w5[0] = __byte_perm (w3[0], w3[1], selector);
|
|
w4[3] = __byte_perm (w2[3], w3[0], selector);
|
|
w4[2] = __byte_perm (w2[2], w2[3], selector);
|
|
w4[1] = __byte_perm (w2[1], w2[2], selector);
|
|
w4[0] = __byte_perm (w2[0], w2[1], selector);
|
|
w3[3] = __byte_perm (w1[3], w2[0], selector);
|
|
w3[2] = __byte_perm (w1[2], w1[3], selector);
|
|
w3[1] = __byte_perm (w1[1], w1[2], selector);
|
|
w3[0] = __byte_perm (w1[0], w1[1], selector);
|
|
w2[3] = __byte_perm (w0[3], w1[0], selector);
|
|
w2[2] = __byte_perm (w0[2], w0[3], selector);
|
|
w2[1] = __byte_perm (w0[1], w0[2], selector);
|
|
w2[0] = __byte_perm (w0[0], w0[1], selector);
|
|
w1[3] = __byte_perm ( 0, w0[0], selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = __byte_perm (w5[2], w5[3], selector);
|
|
w7[2] = __byte_perm (w5[1], w5[2], selector);
|
|
w7[1] = __byte_perm (w5[0], w5[1], selector);
|
|
w7[0] = __byte_perm (w4[3], w5[0], selector);
|
|
w6[3] = __byte_perm (w4[2], w4[3], selector);
|
|
w6[2] = __byte_perm (w4[1], w4[2], selector);
|
|
w6[1] = __byte_perm (w4[0], w4[1], selector);
|
|
w6[0] = __byte_perm (w3[3], w4[0], selector);
|
|
w5[3] = __byte_perm (w3[2], w3[3], selector);
|
|
w5[2] = __byte_perm (w3[1], w3[2], selector);
|
|
w5[1] = __byte_perm (w3[0], w3[1], selector);
|
|
w5[0] = __byte_perm (w2[3], w3[0], selector);
|
|
w4[3] = __byte_perm (w2[2], w2[3], selector);
|
|
w4[2] = __byte_perm (w2[1], w2[2], selector);
|
|
w4[1] = __byte_perm (w2[0], w2[1], selector);
|
|
w4[0] = __byte_perm (w1[3], w2[0], selector);
|
|
w3[3] = __byte_perm (w1[2], w1[3], selector);
|
|
w3[2] = __byte_perm (w1[1], w1[2], selector);
|
|
w3[1] = __byte_perm (w1[0], w1[1], selector);
|
|
w3[0] = __byte_perm (w0[3], w1[0], selector);
|
|
w2[3] = __byte_perm (w0[2], w0[3], selector);
|
|
w2[2] = __byte_perm (w0[1], w0[2], selector);
|
|
w2[1] = __byte_perm (w0[0], w0[1], selector);
|
|
w2[0] = __byte_perm ( 0, w0[0], selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = __byte_perm (w5[1], w5[2], selector);
|
|
w7[2] = __byte_perm (w5[0], w5[1], selector);
|
|
w7[1] = __byte_perm (w4[3], w5[0], selector);
|
|
w7[0] = __byte_perm (w4[2], w4[3], selector);
|
|
w6[3] = __byte_perm (w4[1], w4[2], selector);
|
|
w6[2] = __byte_perm (w4[0], w4[1], selector);
|
|
w6[1] = __byte_perm (w3[3], w4[0], selector);
|
|
w6[0] = __byte_perm (w3[2], w3[3], selector);
|
|
w5[3] = __byte_perm (w3[1], w3[2], selector);
|
|
w5[2] = __byte_perm (w3[0], w3[1], selector);
|
|
w5[1] = __byte_perm (w2[3], w3[0], selector);
|
|
w5[0] = __byte_perm (w2[2], w2[3], selector);
|
|
w4[3] = __byte_perm (w2[1], w2[2], selector);
|
|
w4[2] = __byte_perm (w2[0], w2[1], selector);
|
|
w4[1] = __byte_perm (w1[3], w2[0], selector);
|
|
w4[0] = __byte_perm (w1[2], w1[3], selector);
|
|
w3[3] = __byte_perm (w1[1], w1[2], selector);
|
|
w3[2] = __byte_perm (w1[0], w1[1], selector);
|
|
w3[1] = __byte_perm (w0[3], w1[0], selector);
|
|
w3[0] = __byte_perm (w0[2], w0[3], selector);
|
|
w2[3] = __byte_perm (w0[1], w0[2], selector);
|
|
w2[2] = __byte_perm (w0[0], w0[1], selector);
|
|
w2[1] = __byte_perm ( 0, w0[0], selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = __byte_perm (w5[0], w5[1], selector);
|
|
w7[2] = __byte_perm (w4[3], w5[0], selector);
|
|
w7[1] = __byte_perm (w4[2], w4[3], selector);
|
|
w7[0] = __byte_perm (w4[1], w4[2], selector);
|
|
w6[3] = __byte_perm (w4[0], w4[1], selector);
|
|
w6[2] = __byte_perm (w3[3], w4[0], selector);
|
|
w6[1] = __byte_perm (w3[2], w3[3], selector);
|
|
w6[0] = __byte_perm (w3[1], w3[2], selector);
|
|
w5[3] = __byte_perm (w3[0], w3[1], selector);
|
|
w5[2] = __byte_perm (w2[3], w3[0], selector);
|
|
w5[1] = __byte_perm (w2[2], w2[3], selector);
|
|
w5[0] = __byte_perm (w2[1], w2[2], selector);
|
|
w4[3] = __byte_perm (w2[0], w2[1], selector);
|
|
w4[2] = __byte_perm (w1[3], w2[0], selector);
|
|
w4[1] = __byte_perm (w1[2], w1[3], selector);
|
|
w4[0] = __byte_perm (w1[1], w1[2], selector);
|
|
w3[3] = __byte_perm (w1[0], w1[1], selector);
|
|
w3[2] = __byte_perm (w0[3], w1[0], selector);
|
|
w3[1] = __byte_perm (w0[2], w0[3], selector);
|
|
w3[0] = __byte_perm (w0[1], w0[2], selector);
|
|
w2[3] = __byte_perm (w0[0], w0[1], selector);
|
|
w2[2] = __byte_perm ( 0, w0[0], selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = __byte_perm (w4[3], w5[0], selector);
|
|
w7[2] = __byte_perm (w4[2], w4[3], selector);
|
|
w7[1] = __byte_perm (w4[1], w4[2], selector);
|
|
w7[0] = __byte_perm (w4[0], w4[1], selector);
|
|
w6[3] = __byte_perm (w3[3], w4[0], selector);
|
|
w6[2] = __byte_perm (w3[2], w3[3], selector);
|
|
w6[1] = __byte_perm (w3[1], w3[2], selector);
|
|
w6[0] = __byte_perm (w3[0], w3[1], selector);
|
|
w5[3] = __byte_perm (w2[3], w3[0], selector);
|
|
w5[2] = __byte_perm (w2[2], w2[3], selector);
|
|
w5[1] = __byte_perm (w2[1], w2[2], selector);
|
|
w5[0] = __byte_perm (w2[0], w2[1], selector);
|
|
w4[3] = __byte_perm (w1[3], w2[0], selector);
|
|
w4[2] = __byte_perm (w1[2], w1[3], selector);
|
|
w4[1] = __byte_perm (w1[1], w1[2], selector);
|
|
w4[0] = __byte_perm (w1[0], w1[1], selector);
|
|
w3[3] = __byte_perm (w0[3], w1[0], selector);
|
|
w3[2] = __byte_perm (w0[2], w0[3], selector);
|
|
w3[1] = __byte_perm (w0[1], w0[2], selector);
|
|
w3[0] = __byte_perm (w0[0], w0[1], selector);
|
|
w2[3] = __byte_perm ( 0, w0[0], selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = __byte_perm (w4[2], w4[3], selector);
|
|
w7[2] = __byte_perm (w4[1], w4[2], selector);
|
|
w7[1] = __byte_perm (w4[0], w4[1], selector);
|
|
w7[0] = __byte_perm (w3[3], w4[0], selector);
|
|
w6[3] = __byte_perm (w3[2], w3[3], selector);
|
|
w6[2] = __byte_perm (w3[1], w3[2], selector);
|
|
w6[1] = __byte_perm (w3[0], w3[1], selector);
|
|
w6[0] = __byte_perm (w2[3], w3[0], selector);
|
|
w5[3] = __byte_perm (w2[2], w2[3], selector);
|
|
w5[2] = __byte_perm (w2[1], w2[2], selector);
|
|
w5[1] = __byte_perm (w2[0], w2[1], selector);
|
|
w5[0] = __byte_perm (w1[3], w2[0], selector);
|
|
w4[3] = __byte_perm (w1[2], w1[3], selector);
|
|
w4[2] = __byte_perm (w1[1], w1[2], selector);
|
|
w4[1] = __byte_perm (w1[0], w1[1], selector);
|
|
w4[0] = __byte_perm (w0[3], w1[0], selector);
|
|
w3[3] = __byte_perm (w0[2], w0[3], selector);
|
|
w3[2] = __byte_perm (w0[1], w0[2], selector);
|
|
w3[1] = __byte_perm (w0[0], w0[1], selector);
|
|
w3[0] = __byte_perm ( 0, w0[0], selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = __byte_perm (w4[1], w4[2], selector);
|
|
w7[2] = __byte_perm (w4[0], w4[1], selector);
|
|
w7[1] = __byte_perm (w3[3], w4[0], selector);
|
|
w7[0] = __byte_perm (w3[2], w3[3], selector);
|
|
w6[3] = __byte_perm (w3[1], w3[2], selector);
|
|
w6[2] = __byte_perm (w3[0], w3[1], selector);
|
|
w6[1] = __byte_perm (w2[3], w3[0], selector);
|
|
w6[0] = __byte_perm (w2[2], w2[3], selector);
|
|
w5[3] = __byte_perm (w2[1], w2[2], selector);
|
|
w5[2] = __byte_perm (w2[0], w2[1], selector);
|
|
w5[1] = __byte_perm (w1[3], w2[0], selector);
|
|
w5[0] = __byte_perm (w1[2], w1[3], selector);
|
|
w4[3] = __byte_perm (w1[1], w1[2], selector);
|
|
w4[2] = __byte_perm (w1[0], w1[1], selector);
|
|
w4[1] = __byte_perm (w0[3], w1[0], selector);
|
|
w4[0] = __byte_perm (w0[2], w0[3], selector);
|
|
w3[3] = __byte_perm (w0[1], w0[2], selector);
|
|
w3[2] = __byte_perm (w0[0], w0[1], selector);
|
|
w3[1] = __byte_perm ( 0, w0[0], selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = __byte_perm (w4[0], w4[1], selector);
|
|
w7[2] = __byte_perm (w3[3], w4[0], selector);
|
|
w7[1] = __byte_perm (w3[2], w3[3], selector);
|
|
w7[0] = __byte_perm (w3[1], w3[2], selector);
|
|
w6[3] = __byte_perm (w3[0], w3[1], selector);
|
|
w6[2] = __byte_perm (w2[3], w3[0], selector);
|
|
w6[1] = __byte_perm (w2[2], w2[3], selector);
|
|
w6[0] = __byte_perm (w2[1], w2[2], selector);
|
|
w5[3] = __byte_perm (w2[0], w2[1], selector);
|
|
w5[2] = __byte_perm (w1[3], w2[0], selector);
|
|
w5[1] = __byte_perm (w1[2], w1[3], selector);
|
|
w5[0] = __byte_perm (w1[1], w1[2], selector);
|
|
w4[3] = __byte_perm (w1[0], w1[1], selector);
|
|
w4[2] = __byte_perm (w0[3], w1[0], selector);
|
|
w4[1] = __byte_perm (w0[2], w0[3], selector);
|
|
w4[0] = __byte_perm (w0[1], w0[2], selector);
|
|
w3[3] = __byte_perm (w0[0], w0[1], selector);
|
|
w3[2] = __byte_perm ( 0, w0[0], selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = __byte_perm (w3[3], w4[0], selector);
|
|
w7[2] = __byte_perm (w3[2], w3[3], selector);
|
|
w7[1] = __byte_perm (w3[1], w3[2], selector);
|
|
w7[0] = __byte_perm (w3[0], w3[1], selector);
|
|
w6[3] = __byte_perm (w2[3], w3[0], selector);
|
|
w6[2] = __byte_perm (w2[2], w2[3], selector);
|
|
w6[1] = __byte_perm (w2[1], w2[2], selector);
|
|
w6[0] = __byte_perm (w2[0], w2[1], selector);
|
|
w5[3] = __byte_perm (w1[3], w2[0], selector);
|
|
w5[2] = __byte_perm (w1[2], w1[3], selector);
|
|
w5[1] = __byte_perm (w1[1], w1[2], selector);
|
|
w5[0] = __byte_perm (w1[0], w1[1], selector);
|
|
w4[3] = __byte_perm (w0[3], w1[0], selector);
|
|
w4[2] = __byte_perm (w0[2], w0[3], selector);
|
|
w4[1] = __byte_perm (w0[1], w0[2], selector);
|
|
w4[0] = __byte_perm (w0[0], w0[1], selector);
|
|
w3[3] = __byte_perm ( 0, w0[0], selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_8x4_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = __byte_perm (w7[3], w7[2], selector);
|
|
w7[2] = __byte_perm (w7[2], w7[1], selector);
|
|
w7[1] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[0] = __byte_perm (w7[0], w6[3], selector);
|
|
w6[3] = __byte_perm (w6[3], w6[2], selector);
|
|
w6[2] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[1] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[0] = __byte_perm (w6[0], w5[3], selector);
|
|
w5[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w5[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w4[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w4[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w3[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = __byte_perm (w7[2], w7[1], selector);
|
|
w7[2] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[1] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[0] = __byte_perm (w6[3], w6[2], selector);
|
|
w6[3] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[2] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[1] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[0] = __byte_perm (w5[3], w5[2], selector);
|
|
w5[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w4[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w3[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[2] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[1] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[0] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[3] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[2] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[1] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[0] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[2] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[1] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[0] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[3] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[2] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[1] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[0] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[2] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[1] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[0] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[2] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[1] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[0] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[2] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[1] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[0] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[2] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[1] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[0] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[3] = __byte_perm (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[0] = __byte_perm (w0[0], 0, selector);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[1] = __byte_perm (w0[0], 0, selector);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[2] = __byte_perm (w0[0], 0, selector);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[3] = __byte_perm (w0[0], 0, selector);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[0] = __byte_perm (w0[0], 0, selector);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[1] = __byte_perm (w0[0], 0, selector);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[2] = __byte_perm (w0[0], 0, selector);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[3] = __byte_perm (w0[0], 0, selector);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[0] = __byte_perm (w0[0], 0, selector);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[1] = __byte_perm (w0[0], 0, selector);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[2] = __byte_perm (w0[0], 0, selector);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[3] = __byte_perm (w0[0], 0, selector);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[0] = __byte_perm (w0[0], 0, selector);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[1] = __byte_perm (w0[0], 0, selector);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[2] = __byte_perm (w0[0], 0, selector);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = __byte_perm (w0[0], 0, selector);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_8x4_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], u32x c0[4], u32x c1[4], u32x c2[4], u32x c3[4], u32x c4[4], u32x c5[4], u32x c6[4], u32x c7[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign (w7[3], 0, offset);
|
|
w7[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign (w7[3], 0, offset);
|
|
c0[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
w7[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign (w7[3], 0, offset);
|
|
c0[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c0[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
w7[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign (w7[3], 0, offset);
|
|
c0[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c0[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c0[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
w7[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign (w7[3], 0, offset);
|
|
c0[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c0[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c0[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c0[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
w7[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign (w7[3], 0, offset);
|
|
c1[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c0[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c0[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c0[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c0[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
w7[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign (w7[3], 0, offset);
|
|
c1[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c1[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c0[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c0[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c0[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c0[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
w7[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign (w7[3], 0, offset);
|
|
c1[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c1[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c1[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c0[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c0[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c0[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c0[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
w7[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign (w7[3], 0, offset);
|
|
c1[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c1[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c1[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c1[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c0[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c0[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c0[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c0[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
w7[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign (w7[3], 0, offset);
|
|
c2[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c1[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c1[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c1[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c1[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c0[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c0[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c0[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c0[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
w7[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign (w7[3], 0, offset);
|
|
c2[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c2[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c1[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c1[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c1[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c1[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c0[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c0[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c0[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c0[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
w7[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign (w7[3], 0, offset);
|
|
c2[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c2[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c2[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c1[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c1[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c1[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c1[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c0[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c0[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c0[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c0[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
w7[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign (w7[3], 0, offset);
|
|
c2[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c2[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c2[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c2[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c1[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c1[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c1[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c1[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
c0[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
c0[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
c0[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
c0[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
w7[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign (w7[3], 0, offset);
|
|
c3[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c2[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c2[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c2[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c2[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c1[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c1[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c1[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c1[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
c0[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
c0[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
c0[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
c0[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
w7[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign (w7[3], 0, offset);
|
|
c3[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c3[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c2[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c2[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c2[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c2[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c1[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c1[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c1[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c1[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
c0[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
c0[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
c0[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
c0[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
w7[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign (w7[3], 0, offset);
|
|
c3[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c3[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c3[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c2[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c2[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c2[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c2[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c1[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c1[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c1[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c1[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
c0[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
c0[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
c0[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
c0[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
w7[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
c4[0] = amd_bytealign (w7[3], 0, offset);
|
|
c3[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c3[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c3[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c3[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c2[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c2[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c2[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c2[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
c1[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
c1[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
c1[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
c1[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
c0[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
c0[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
c0[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
c0[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
w7[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
c4[1] = amd_bytealign (w7[3], 0, offset);
|
|
c4[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c3[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c3[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c3[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c3[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c2[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c2[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c2[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c2[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
c1[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
c1[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
c1[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
c1[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
c0[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
c0[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
c0[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
c0[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
w7[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
c4[2] = amd_bytealign (w7[3], 0, offset);
|
|
c4[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c4[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c3[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c3[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c3[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c3[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c2[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c2[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c2[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c2[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
c1[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
c1[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
c1[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
c1[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
c0[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
c0[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
c0[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
w7[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
c4[3] = amd_bytealign (w7[3], 0, offset);
|
|
c4[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c4[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c4[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c3[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c3[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c3[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c3[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c2[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c2[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c2[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c2[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
c1[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
c1[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
c1[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
c1[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
c0[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
c0[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
w7[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
c5[0] = amd_bytealign (w7[3], 0, offset);
|
|
c4[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c4[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c4[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c4[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c3[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c3[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c3[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c3[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
c2[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
c2[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
c2[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
c2[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
c1[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
c1[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
c1[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
c1[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
c0[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
w7[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
c5[1] = amd_bytealign (w7[3], 0, offset);
|
|
c5[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c4[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c4[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c4[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c4[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c3[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c3[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c3[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c3[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
c2[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
c2[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
c2[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
c2[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
c1[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
c1[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
c1[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
c1[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c0[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
w7[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
c5[2] = amd_bytealign (w7[3], 0, offset);
|
|
c5[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c5[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c4[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c4[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c4[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c4[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c3[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c3[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c3[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c3[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
c2[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
c2[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
c2[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
c2[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
c1[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
c1[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
c1[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c0[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
w7[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
c5[3] = amd_bytealign (w7[3], 0, offset);
|
|
c5[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c5[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c5[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c4[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c4[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c4[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c4[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c3[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c3[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c3[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c3[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
c2[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
c2[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
c2[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
c2[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
c1[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
c1[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c0[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
w7[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
c6[0] = amd_bytealign (w7[3], 0, offset);
|
|
c5[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c5[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c5[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c5[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c4[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c4[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c4[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c4[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
c3[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
c3[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
c3[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
c3[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
c2[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
c2[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
c2[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
c2[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
c1[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
c0[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
w7[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
c6[1] = amd_bytealign (w7[3], 0, offset);
|
|
c6[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c5[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c5[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c5[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c5[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c4[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c4[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c4[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c4[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
c3[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
c3[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
c3[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
c3[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
c2[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
c2[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
c2[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
c2[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c1[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
c0[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
w7[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
c6[2] = amd_bytealign (w7[3], 0, offset);
|
|
c6[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c6[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c5[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c5[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c5[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c5[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c4[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c4[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c4[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c4[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
c3[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
c3[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
c3[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
c3[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
c2[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
c2[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
c2[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c1[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
c0[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
w7[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
c6[3] = amd_bytealign (w7[3], 0, offset);
|
|
c6[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c6[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c6[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c5[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c5[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c5[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c5[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c4[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c4[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c4[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c4[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
c3[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
c3[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
c3[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
c3[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
c2[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
c2[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c1[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
c0[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
w7[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
c7[0] = amd_bytealign (w7[3], 0, offset);
|
|
c6[3] = amd_bytealign (w7[2], w7[3], offset);
|
|
c6[2] = amd_bytealign (w7[1], w7[2], offset);
|
|
c6[1] = amd_bytealign (w7[0], w7[1], offset);
|
|
c6[0] = amd_bytealign (w6[3], w7[0], offset);
|
|
c5[3] = amd_bytealign (w6[2], w6[3], offset);
|
|
c5[2] = amd_bytealign (w6[1], w6[2], offset);
|
|
c5[1] = amd_bytealign (w6[0], w6[1], offset);
|
|
c5[0] = amd_bytealign (w5[3], w6[0], offset);
|
|
c4[3] = amd_bytealign (w5[2], w5[3], offset);
|
|
c4[2] = amd_bytealign (w5[1], w5[2], offset);
|
|
c4[1] = amd_bytealign (w5[0], w5[1], offset);
|
|
c4[0] = amd_bytealign (w4[3], w5[0], offset);
|
|
c3[3] = amd_bytealign (w4[2], w4[3], offset);
|
|
c3[2] = amd_bytealign (w4[1], w4[2], offset);
|
|
c3[1] = amd_bytealign (w4[0], w4[1], offset);
|
|
c3[0] = amd_bytealign (w3[3], w4[0], offset);
|
|
c2[3] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[2] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[1] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[0] = amd_bytealign (w2[3], w3[0], offset);
|
|
c1[3] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[2] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[1] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[0] = amd_bytealign (w1[3], w2[0], offset);
|
|
c0[3] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[2] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[1] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[0] = amd_bytealign (w0[3], w1[0], offset);
|
|
w7[3] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
c7[1] = amd_bytealign (w7[3], 0, offset);
|
|
c7[0] = amd_bytealign (w7[2], w7[3], offset);
|
|
c6[3] = amd_bytealign (w7[1], w7[2], offset);
|
|
c6[2] = amd_bytealign (w7[0], w7[1], offset);
|
|
c6[1] = amd_bytealign (w6[3], w7[0], offset);
|
|
c6[0] = amd_bytealign (w6[2], w6[3], offset);
|
|
c5[3] = amd_bytealign (w6[1], w6[2], offset);
|
|
c5[2] = amd_bytealign (w6[0], w6[1], offset);
|
|
c5[1] = amd_bytealign (w5[3], w6[0], offset);
|
|
c5[0] = amd_bytealign (w5[2], w5[3], offset);
|
|
c4[3] = amd_bytealign (w5[1], w5[2], offset);
|
|
c4[2] = amd_bytealign (w5[0], w5[1], offset);
|
|
c4[1] = amd_bytealign (w4[3], w5[0], offset);
|
|
c4[0] = amd_bytealign (w4[2], w4[3], offset);
|
|
c3[3] = amd_bytealign (w4[1], w4[2], offset);
|
|
c3[2] = amd_bytealign (w4[0], w4[1], offset);
|
|
c3[1] = amd_bytealign (w3[3], w4[0], offset);
|
|
c3[0] = amd_bytealign (w3[2], w3[3], offset);
|
|
c2[3] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[2] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[1] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[0] = amd_bytealign (w2[2], w2[3], offset);
|
|
c1[3] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[2] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[1] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[0] = amd_bytealign (w1[2], w1[3], offset);
|
|
c0[3] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[2] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[1] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[0] = amd_bytealign (w0[2], w0[3], offset);
|
|
w7[3] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
c7[2] = amd_bytealign (w7[3], 0, offset);
|
|
c7[1] = amd_bytealign (w7[2], w7[3], offset);
|
|
c7[0] = amd_bytealign (w7[1], w7[2], offset);
|
|
c6[3] = amd_bytealign (w7[0], w7[1], offset);
|
|
c6[2] = amd_bytealign (w6[3], w7[0], offset);
|
|
c6[1] = amd_bytealign (w6[2], w6[3], offset);
|
|
c6[0] = amd_bytealign (w6[1], w6[2], offset);
|
|
c5[3] = amd_bytealign (w6[0], w6[1], offset);
|
|
c5[2] = amd_bytealign (w5[3], w6[0], offset);
|
|
c5[1] = amd_bytealign (w5[2], w5[3], offset);
|
|
c5[0] = amd_bytealign (w5[1], w5[2], offset);
|
|
c4[3] = amd_bytealign (w5[0], w5[1], offset);
|
|
c4[2] = amd_bytealign (w4[3], w5[0], offset);
|
|
c4[1] = amd_bytealign (w4[2], w4[3], offset);
|
|
c4[0] = amd_bytealign (w4[1], w4[2], offset);
|
|
c3[3] = amd_bytealign (w4[0], w4[1], offset);
|
|
c3[2] = amd_bytealign (w3[3], w4[0], offset);
|
|
c3[1] = amd_bytealign (w3[2], w3[3], offset);
|
|
c3[0] = amd_bytealign (w3[1], w3[2], offset);
|
|
c2[3] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[2] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[1] = amd_bytealign (w2[2], w2[3], offset);
|
|
c2[0] = amd_bytealign (w2[1], w2[2], offset);
|
|
c1[3] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[2] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[1] = amd_bytealign (w1[2], w1[3], offset);
|
|
c1[0] = amd_bytealign (w1[1], w1[2], offset);
|
|
c0[3] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[2] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[1] = amd_bytealign (w0[2], w0[3], offset);
|
|
c0[0] = amd_bytealign (w0[1], w0[2], offset);
|
|
w7[3] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
c7[3] = amd_bytealign (w7[3], 0, offset);
|
|
c7[2] = amd_bytealign (w7[2], w7[3], offset);
|
|
c7[1] = amd_bytealign (w7[1], w7[2], offset);
|
|
c7[0] = amd_bytealign (w7[0], w7[1], offset);
|
|
c6[3] = amd_bytealign (w6[3], w7[0], offset);
|
|
c6[2] = amd_bytealign (w6[2], w6[3], offset);
|
|
c6[1] = amd_bytealign (w6[1], w6[2], offset);
|
|
c6[0] = amd_bytealign (w6[0], w6[1], offset);
|
|
c5[3] = amd_bytealign (w5[3], w6[0], offset);
|
|
c5[2] = amd_bytealign (w5[2], w5[3], offset);
|
|
c5[1] = amd_bytealign (w5[1], w5[2], offset);
|
|
c5[0] = amd_bytealign (w5[0], w5[1], offset);
|
|
c4[3] = amd_bytealign (w4[3], w5[0], offset);
|
|
c4[2] = amd_bytealign (w4[2], w4[3], offset);
|
|
c4[1] = amd_bytealign (w4[1], w4[2], offset);
|
|
c4[0] = amd_bytealign (w4[0], w4[1], offset);
|
|
c3[3] = amd_bytealign (w3[3], w4[0], offset);
|
|
c3[2] = amd_bytealign (w3[2], w3[3], offset);
|
|
c3[1] = amd_bytealign (w3[1], w3[2], offset);
|
|
c3[0] = amd_bytealign (w3[0], w3[1], offset);
|
|
c2[3] = amd_bytealign (w2[3], w3[0], offset);
|
|
c2[2] = amd_bytealign (w2[2], w2[3], offset);
|
|
c2[1] = amd_bytealign (w2[1], w2[2], offset);
|
|
c2[0] = amd_bytealign (w2[0], w2[1], offset);
|
|
c1[3] = amd_bytealign (w1[3], w2[0], offset);
|
|
c1[2] = amd_bytealign (w1[2], w1[3], offset);
|
|
c1[1] = amd_bytealign (w1[1], w1[2], offset);
|
|
c1[0] = amd_bytealign (w1[0], w1[1], offset);
|
|
c0[3] = amd_bytealign (w0[3], w1[0], offset);
|
|
c0[2] = amd_bytealign (w0[2], w0[3], offset);
|
|
c0[1] = amd_bytealign (w0[1], w0[2], offset);
|
|
c0[0] = amd_bytealign (w0[0], w0[1], offset);
|
|
w7[3] = amd_bytealign ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = __byte_perm ( 0, w7[3], selector);
|
|
w7[3] = __byte_perm (w7[3], w7[2], selector);
|
|
w7[2] = __byte_perm (w7[2], w7[1], selector);
|
|
w7[1] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[0] = __byte_perm (w7[0], w6[3], selector);
|
|
w6[3] = __byte_perm (w6[3], w6[2], selector);
|
|
w6[2] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[1] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[0] = __byte_perm (w6[0], w5[3], selector);
|
|
w5[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w5[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w4[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w4[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w3[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = __byte_perm ( 0, w7[3], selector);
|
|
c0[0] = __byte_perm (w7[3], w7[2], selector);
|
|
w7[3] = __byte_perm (w7[2], w7[1], selector);
|
|
w7[2] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[1] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[0] = __byte_perm (w6[3], w6[2], selector);
|
|
w6[3] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[2] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[1] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[0] = __byte_perm (w5[3], w5[2], selector);
|
|
w5[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w4[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w3[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = __byte_perm ( 0, w7[3], selector);
|
|
c0[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c0[0] = __byte_perm (w7[2], w7[1], selector);
|
|
w7[3] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[2] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[1] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[0] = __byte_perm (w6[2], w6[1], selector);
|
|
w6[3] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[2] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[1] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[0] = __byte_perm (w5[2], w5[1], selector);
|
|
w5[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w4[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w3[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = __byte_perm ( 0, w7[3], selector);
|
|
c0[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c0[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c0[0] = __byte_perm (w7[1], w7[0], selector);
|
|
w7[3] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[2] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[1] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[0] = __byte_perm (w6[1], w6[0], selector);
|
|
w6[3] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[2] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[1] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[0] = __byte_perm (w5[1], w5[0], selector);
|
|
w5[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w4[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w3[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = __byte_perm ( 0, w7[3], selector);
|
|
c0[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c0[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c0[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c0[0] = __byte_perm (w7[0], w6[3], selector);
|
|
w7[3] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[2] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[1] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[0] = __byte_perm (w6[0], w5[3], selector);
|
|
w6[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w5[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w4[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w3[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = __byte_perm ( 0, w7[3], selector);
|
|
c1[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c0[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c0[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c0[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c0[0] = __byte_perm (w6[3], w6[2], selector);
|
|
w7[3] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[2] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[1] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[0] = __byte_perm (w5[3], w5[2], selector);
|
|
w6[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w5[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w4[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w3[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = __byte_perm ( 0, w7[3], selector);
|
|
c1[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c1[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c0[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c0[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c0[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c0[0] = __byte_perm (w6[2], w6[1], selector);
|
|
w7[3] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[2] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[1] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[0] = __byte_perm (w5[2], w5[1], selector);
|
|
w6[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w5[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w4[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w3[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = __byte_perm ( 0, w7[3], selector);
|
|
c1[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c1[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c1[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c0[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c0[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c0[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c0[0] = __byte_perm (w6[1], w6[0], selector);
|
|
w7[3] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[2] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[1] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[0] = __byte_perm (w5[1], w5[0], selector);
|
|
w6[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w5[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w4[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w3[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = __byte_perm ( 0, w7[3], selector);
|
|
c1[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c1[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c1[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c1[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c0[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c0[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c0[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c0[0] = __byte_perm (w6[0], w5[3], selector);
|
|
w7[3] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[2] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[1] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w6[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w5[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w4[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w3[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = __byte_perm ( 0, w7[3], selector);
|
|
c2[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c1[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c1[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c1[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c1[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c0[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c0[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c0[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c0[0] = __byte_perm (w5[3], w5[2], selector);
|
|
w7[3] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[2] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[1] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w6[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w5[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w4[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w3[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = __byte_perm ( 0, w7[3], selector);
|
|
c2[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c2[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c1[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c1[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c1[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c1[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c0[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c0[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c0[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c0[0] = __byte_perm (w5[2], w5[1], selector);
|
|
w7[3] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[2] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[1] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w6[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w5[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w4[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w3[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = __byte_perm ( 0, w7[3], selector);
|
|
c2[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c2[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c2[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c1[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c1[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c1[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c1[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c0[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c0[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c0[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c0[0] = __byte_perm (w5[1], w5[0], selector);
|
|
w7[3] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[2] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[1] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w6[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w5[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w4[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w3[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = __byte_perm ( 0, w7[3], selector);
|
|
c2[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c2[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c2[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c2[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c1[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c1[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c1[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c1[0] = __byte_perm (w6[0], w5[3], selector);
|
|
c0[3] = __byte_perm (w5[3], w5[2], selector);
|
|
c0[2] = __byte_perm (w5[2], w5[1], selector);
|
|
c0[1] = __byte_perm (w5[1], w5[0], selector);
|
|
c0[0] = __byte_perm (w5[0], w4[3], selector);
|
|
w7[3] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[2] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[1] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w6[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w5[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w4[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w3[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = __byte_perm ( 0, w7[3], selector);
|
|
c3[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c2[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c2[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c2[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c2[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c1[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c1[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c1[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c1[0] = __byte_perm (w5[3], w5[2], selector);
|
|
c0[3] = __byte_perm (w5[2], w5[1], selector);
|
|
c0[2] = __byte_perm (w5[1], w5[0], selector);
|
|
c0[1] = __byte_perm (w5[0], w4[3], selector);
|
|
c0[0] = __byte_perm (w4[3], w4[2], selector);
|
|
w7[3] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[2] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[1] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w6[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w5[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w4[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w3[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = __byte_perm ( 0, w7[3], selector);
|
|
c3[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c3[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c2[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c2[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c2[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c2[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c1[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c1[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c1[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c1[0] = __byte_perm (w5[2], w5[1], selector);
|
|
c0[3] = __byte_perm (w5[1], w5[0], selector);
|
|
c0[2] = __byte_perm (w5[0], w4[3], selector);
|
|
c0[1] = __byte_perm (w4[3], w4[2], selector);
|
|
c0[0] = __byte_perm (w4[2], w4[1], selector);
|
|
w7[3] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[2] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[1] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w6[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w5[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w4[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w3[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = __byte_perm ( 0, w7[3], selector);
|
|
c3[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c3[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c3[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c2[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c2[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c2[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c2[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c1[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c1[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c1[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c1[0] = __byte_perm (w5[1], w5[0], selector);
|
|
c0[3] = __byte_perm (w5[0], w4[3], selector);
|
|
c0[2] = __byte_perm (w4[3], w4[2], selector);
|
|
c0[1] = __byte_perm (w4[2], w4[1], selector);
|
|
c0[0] = __byte_perm (w4[1], w4[0], selector);
|
|
w7[3] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[2] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[1] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w6[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w5[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w4[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w3[3] = __byte_perm (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
c4[0] = __byte_perm ( 0, w7[3], selector);
|
|
c3[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c3[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c3[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c3[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c2[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c2[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c2[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c2[0] = __byte_perm (w6[0], w5[3], selector);
|
|
c1[3] = __byte_perm (w5[3], w5[2], selector);
|
|
c1[2] = __byte_perm (w5[2], w5[1], selector);
|
|
c1[1] = __byte_perm (w5[1], w5[0], selector);
|
|
c1[0] = __byte_perm (w5[0], w4[3], selector);
|
|
c0[3] = __byte_perm (w4[3], w4[2], selector);
|
|
c0[2] = __byte_perm (w4[2], w4[1], selector);
|
|
c0[1] = __byte_perm (w4[1], w4[0], selector);
|
|
c0[0] = __byte_perm (w4[0], w3[3], selector);
|
|
w7[3] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[2] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[1] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w6[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w5[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w4[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[0] = __byte_perm (w0[0], 0, selector);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
c4[1] = __byte_perm ( 0, w7[3], selector);
|
|
c4[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c3[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c3[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c3[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c3[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c2[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c2[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c2[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c2[0] = __byte_perm (w5[3], w5[2], selector);
|
|
c1[3] = __byte_perm (w5[2], w5[1], selector);
|
|
c1[2] = __byte_perm (w5[1], w5[0], selector);
|
|
c1[1] = __byte_perm (w5[0], w4[3], selector);
|
|
c1[0] = __byte_perm (w4[3], w4[2], selector);
|
|
c0[3] = __byte_perm (w4[2], w4[1], selector);
|
|
c0[2] = __byte_perm (w4[1], w4[0], selector);
|
|
c0[1] = __byte_perm (w4[0], w3[3], selector);
|
|
c0[0] = __byte_perm (w3[3], w3[2], selector);
|
|
w7[3] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[2] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[1] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w6[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w5[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w4[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[1] = __byte_perm (w0[0], 0, selector);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
c4[2] = __byte_perm ( 0, w7[3], selector);
|
|
c4[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c4[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c3[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c3[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c3[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c3[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c2[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c2[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c2[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c2[0] = __byte_perm (w5[2], w5[1], selector);
|
|
c1[3] = __byte_perm (w5[1], w5[0], selector);
|
|
c1[2] = __byte_perm (w5[0], w4[3], selector);
|
|
c1[1] = __byte_perm (w4[3], w4[2], selector);
|
|
c1[0] = __byte_perm (w4[2], w4[1], selector);
|
|
c0[3] = __byte_perm (w4[1], w4[0], selector);
|
|
c0[2] = __byte_perm (w4[0], w3[3], selector);
|
|
c0[1] = __byte_perm (w3[3], w3[2], selector);
|
|
c0[0] = __byte_perm (w3[2], w3[1], selector);
|
|
w7[3] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[2] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[1] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w6[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w5[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w4[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[2] = __byte_perm (w0[0], 0, selector);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
c4[3] = __byte_perm ( 0, w7[3], selector);
|
|
c4[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c4[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c4[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c3[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c3[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c3[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c3[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c2[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c2[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c2[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c2[0] = __byte_perm (w5[1], w5[0], selector);
|
|
c1[3] = __byte_perm (w5[0], w4[3], selector);
|
|
c1[2] = __byte_perm (w4[3], w4[2], selector);
|
|
c1[1] = __byte_perm (w4[2], w4[1], selector);
|
|
c1[0] = __byte_perm (w4[1], w4[0], selector);
|
|
c0[3] = __byte_perm (w4[0], w3[3], selector);
|
|
c0[2] = __byte_perm (w3[3], w3[2], selector);
|
|
c0[1] = __byte_perm (w3[2], w3[1], selector);
|
|
c0[0] = __byte_perm (w3[1], w3[0], selector);
|
|
w7[3] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[2] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[1] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w6[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w5[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w4[3] = __byte_perm (w0[0], 0, selector);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
c5[0] = __byte_perm ( 0, w7[3], selector);
|
|
c4[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c4[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c4[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c4[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c3[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c3[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c3[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c3[0] = __byte_perm (w6[0], w5[3], selector);
|
|
c2[3] = __byte_perm (w5[3], w5[2], selector);
|
|
c2[2] = __byte_perm (w5[2], w5[1], selector);
|
|
c2[1] = __byte_perm (w5[1], w5[0], selector);
|
|
c2[0] = __byte_perm (w5[0], w4[3], selector);
|
|
c1[3] = __byte_perm (w4[3], w4[2], selector);
|
|
c1[2] = __byte_perm (w4[2], w4[1], selector);
|
|
c1[1] = __byte_perm (w4[1], w4[0], selector);
|
|
c1[0] = __byte_perm (w4[0], w3[3], selector);
|
|
c0[3] = __byte_perm (w3[3], w3[2], selector);
|
|
c0[2] = __byte_perm (w3[2], w3[1], selector);
|
|
c0[1] = __byte_perm (w3[1], w3[0], selector);
|
|
c0[0] = __byte_perm (w3[0], w2[3], selector);
|
|
w7[3] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[2] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[1] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w6[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w5[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[0] = __byte_perm (w0[0], 0, selector);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
c5[1] = __byte_perm ( 0, w7[3], selector);
|
|
c5[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c4[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c4[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c4[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c4[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c3[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c3[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c3[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c3[0] = __byte_perm (w5[3], w5[2], selector);
|
|
c2[3] = __byte_perm (w5[2], w5[1], selector);
|
|
c2[2] = __byte_perm (w5[1], w5[0], selector);
|
|
c2[1] = __byte_perm (w5[0], w4[3], selector);
|
|
c2[0] = __byte_perm (w4[3], w4[2], selector);
|
|
c1[3] = __byte_perm (w4[2], w4[1], selector);
|
|
c1[2] = __byte_perm (w4[1], w4[0], selector);
|
|
c1[1] = __byte_perm (w4[0], w3[3], selector);
|
|
c1[0] = __byte_perm (w3[3], w3[2], selector);
|
|
c0[3] = __byte_perm (w3[2], w3[1], selector);
|
|
c0[2] = __byte_perm (w3[1], w3[0], selector);
|
|
c0[1] = __byte_perm (w3[0], w2[3], selector);
|
|
c0[0] = __byte_perm (w2[3], w2[2], selector);
|
|
w7[3] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[2] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[1] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w6[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w5[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[1] = __byte_perm (w0[0], 0, selector);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
c5[2] = __byte_perm ( 0, w7[3], selector);
|
|
c5[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c5[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c4[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c4[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c4[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c4[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c3[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c3[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c3[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c3[0] = __byte_perm (w5[2], w5[1], selector);
|
|
c2[3] = __byte_perm (w5[1], w5[0], selector);
|
|
c2[2] = __byte_perm (w5[0], w4[3], selector);
|
|
c2[1] = __byte_perm (w4[3], w4[2], selector);
|
|
c2[0] = __byte_perm (w4[2], w4[1], selector);
|
|
c1[3] = __byte_perm (w4[1], w4[0], selector);
|
|
c1[2] = __byte_perm (w4[0], w3[3], selector);
|
|
c1[1] = __byte_perm (w3[3], w3[2], selector);
|
|
c1[0] = __byte_perm (w3[2], w3[1], selector);
|
|
c0[3] = __byte_perm (w3[1], w3[0], selector);
|
|
c0[2] = __byte_perm (w3[0], w2[3], selector);
|
|
c0[1] = __byte_perm (w2[3], w2[2], selector);
|
|
c0[0] = __byte_perm (w2[2], w2[1], selector);
|
|
w7[3] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[2] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[1] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w6[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w5[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[2] = __byte_perm (w0[0], 0, selector);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
c5[3] = __byte_perm ( 0, w7[3], selector);
|
|
c5[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c5[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c5[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c4[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c4[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c4[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c4[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c3[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c3[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c3[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c3[0] = __byte_perm (w5[1], w5[0], selector);
|
|
c2[3] = __byte_perm (w5[0], w4[3], selector);
|
|
c2[2] = __byte_perm (w4[3], w4[2], selector);
|
|
c2[1] = __byte_perm (w4[2], w4[1], selector);
|
|
c2[0] = __byte_perm (w4[1], w4[0], selector);
|
|
c1[3] = __byte_perm (w4[0], w3[3], selector);
|
|
c1[2] = __byte_perm (w3[3], w3[2], selector);
|
|
c1[1] = __byte_perm (w3[2], w3[1], selector);
|
|
c1[0] = __byte_perm (w3[1], w3[0], selector);
|
|
c0[3] = __byte_perm (w3[0], w2[3], selector);
|
|
c0[2] = __byte_perm (w2[3], w2[2], selector);
|
|
c0[1] = __byte_perm (w2[2], w2[1], selector);
|
|
c0[0] = __byte_perm (w2[1], w2[0], selector);
|
|
w7[3] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[2] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[1] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w6[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w5[3] = __byte_perm (w0[0], 0, selector);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
c6[0] = __byte_perm ( 0, w7[3], selector);
|
|
c5[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c5[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c5[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c5[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c4[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c4[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c4[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c4[0] = __byte_perm (w6[0], w5[3], selector);
|
|
c3[3] = __byte_perm (w5[3], w5[2], selector);
|
|
c3[2] = __byte_perm (w5[2], w5[1], selector);
|
|
c3[1] = __byte_perm (w5[1], w5[0], selector);
|
|
c3[0] = __byte_perm (w5[0], w4[3], selector);
|
|
c2[3] = __byte_perm (w4[3], w4[2], selector);
|
|
c2[2] = __byte_perm (w4[2], w4[1], selector);
|
|
c2[1] = __byte_perm (w4[1], w4[0], selector);
|
|
c2[0] = __byte_perm (w4[0], w3[3], selector);
|
|
c1[3] = __byte_perm (w3[3], w3[2], selector);
|
|
c1[2] = __byte_perm (w3[2], w3[1], selector);
|
|
c1[1] = __byte_perm (w3[1], w3[0], selector);
|
|
c1[0] = __byte_perm (w3[0], w2[3], selector);
|
|
c0[3] = __byte_perm (w2[3], w2[2], selector);
|
|
c0[2] = __byte_perm (w2[2], w2[1], selector);
|
|
c0[1] = __byte_perm (w2[1], w2[0], selector);
|
|
c0[0] = __byte_perm (w2[0], w1[3], selector);
|
|
w7[3] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[2] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[1] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w6[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[0] = __byte_perm (w0[0], 0, selector);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
c6[1] = __byte_perm ( 0, w7[3], selector);
|
|
c6[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c5[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c5[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c5[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c5[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c4[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c4[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c4[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c4[0] = __byte_perm (w5[3], w5[2], selector);
|
|
c3[3] = __byte_perm (w5[2], w5[1], selector);
|
|
c3[2] = __byte_perm (w5[1], w5[0], selector);
|
|
c3[1] = __byte_perm (w5[0], w4[3], selector);
|
|
c3[0] = __byte_perm (w4[3], w4[2], selector);
|
|
c2[3] = __byte_perm (w4[2], w4[1], selector);
|
|
c2[2] = __byte_perm (w4[1], w4[0], selector);
|
|
c2[1] = __byte_perm (w4[0], w3[3], selector);
|
|
c2[0] = __byte_perm (w3[3], w3[2], selector);
|
|
c1[3] = __byte_perm (w3[2], w3[1], selector);
|
|
c1[2] = __byte_perm (w3[1], w3[0], selector);
|
|
c1[1] = __byte_perm (w3[0], w2[3], selector);
|
|
c1[0] = __byte_perm (w2[3], w2[2], selector);
|
|
c0[3] = __byte_perm (w2[2], w2[1], selector);
|
|
c0[2] = __byte_perm (w2[1], w2[0], selector);
|
|
c0[1] = __byte_perm (w2[0], w1[3], selector);
|
|
c0[0] = __byte_perm (w1[3], w1[2], selector);
|
|
w7[3] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[2] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[1] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w6[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[1] = __byte_perm (w0[0], 0, selector);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
c6[2] = __byte_perm ( 0, w7[3], selector);
|
|
c6[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c6[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c5[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c5[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c5[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c5[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c4[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c4[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c4[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c4[0] = __byte_perm (w5[2], w5[1], selector);
|
|
c3[3] = __byte_perm (w5[1], w5[0], selector);
|
|
c3[2] = __byte_perm (w5[0], w4[3], selector);
|
|
c3[1] = __byte_perm (w4[3], w4[2], selector);
|
|
c3[0] = __byte_perm (w4[2], w4[1], selector);
|
|
c2[3] = __byte_perm (w4[1], w4[0], selector);
|
|
c2[2] = __byte_perm (w4[0], w3[3], selector);
|
|
c2[1] = __byte_perm (w3[3], w3[2], selector);
|
|
c2[0] = __byte_perm (w3[2], w3[1], selector);
|
|
c1[3] = __byte_perm (w3[1], w3[0], selector);
|
|
c1[2] = __byte_perm (w3[0], w2[3], selector);
|
|
c1[1] = __byte_perm (w2[3], w2[2], selector);
|
|
c1[0] = __byte_perm (w2[2], w2[1], selector);
|
|
c0[3] = __byte_perm (w2[1], w2[0], selector);
|
|
c0[2] = __byte_perm (w2[0], w1[3], selector);
|
|
c0[1] = __byte_perm (w1[3], w1[2], selector);
|
|
c0[0] = __byte_perm (w1[2], w1[1], selector);
|
|
w7[3] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[2] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[1] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w6[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[2] = __byte_perm (w0[0], 0, selector);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
c6[3] = __byte_perm ( 0, w7[3], selector);
|
|
c6[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c6[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c6[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c5[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c5[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c5[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c5[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c4[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c4[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c4[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c4[0] = __byte_perm (w5[1], w5[0], selector);
|
|
c3[3] = __byte_perm (w5[0], w4[3], selector);
|
|
c3[2] = __byte_perm (w4[3], w4[2], selector);
|
|
c3[1] = __byte_perm (w4[2], w4[1], selector);
|
|
c3[0] = __byte_perm (w4[1], w4[0], selector);
|
|
c2[3] = __byte_perm (w4[0], w3[3], selector);
|
|
c2[2] = __byte_perm (w3[3], w3[2], selector);
|
|
c2[1] = __byte_perm (w3[2], w3[1], selector);
|
|
c2[0] = __byte_perm (w3[1], w3[0], selector);
|
|
c1[3] = __byte_perm (w3[0], w2[3], selector);
|
|
c1[2] = __byte_perm (w2[3], w2[2], selector);
|
|
c1[1] = __byte_perm (w2[2], w2[1], selector);
|
|
c1[0] = __byte_perm (w2[1], w2[0], selector);
|
|
c0[3] = __byte_perm (w2[0], w1[3], selector);
|
|
c0[2] = __byte_perm (w1[3], w1[2], selector);
|
|
c0[1] = __byte_perm (w1[2], w1[1], selector);
|
|
c0[0] = __byte_perm (w1[1], w1[0], selector);
|
|
w7[3] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[2] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[1] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w6[3] = __byte_perm (w0[0], 0, selector);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
c7[0] = __byte_perm ( 0, w7[3], selector);
|
|
c6[3] = __byte_perm (w7[3], w7[2], selector);
|
|
c6[2] = __byte_perm (w7[2], w7[1], selector);
|
|
c6[1] = __byte_perm (w7[1], w7[0], selector);
|
|
c6[0] = __byte_perm (w7[0], w6[3], selector);
|
|
c5[3] = __byte_perm (w6[3], w6[2], selector);
|
|
c5[2] = __byte_perm (w6[2], w6[1], selector);
|
|
c5[1] = __byte_perm (w6[1], w6[0], selector);
|
|
c5[0] = __byte_perm (w6[0], w5[3], selector);
|
|
c4[3] = __byte_perm (w5[3], w5[2], selector);
|
|
c4[2] = __byte_perm (w5[2], w5[1], selector);
|
|
c4[1] = __byte_perm (w5[1], w5[0], selector);
|
|
c4[0] = __byte_perm (w5[0], w4[3], selector);
|
|
c3[3] = __byte_perm (w4[3], w4[2], selector);
|
|
c3[2] = __byte_perm (w4[2], w4[1], selector);
|
|
c3[1] = __byte_perm (w4[1], w4[0], selector);
|
|
c3[0] = __byte_perm (w4[0], w3[3], selector);
|
|
c2[3] = __byte_perm (w3[3], w3[2], selector);
|
|
c2[2] = __byte_perm (w3[2], w3[1], selector);
|
|
c2[1] = __byte_perm (w3[1], w3[0], selector);
|
|
c2[0] = __byte_perm (w3[0], w2[3], selector);
|
|
c1[3] = __byte_perm (w2[3], w2[2], selector);
|
|
c1[2] = __byte_perm (w2[2], w2[1], selector);
|
|
c1[1] = __byte_perm (w2[1], w2[0], selector);
|
|
c1[0] = __byte_perm (w2[0], w1[3], selector);
|
|
c0[3] = __byte_perm (w1[3], w1[2], selector);
|
|
c0[2] = __byte_perm (w1[2], w1[1], selector);
|
|
c0[1] = __byte_perm (w1[1], w1[0], selector);
|
|
c0[0] = __byte_perm (w1[0], w0[3], selector);
|
|
w7[3] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[2] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[1] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[0] = __byte_perm (w0[0], 0, selector);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
c7[1] = __byte_perm ( 0, w7[3], selector);
|
|
c7[0] = __byte_perm (w7[3], w7[2], selector);
|
|
c6[3] = __byte_perm (w7[2], w7[1], selector);
|
|
c6[2] = __byte_perm (w7[1], w7[0], selector);
|
|
c6[1] = __byte_perm (w7[0], w6[3], selector);
|
|
c6[0] = __byte_perm (w6[3], w6[2], selector);
|
|
c5[3] = __byte_perm (w6[2], w6[1], selector);
|
|
c5[2] = __byte_perm (w6[1], w6[0], selector);
|
|
c5[1] = __byte_perm (w6[0], w5[3], selector);
|
|
c5[0] = __byte_perm (w5[3], w5[2], selector);
|
|
c4[3] = __byte_perm (w5[2], w5[1], selector);
|
|
c4[2] = __byte_perm (w5[1], w5[0], selector);
|
|
c4[1] = __byte_perm (w5[0], w4[3], selector);
|
|
c4[0] = __byte_perm (w4[3], w4[2], selector);
|
|
c3[3] = __byte_perm (w4[2], w4[1], selector);
|
|
c3[2] = __byte_perm (w4[1], w4[0], selector);
|
|
c3[1] = __byte_perm (w4[0], w3[3], selector);
|
|
c3[0] = __byte_perm (w3[3], w3[2], selector);
|
|
c2[3] = __byte_perm (w3[2], w3[1], selector);
|
|
c2[2] = __byte_perm (w3[1], w3[0], selector);
|
|
c2[1] = __byte_perm (w3[0], w2[3], selector);
|
|
c2[0] = __byte_perm (w2[3], w2[2], selector);
|
|
c1[3] = __byte_perm (w2[2], w2[1], selector);
|
|
c1[2] = __byte_perm (w2[1], w2[0], selector);
|
|
c1[1] = __byte_perm (w2[0], w1[3], selector);
|
|
c1[0] = __byte_perm (w1[3], w1[2], selector);
|
|
c0[3] = __byte_perm (w1[2], w1[1], selector);
|
|
c0[2] = __byte_perm (w1[1], w1[0], selector);
|
|
c0[1] = __byte_perm (w1[0], w0[3], selector);
|
|
c0[0] = __byte_perm (w0[3], w0[2], selector);
|
|
w7[3] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[2] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[1] = __byte_perm (w0[0], 0, selector);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
c7[2] = __byte_perm ( 0, w7[3], selector);
|
|
c7[1] = __byte_perm (w7[3], w7[2], selector);
|
|
c7[0] = __byte_perm (w7[2], w7[1], selector);
|
|
c6[3] = __byte_perm (w7[1], w7[0], selector);
|
|
c6[2] = __byte_perm (w7[0], w6[3], selector);
|
|
c6[1] = __byte_perm (w6[3], w6[2], selector);
|
|
c6[0] = __byte_perm (w6[2], w6[1], selector);
|
|
c5[3] = __byte_perm (w6[1], w6[0], selector);
|
|
c5[2] = __byte_perm (w6[0], w5[3], selector);
|
|
c5[1] = __byte_perm (w5[3], w5[2], selector);
|
|
c5[0] = __byte_perm (w5[2], w5[1], selector);
|
|
c4[3] = __byte_perm (w5[1], w5[0], selector);
|
|
c4[2] = __byte_perm (w5[0], w4[3], selector);
|
|
c4[1] = __byte_perm (w4[3], w4[2], selector);
|
|
c4[0] = __byte_perm (w4[2], w4[1], selector);
|
|
c3[3] = __byte_perm (w4[1], w4[0], selector);
|
|
c3[2] = __byte_perm (w4[0], w3[3], selector);
|
|
c3[1] = __byte_perm (w3[3], w3[2], selector);
|
|
c3[0] = __byte_perm (w3[2], w3[1], selector);
|
|
c2[3] = __byte_perm (w3[1], w3[0], selector);
|
|
c2[2] = __byte_perm (w3[0], w2[3], selector);
|
|
c2[1] = __byte_perm (w2[3], w2[2], selector);
|
|
c2[0] = __byte_perm (w2[2], w2[1], selector);
|
|
c1[3] = __byte_perm (w2[1], w2[0], selector);
|
|
c1[2] = __byte_perm (w2[0], w1[3], selector);
|
|
c1[1] = __byte_perm (w1[3], w1[2], selector);
|
|
c1[0] = __byte_perm (w1[2], w1[1], selector);
|
|
c0[3] = __byte_perm (w1[1], w1[0], selector);
|
|
c0[2] = __byte_perm (w1[0], w0[3], selector);
|
|
c0[1] = __byte_perm (w0[3], w0[2], selector);
|
|
c0[0] = __byte_perm (w0[2], w0[1], selector);
|
|
w7[3] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[2] = __byte_perm (w0[0], 0, selector);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
c7[3] = __byte_perm ( 0, w7[3], selector);
|
|
c7[2] = __byte_perm (w7[3], w7[2], selector);
|
|
c7[1] = __byte_perm (w7[2], w7[1], selector);
|
|
c7[0] = __byte_perm (w7[1], w7[0], selector);
|
|
c6[3] = __byte_perm (w7[0], w6[3], selector);
|
|
c6[2] = __byte_perm (w6[3], w6[2], selector);
|
|
c6[1] = __byte_perm (w6[2], w6[1], selector);
|
|
c6[0] = __byte_perm (w6[1], w6[0], selector);
|
|
c5[3] = __byte_perm (w6[0], w5[3], selector);
|
|
c5[2] = __byte_perm (w5[3], w5[2], selector);
|
|
c5[1] = __byte_perm (w5[2], w5[1], selector);
|
|
c5[0] = __byte_perm (w5[1], w5[0], selector);
|
|
c4[3] = __byte_perm (w5[0], w4[3], selector);
|
|
c4[2] = __byte_perm (w4[3], w4[2], selector);
|
|
c4[1] = __byte_perm (w4[2], w4[1], selector);
|
|
c4[0] = __byte_perm (w4[1], w4[0], selector);
|
|
c3[3] = __byte_perm (w4[0], w3[3], selector);
|
|
c3[2] = __byte_perm (w3[3], w3[2], selector);
|
|
c3[1] = __byte_perm (w3[2], w3[1], selector);
|
|
c3[0] = __byte_perm (w3[1], w3[0], selector);
|
|
c2[3] = __byte_perm (w3[0], w2[3], selector);
|
|
c2[2] = __byte_perm (w2[3], w2[2], selector);
|
|
c2[1] = __byte_perm (w2[2], w2[1], selector);
|
|
c2[0] = __byte_perm (w2[1], w2[0], selector);
|
|
c1[3] = __byte_perm (w2[0], w1[3], selector);
|
|
c1[2] = __byte_perm (w1[3], w1[2], selector);
|
|
c1[1] = __byte_perm (w1[2], w1[1], selector);
|
|
c1[0] = __byte_perm (w1[1], w1[0], selector);
|
|
c0[3] = __byte_perm (w1[0], w0[3], selector);
|
|
c0[2] = __byte_perm (w0[3], w0[2], selector);
|
|
c0[1] = __byte_perm (w0[2], w0[1], selector);
|
|
c0[0] = __byte_perm (w0[1], w0[0], selector);
|
|
w7[3] = __byte_perm (w0[0], 0, selector);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_1x64_le (u32x w[64], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
|
|
#pragma unroll
|
|
for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = amd_bytealign (w[62], w[63], offset);
|
|
w[62] = amd_bytealign (w[61], w[62], offset);
|
|
w[61] = amd_bytealign (w[60], w[61], offset);
|
|
w[60] = amd_bytealign (w[59], w[60], offset);
|
|
w[59] = amd_bytealign (w[58], w[59], offset);
|
|
w[58] = amd_bytealign (w[57], w[58], offset);
|
|
w[57] = amd_bytealign (w[56], w[57], offset);
|
|
w[56] = amd_bytealign (w[55], w[56], offset);
|
|
w[55] = amd_bytealign (w[54], w[55], offset);
|
|
w[54] = amd_bytealign (w[53], w[54], offset);
|
|
w[53] = amd_bytealign (w[52], w[53], offset);
|
|
w[52] = amd_bytealign (w[51], w[52], offset);
|
|
w[51] = amd_bytealign (w[50], w[51], offset);
|
|
w[50] = amd_bytealign (w[49], w[50], offset);
|
|
w[49] = amd_bytealign (w[48], w[49], offset);
|
|
w[48] = amd_bytealign (w[47], w[48], offset);
|
|
w[47] = amd_bytealign (w[46], w[47], offset);
|
|
w[46] = amd_bytealign (w[45], w[46], offset);
|
|
w[45] = amd_bytealign (w[44], w[45], offset);
|
|
w[44] = amd_bytealign (w[43], w[44], offset);
|
|
w[43] = amd_bytealign (w[42], w[43], offset);
|
|
w[42] = amd_bytealign (w[41], w[42], offset);
|
|
w[41] = amd_bytealign (w[40], w[41], offset);
|
|
w[40] = amd_bytealign (w[39], w[40], offset);
|
|
w[39] = amd_bytealign (w[38], w[39], offset);
|
|
w[38] = amd_bytealign (w[37], w[38], offset);
|
|
w[37] = amd_bytealign (w[36], w[37], offset);
|
|
w[36] = amd_bytealign (w[35], w[36], offset);
|
|
w[35] = amd_bytealign (w[34], w[35], offset);
|
|
w[34] = amd_bytealign (w[33], w[34], offset);
|
|
w[33] = amd_bytealign (w[32], w[33], offset);
|
|
w[32] = amd_bytealign (w[31], w[32], offset);
|
|
w[31] = amd_bytealign (w[30], w[31], offset);
|
|
w[30] = amd_bytealign (w[29], w[30], offset);
|
|
w[29] = amd_bytealign (w[28], w[29], offset);
|
|
w[28] = amd_bytealign (w[27], w[28], offset);
|
|
w[27] = amd_bytealign (w[26], w[27], offset);
|
|
w[26] = amd_bytealign (w[25], w[26], offset);
|
|
w[25] = amd_bytealign (w[24], w[25], offset);
|
|
w[24] = amd_bytealign (w[23], w[24], offset);
|
|
w[23] = amd_bytealign (w[22], w[23], offset);
|
|
w[22] = amd_bytealign (w[21], w[22], offset);
|
|
w[21] = amd_bytealign (w[20], w[21], offset);
|
|
w[20] = amd_bytealign (w[19], w[20], offset);
|
|
w[19] = amd_bytealign (w[18], w[19], offset);
|
|
w[18] = amd_bytealign (w[17], w[18], offset);
|
|
w[17] = amd_bytealign (w[16], w[17], offset);
|
|
w[16] = amd_bytealign (w[15], w[16], offset);
|
|
w[15] = amd_bytealign (w[14], w[15], offset);
|
|
w[14] = amd_bytealign (w[13], w[14], offset);
|
|
w[13] = amd_bytealign (w[12], w[13], offset);
|
|
w[12] = amd_bytealign (w[11], w[12], offset);
|
|
w[11] = amd_bytealign (w[10], w[11], offset);
|
|
w[10] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[ 9] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[ 8] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 7] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 6] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 5] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 4] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 3] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 2] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 1] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 0] = amd_bytealign ( 0, w[ 0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = amd_bytealign (w[61], w[62], offset);
|
|
w[62] = amd_bytealign (w[60], w[61], offset);
|
|
w[61] = amd_bytealign (w[59], w[60], offset);
|
|
w[60] = amd_bytealign (w[58], w[59], offset);
|
|
w[59] = amd_bytealign (w[57], w[58], offset);
|
|
w[58] = amd_bytealign (w[56], w[57], offset);
|
|
w[57] = amd_bytealign (w[55], w[56], offset);
|
|
w[56] = amd_bytealign (w[54], w[55], offset);
|
|
w[55] = amd_bytealign (w[53], w[54], offset);
|
|
w[54] = amd_bytealign (w[52], w[53], offset);
|
|
w[53] = amd_bytealign (w[51], w[52], offset);
|
|
w[52] = amd_bytealign (w[50], w[51], offset);
|
|
w[51] = amd_bytealign (w[49], w[50], offset);
|
|
w[50] = amd_bytealign (w[48], w[49], offset);
|
|
w[49] = amd_bytealign (w[47], w[48], offset);
|
|
w[48] = amd_bytealign (w[46], w[47], offset);
|
|
w[47] = amd_bytealign (w[45], w[46], offset);
|
|
w[46] = amd_bytealign (w[44], w[45], offset);
|
|
w[45] = amd_bytealign (w[43], w[44], offset);
|
|
w[44] = amd_bytealign (w[42], w[43], offset);
|
|
w[43] = amd_bytealign (w[41], w[42], offset);
|
|
w[42] = amd_bytealign (w[40], w[41], offset);
|
|
w[41] = amd_bytealign (w[39], w[40], offset);
|
|
w[40] = amd_bytealign (w[38], w[39], offset);
|
|
w[39] = amd_bytealign (w[37], w[38], offset);
|
|
w[38] = amd_bytealign (w[36], w[37], offset);
|
|
w[37] = amd_bytealign (w[35], w[36], offset);
|
|
w[36] = amd_bytealign (w[34], w[35], offset);
|
|
w[35] = amd_bytealign (w[33], w[34], offset);
|
|
w[34] = amd_bytealign (w[32], w[33], offset);
|
|
w[33] = amd_bytealign (w[31], w[32], offset);
|
|
w[32] = amd_bytealign (w[30], w[31], offset);
|
|
w[31] = amd_bytealign (w[29], w[30], offset);
|
|
w[30] = amd_bytealign (w[28], w[29], offset);
|
|
w[29] = amd_bytealign (w[27], w[28], offset);
|
|
w[28] = amd_bytealign (w[26], w[27], offset);
|
|
w[27] = amd_bytealign (w[25], w[26], offset);
|
|
w[26] = amd_bytealign (w[24], w[25], offset);
|
|
w[25] = amd_bytealign (w[23], w[24], offset);
|
|
w[24] = amd_bytealign (w[22], w[23], offset);
|
|
w[23] = amd_bytealign (w[21], w[22], offset);
|
|
w[22] = amd_bytealign (w[20], w[21], offset);
|
|
w[21] = amd_bytealign (w[19], w[20], offset);
|
|
w[20] = amd_bytealign (w[18], w[19], offset);
|
|
w[19] = amd_bytealign (w[17], w[18], offset);
|
|
w[18] = amd_bytealign (w[16], w[17], offset);
|
|
w[17] = amd_bytealign (w[15], w[16], offset);
|
|
w[16] = amd_bytealign (w[14], w[15], offset);
|
|
w[15] = amd_bytealign (w[13], w[14], offset);
|
|
w[14] = amd_bytealign (w[12], w[13], offset);
|
|
w[13] = amd_bytealign (w[11], w[12], offset);
|
|
w[12] = amd_bytealign (w[10], w[11], offset);
|
|
w[11] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[10] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[ 9] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 8] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 7] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 6] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 5] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 4] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 3] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 2] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 1] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = amd_bytealign (w[60], w[61], offset);
|
|
w[62] = amd_bytealign (w[59], w[60], offset);
|
|
w[61] = amd_bytealign (w[58], w[59], offset);
|
|
w[60] = amd_bytealign (w[57], w[58], offset);
|
|
w[59] = amd_bytealign (w[56], w[57], offset);
|
|
w[58] = amd_bytealign (w[55], w[56], offset);
|
|
w[57] = amd_bytealign (w[54], w[55], offset);
|
|
w[56] = amd_bytealign (w[53], w[54], offset);
|
|
w[55] = amd_bytealign (w[52], w[53], offset);
|
|
w[54] = amd_bytealign (w[51], w[52], offset);
|
|
w[53] = amd_bytealign (w[50], w[51], offset);
|
|
w[52] = amd_bytealign (w[49], w[50], offset);
|
|
w[51] = amd_bytealign (w[48], w[49], offset);
|
|
w[50] = amd_bytealign (w[47], w[48], offset);
|
|
w[49] = amd_bytealign (w[46], w[47], offset);
|
|
w[48] = amd_bytealign (w[45], w[46], offset);
|
|
w[47] = amd_bytealign (w[44], w[45], offset);
|
|
w[46] = amd_bytealign (w[43], w[44], offset);
|
|
w[45] = amd_bytealign (w[42], w[43], offset);
|
|
w[44] = amd_bytealign (w[41], w[42], offset);
|
|
w[43] = amd_bytealign (w[40], w[41], offset);
|
|
w[42] = amd_bytealign (w[39], w[40], offset);
|
|
w[41] = amd_bytealign (w[38], w[39], offset);
|
|
w[40] = amd_bytealign (w[37], w[38], offset);
|
|
w[39] = amd_bytealign (w[36], w[37], offset);
|
|
w[38] = amd_bytealign (w[35], w[36], offset);
|
|
w[37] = amd_bytealign (w[34], w[35], offset);
|
|
w[36] = amd_bytealign (w[33], w[34], offset);
|
|
w[35] = amd_bytealign (w[32], w[33], offset);
|
|
w[34] = amd_bytealign (w[31], w[32], offset);
|
|
w[33] = amd_bytealign (w[30], w[31], offset);
|
|
w[32] = amd_bytealign (w[29], w[30], offset);
|
|
w[31] = amd_bytealign (w[28], w[29], offset);
|
|
w[30] = amd_bytealign (w[27], w[28], offset);
|
|
w[29] = amd_bytealign (w[26], w[27], offset);
|
|
w[28] = amd_bytealign (w[25], w[26], offset);
|
|
w[27] = amd_bytealign (w[24], w[25], offset);
|
|
w[26] = amd_bytealign (w[23], w[24], offset);
|
|
w[25] = amd_bytealign (w[22], w[23], offset);
|
|
w[24] = amd_bytealign (w[21], w[22], offset);
|
|
w[23] = amd_bytealign (w[20], w[21], offset);
|
|
w[22] = amd_bytealign (w[19], w[20], offset);
|
|
w[21] = amd_bytealign (w[18], w[19], offset);
|
|
w[20] = amd_bytealign (w[17], w[18], offset);
|
|
w[19] = amd_bytealign (w[16], w[17], offset);
|
|
w[18] = amd_bytealign (w[15], w[16], offset);
|
|
w[17] = amd_bytealign (w[14], w[15], offset);
|
|
w[16] = amd_bytealign (w[13], w[14], offset);
|
|
w[15] = amd_bytealign (w[12], w[13], offset);
|
|
w[14] = amd_bytealign (w[11], w[12], offset);
|
|
w[13] = amd_bytealign (w[10], w[11], offset);
|
|
w[12] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[11] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[10] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 9] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 8] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 7] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 6] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 5] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 4] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 3] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 2] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = amd_bytealign (w[59], w[60], offset);
|
|
w[62] = amd_bytealign (w[58], w[59], offset);
|
|
w[61] = amd_bytealign (w[57], w[58], offset);
|
|
w[60] = amd_bytealign (w[56], w[57], offset);
|
|
w[59] = amd_bytealign (w[55], w[56], offset);
|
|
w[58] = amd_bytealign (w[54], w[55], offset);
|
|
w[57] = amd_bytealign (w[53], w[54], offset);
|
|
w[56] = amd_bytealign (w[52], w[53], offset);
|
|
w[55] = amd_bytealign (w[51], w[52], offset);
|
|
w[54] = amd_bytealign (w[50], w[51], offset);
|
|
w[53] = amd_bytealign (w[49], w[50], offset);
|
|
w[52] = amd_bytealign (w[48], w[49], offset);
|
|
w[51] = amd_bytealign (w[47], w[48], offset);
|
|
w[50] = amd_bytealign (w[46], w[47], offset);
|
|
w[49] = amd_bytealign (w[45], w[46], offset);
|
|
w[48] = amd_bytealign (w[44], w[45], offset);
|
|
w[47] = amd_bytealign (w[43], w[44], offset);
|
|
w[46] = amd_bytealign (w[42], w[43], offset);
|
|
w[45] = amd_bytealign (w[41], w[42], offset);
|
|
w[44] = amd_bytealign (w[40], w[41], offset);
|
|
w[43] = amd_bytealign (w[39], w[40], offset);
|
|
w[42] = amd_bytealign (w[38], w[39], offset);
|
|
w[41] = amd_bytealign (w[37], w[38], offset);
|
|
w[40] = amd_bytealign (w[36], w[37], offset);
|
|
w[39] = amd_bytealign (w[35], w[36], offset);
|
|
w[38] = amd_bytealign (w[34], w[35], offset);
|
|
w[37] = amd_bytealign (w[33], w[34], offset);
|
|
w[36] = amd_bytealign (w[32], w[33], offset);
|
|
w[35] = amd_bytealign (w[31], w[32], offset);
|
|
w[34] = amd_bytealign (w[30], w[31], offset);
|
|
w[33] = amd_bytealign (w[29], w[30], offset);
|
|
w[32] = amd_bytealign (w[28], w[29], offset);
|
|
w[31] = amd_bytealign (w[27], w[28], offset);
|
|
w[30] = amd_bytealign (w[26], w[27], offset);
|
|
w[29] = amd_bytealign (w[25], w[26], offset);
|
|
w[28] = amd_bytealign (w[24], w[25], offset);
|
|
w[27] = amd_bytealign (w[23], w[24], offset);
|
|
w[26] = amd_bytealign (w[22], w[23], offset);
|
|
w[25] = amd_bytealign (w[21], w[22], offset);
|
|
w[24] = amd_bytealign (w[20], w[21], offset);
|
|
w[23] = amd_bytealign (w[19], w[20], offset);
|
|
w[22] = amd_bytealign (w[18], w[19], offset);
|
|
w[21] = amd_bytealign (w[17], w[18], offset);
|
|
w[20] = amd_bytealign (w[16], w[17], offset);
|
|
w[19] = amd_bytealign (w[15], w[16], offset);
|
|
w[18] = amd_bytealign (w[14], w[15], offset);
|
|
w[17] = amd_bytealign (w[13], w[14], offset);
|
|
w[16] = amd_bytealign (w[12], w[13], offset);
|
|
w[15] = amd_bytealign (w[11], w[12], offset);
|
|
w[14] = amd_bytealign (w[10], w[11], offset);
|
|
w[13] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[12] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[11] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[10] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 9] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 8] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 7] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 6] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 5] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 4] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 3] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = amd_bytealign (w[58], w[59], offset);
|
|
w[62] = amd_bytealign (w[57], w[58], offset);
|
|
w[61] = amd_bytealign (w[56], w[57], offset);
|
|
w[60] = amd_bytealign (w[55], w[56], offset);
|
|
w[59] = amd_bytealign (w[54], w[55], offset);
|
|
w[58] = amd_bytealign (w[53], w[54], offset);
|
|
w[57] = amd_bytealign (w[52], w[53], offset);
|
|
w[56] = amd_bytealign (w[51], w[52], offset);
|
|
w[55] = amd_bytealign (w[50], w[51], offset);
|
|
w[54] = amd_bytealign (w[49], w[50], offset);
|
|
w[53] = amd_bytealign (w[48], w[49], offset);
|
|
w[52] = amd_bytealign (w[47], w[48], offset);
|
|
w[51] = amd_bytealign (w[46], w[47], offset);
|
|
w[50] = amd_bytealign (w[45], w[46], offset);
|
|
w[49] = amd_bytealign (w[44], w[45], offset);
|
|
w[48] = amd_bytealign (w[43], w[44], offset);
|
|
w[47] = amd_bytealign (w[42], w[43], offset);
|
|
w[46] = amd_bytealign (w[41], w[42], offset);
|
|
w[45] = amd_bytealign (w[40], w[41], offset);
|
|
w[44] = amd_bytealign (w[39], w[40], offset);
|
|
w[43] = amd_bytealign (w[38], w[39], offset);
|
|
w[42] = amd_bytealign (w[37], w[38], offset);
|
|
w[41] = amd_bytealign (w[36], w[37], offset);
|
|
w[40] = amd_bytealign (w[35], w[36], offset);
|
|
w[39] = amd_bytealign (w[34], w[35], offset);
|
|
w[38] = amd_bytealign (w[33], w[34], offset);
|
|
w[37] = amd_bytealign (w[32], w[33], offset);
|
|
w[36] = amd_bytealign (w[31], w[32], offset);
|
|
w[35] = amd_bytealign (w[30], w[31], offset);
|
|
w[34] = amd_bytealign (w[29], w[30], offset);
|
|
w[33] = amd_bytealign (w[28], w[29], offset);
|
|
w[32] = amd_bytealign (w[27], w[28], offset);
|
|
w[31] = amd_bytealign (w[26], w[27], offset);
|
|
w[30] = amd_bytealign (w[25], w[26], offset);
|
|
w[29] = amd_bytealign (w[24], w[25], offset);
|
|
w[28] = amd_bytealign (w[23], w[24], offset);
|
|
w[27] = amd_bytealign (w[22], w[23], offset);
|
|
w[26] = amd_bytealign (w[21], w[22], offset);
|
|
w[25] = amd_bytealign (w[20], w[21], offset);
|
|
w[24] = amd_bytealign (w[19], w[20], offset);
|
|
w[23] = amd_bytealign (w[18], w[19], offset);
|
|
w[22] = amd_bytealign (w[17], w[18], offset);
|
|
w[21] = amd_bytealign (w[16], w[17], offset);
|
|
w[20] = amd_bytealign (w[15], w[16], offset);
|
|
w[19] = amd_bytealign (w[14], w[15], offset);
|
|
w[18] = amd_bytealign (w[13], w[14], offset);
|
|
w[17] = amd_bytealign (w[12], w[13], offset);
|
|
w[16] = amd_bytealign (w[11], w[12], offset);
|
|
w[15] = amd_bytealign (w[10], w[11], offset);
|
|
w[14] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[13] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[12] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[11] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[10] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 9] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 8] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 7] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 6] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 5] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 4] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = amd_bytealign (w[57], w[58], offset);
|
|
w[62] = amd_bytealign (w[56], w[57], offset);
|
|
w[61] = amd_bytealign (w[55], w[56], offset);
|
|
w[60] = amd_bytealign (w[54], w[55], offset);
|
|
w[59] = amd_bytealign (w[53], w[54], offset);
|
|
w[58] = amd_bytealign (w[52], w[53], offset);
|
|
w[57] = amd_bytealign (w[51], w[52], offset);
|
|
w[56] = amd_bytealign (w[50], w[51], offset);
|
|
w[55] = amd_bytealign (w[49], w[50], offset);
|
|
w[54] = amd_bytealign (w[48], w[49], offset);
|
|
w[53] = amd_bytealign (w[47], w[48], offset);
|
|
w[52] = amd_bytealign (w[46], w[47], offset);
|
|
w[51] = amd_bytealign (w[45], w[46], offset);
|
|
w[50] = amd_bytealign (w[44], w[45], offset);
|
|
w[49] = amd_bytealign (w[43], w[44], offset);
|
|
w[48] = amd_bytealign (w[42], w[43], offset);
|
|
w[47] = amd_bytealign (w[41], w[42], offset);
|
|
w[46] = amd_bytealign (w[40], w[41], offset);
|
|
w[45] = amd_bytealign (w[39], w[40], offset);
|
|
w[44] = amd_bytealign (w[38], w[39], offset);
|
|
w[43] = amd_bytealign (w[37], w[38], offset);
|
|
w[42] = amd_bytealign (w[36], w[37], offset);
|
|
w[41] = amd_bytealign (w[35], w[36], offset);
|
|
w[40] = amd_bytealign (w[34], w[35], offset);
|
|
w[39] = amd_bytealign (w[33], w[34], offset);
|
|
w[38] = amd_bytealign (w[32], w[33], offset);
|
|
w[37] = amd_bytealign (w[31], w[32], offset);
|
|
w[36] = amd_bytealign (w[30], w[31], offset);
|
|
w[35] = amd_bytealign (w[29], w[30], offset);
|
|
w[34] = amd_bytealign (w[28], w[29], offset);
|
|
w[33] = amd_bytealign (w[27], w[28], offset);
|
|
w[32] = amd_bytealign (w[26], w[27], offset);
|
|
w[31] = amd_bytealign (w[25], w[26], offset);
|
|
w[30] = amd_bytealign (w[24], w[25], offset);
|
|
w[29] = amd_bytealign (w[23], w[24], offset);
|
|
w[28] = amd_bytealign (w[22], w[23], offset);
|
|
w[27] = amd_bytealign (w[21], w[22], offset);
|
|
w[26] = amd_bytealign (w[20], w[21], offset);
|
|
w[25] = amd_bytealign (w[19], w[20], offset);
|
|
w[24] = amd_bytealign (w[18], w[19], offset);
|
|
w[23] = amd_bytealign (w[17], w[18], offset);
|
|
w[22] = amd_bytealign (w[16], w[17], offset);
|
|
w[21] = amd_bytealign (w[15], w[16], offset);
|
|
w[20] = amd_bytealign (w[14], w[15], offset);
|
|
w[19] = amd_bytealign (w[13], w[14], offset);
|
|
w[18] = amd_bytealign (w[12], w[13], offset);
|
|
w[17] = amd_bytealign (w[11], w[12], offset);
|
|
w[16] = amd_bytealign (w[10], w[11], offset);
|
|
w[15] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[14] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[13] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[12] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[11] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[10] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 9] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 8] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 7] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 6] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 5] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = amd_bytealign (w[56], w[57], offset);
|
|
w[62] = amd_bytealign (w[55], w[56], offset);
|
|
w[61] = amd_bytealign (w[54], w[55], offset);
|
|
w[60] = amd_bytealign (w[53], w[54], offset);
|
|
w[59] = amd_bytealign (w[52], w[53], offset);
|
|
w[58] = amd_bytealign (w[51], w[52], offset);
|
|
w[57] = amd_bytealign (w[50], w[51], offset);
|
|
w[56] = amd_bytealign (w[49], w[50], offset);
|
|
w[55] = amd_bytealign (w[48], w[49], offset);
|
|
w[54] = amd_bytealign (w[47], w[48], offset);
|
|
w[53] = amd_bytealign (w[46], w[47], offset);
|
|
w[52] = amd_bytealign (w[45], w[46], offset);
|
|
w[51] = amd_bytealign (w[44], w[45], offset);
|
|
w[50] = amd_bytealign (w[43], w[44], offset);
|
|
w[49] = amd_bytealign (w[42], w[43], offset);
|
|
w[48] = amd_bytealign (w[41], w[42], offset);
|
|
w[47] = amd_bytealign (w[40], w[41], offset);
|
|
w[46] = amd_bytealign (w[39], w[40], offset);
|
|
w[45] = amd_bytealign (w[38], w[39], offset);
|
|
w[44] = amd_bytealign (w[37], w[38], offset);
|
|
w[43] = amd_bytealign (w[36], w[37], offset);
|
|
w[42] = amd_bytealign (w[35], w[36], offset);
|
|
w[41] = amd_bytealign (w[34], w[35], offset);
|
|
w[40] = amd_bytealign (w[33], w[34], offset);
|
|
w[39] = amd_bytealign (w[32], w[33], offset);
|
|
w[38] = amd_bytealign (w[31], w[32], offset);
|
|
w[37] = amd_bytealign (w[30], w[31], offset);
|
|
w[36] = amd_bytealign (w[29], w[30], offset);
|
|
w[35] = amd_bytealign (w[28], w[29], offset);
|
|
w[34] = amd_bytealign (w[27], w[28], offset);
|
|
w[33] = amd_bytealign (w[26], w[27], offset);
|
|
w[32] = amd_bytealign (w[25], w[26], offset);
|
|
w[31] = amd_bytealign (w[24], w[25], offset);
|
|
w[30] = amd_bytealign (w[23], w[24], offset);
|
|
w[29] = amd_bytealign (w[22], w[23], offset);
|
|
w[28] = amd_bytealign (w[21], w[22], offset);
|
|
w[27] = amd_bytealign (w[20], w[21], offset);
|
|
w[26] = amd_bytealign (w[19], w[20], offset);
|
|
w[25] = amd_bytealign (w[18], w[19], offset);
|
|
w[24] = amd_bytealign (w[17], w[18], offset);
|
|
w[23] = amd_bytealign (w[16], w[17], offset);
|
|
w[22] = amd_bytealign (w[15], w[16], offset);
|
|
w[21] = amd_bytealign (w[14], w[15], offset);
|
|
w[20] = amd_bytealign (w[13], w[14], offset);
|
|
w[19] = amd_bytealign (w[12], w[13], offset);
|
|
w[18] = amd_bytealign (w[11], w[12], offset);
|
|
w[17] = amd_bytealign (w[10], w[11], offset);
|
|
w[16] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[15] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[14] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[13] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[12] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[11] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[10] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 9] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 8] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 7] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 6] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = amd_bytealign (w[55], w[56], offset);
|
|
w[62] = amd_bytealign (w[54], w[55], offset);
|
|
w[61] = amd_bytealign (w[53], w[54], offset);
|
|
w[60] = amd_bytealign (w[52], w[53], offset);
|
|
w[59] = amd_bytealign (w[51], w[52], offset);
|
|
w[58] = amd_bytealign (w[50], w[51], offset);
|
|
w[57] = amd_bytealign (w[49], w[50], offset);
|
|
w[56] = amd_bytealign (w[48], w[49], offset);
|
|
w[55] = amd_bytealign (w[47], w[48], offset);
|
|
w[54] = amd_bytealign (w[46], w[47], offset);
|
|
w[53] = amd_bytealign (w[45], w[46], offset);
|
|
w[52] = amd_bytealign (w[44], w[45], offset);
|
|
w[51] = amd_bytealign (w[43], w[44], offset);
|
|
w[50] = amd_bytealign (w[42], w[43], offset);
|
|
w[49] = amd_bytealign (w[41], w[42], offset);
|
|
w[48] = amd_bytealign (w[40], w[41], offset);
|
|
w[47] = amd_bytealign (w[39], w[40], offset);
|
|
w[46] = amd_bytealign (w[38], w[39], offset);
|
|
w[45] = amd_bytealign (w[37], w[38], offset);
|
|
w[44] = amd_bytealign (w[36], w[37], offset);
|
|
w[43] = amd_bytealign (w[35], w[36], offset);
|
|
w[42] = amd_bytealign (w[34], w[35], offset);
|
|
w[41] = amd_bytealign (w[33], w[34], offset);
|
|
w[40] = amd_bytealign (w[32], w[33], offset);
|
|
w[39] = amd_bytealign (w[31], w[32], offset);
|
|
w[38] = amd_bytealign (w[30], w[31], offset);
|
|
w[37] = amd_bytealign (w[29], w[30], offset);
|
|
w[36] = amd_bytealign (w[28], w[29], offset);
|
|
w[35] = amd_bytealign (w[27], w[28], offset);
|
|
w[34] = amd_bytealign (w[26], w[27], offset);
|
|
w[33] = amd_bytealign (w[25], w[26], offset);
|
|
w[32] = amd_bytealign (w[24], w[25], offset);
|
|
w[31] = amd_bytealign (w[23], w[24], offset);
|
|
w[30] = amd_bytealign (w[22], w[23], offset);
|
|
w[29] = amd_bytealign (w[21], w[22], offset);
|
|
w[28] = amd_bytealign (w[20], w[21], offset);
|
|
w[27] = amd_bytealign (w[19], w[20], offset);
|
|
w[26] = amd_bytealign (w[18], w[19], offset);
|
|
w[25] = amd_bytealign (w[17], w[18], offset);
|
|
w[24] = amd_bytealign (w[16], w[17], offset);
|
|
w[23] = amd_bytealign (w[15], w[16], offset);
|
|
w[22] = amd_bytealign (w[14], w[15], offset);
|
|
w[21] = amd_bytealign (w[13], w[14], offset);
|
|
w[20] = amd_bytealign (w[12], w[13], offset);
|
|
w[19] = amd_bytealign (w[11], w[12], offset);
|
|
w[18] = amd_bytealign (w[10], w[11], offset);
|
|
w[17] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[16] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[15] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[14] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[13] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[12] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[11] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[10] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 9] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 8] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 7] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = amd_bytealign (w[54], w[55], offset);
|
|
w[62] = amd_bytealign (w[53], w[54], offset);
|
|
w[61] = amd_bytealign (w[52], w[53], offset);
|
|
w[60] = amd_bytealign (w[51], w[52], offset);
|
|
w[59] = amd_bytealign (w[50], w[51], offset);
|
|
w[58] = amd_bytealign (w[49], w[50], offset);
|
|
w[57] = amd_bytealign (w[48], w[49], offset);
|
|
w[56] = amd_bytealign (w[47], w[48], offset);
|
|
w[55] = amd_bytealign (w[46], w[47], offset);
|
|
w[54] = amd_bytealign (w[45], w[46], offset);
|
|
w[53] = amd_bytealign (w[44], w[45], offset);
|
|
w[52] = amd_bytealign (w[43], w[44], offset);
|
|
w[51] = amd_bytealign (w[42], w[43], offset);
|
|
w[50] = amd_bytealign (w[41], w[42], offset);
|
|
w[49] = amd_bytealign (w[40], w[41], offset);
|
|
w[48] = amd_bytealign (w[39], w[40], offset);
|
|
w[47] = amd_bytealign (w[38], w[39], offset);
|
|
w[46] = amd_bytealign (w[37], w[38], offset);
|
|
w[45] = amd_bytealign (w[36], w[37], offset);
|
|
w[44] = amd_bytealign (w[35], w[36], offset);
|
|
w[43] = amd_bytealign (w[34], w[35], offset);
|
|
w[42] = amd_bytealign (w[33], w[34], offset);
|
|
w[41] = amd_bytealign (w[32], w[33], offset);
|
|
w[40] = amd_bytealign (w[31], w[32], offset);
|
|
w[39] = amd_bytealign (w[30], w[31], offset);
|
|
w[38] = amd_bytealign (w[29], w[30], offset);
|
|
w[37] = amd_bytealign (w[28], w[29], offset);
|
|
w[36] = amd_bytealign (w[27], w[28], offset);
|
|
w[35] = amd_bytealign (w[26], w[27], offset);
|
|
w[34] = amd_bytealign (w[25], w[26], offset);
|
|
w[33] = amd_bytealign (w[24], w[25], offset);
|
|
w[32] = amd_bytealign (w[23], w[24], offset);
|
|
w[31] = amd_bytealign (w[22], w[23], offset);
|
|
w[30] = amd_bytealign (w[21], w[22], offset);
|
|
w[29] = amd_bytealign (w[20], w[21], offset);
|
|
w[28] = amd_bytealign (w[19], w[20], offset);
|
|
w[27] = amd_bytealign (w[18], w[19], offset);
|
|
w[26] = amd_bytealign (w[17], w[18], offset);
|
|
w[25] = amd_bytealign (w[16], w[17], offset);
|
|
w[24] = amd_bytealign (w[15], w[16], offset);
|
|
w[23] = amd_bytealign (w[14], w[15], offset);
|
|
w[22] = amd_bytealign (w[13], w[14], offset);
|
|
w[21] = amd_bytealign (w[12], w[13], offset);
|
|
w[20] = amd_bytealign (w[11], w[12], offset);
|
|
w[19] = amd_bytealign (w[10], w[11], offset);
|
|
w[18] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[17] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[16] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[15] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[14] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[13] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[12] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[11] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[10] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 9] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 8] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = amd_bytealign (w[53], w[54], offset);
|
|
w[62] = amd_bytealign (w[52], w[53], offset);
|
|
w[61] = amd_bytealign (w[51], w[52], offset);
|
|
w[60] = amd_bytealign (w[50], w[51], offset);
|
|
w[59] = amd_bytealign (w[49], w[50], offset);
|
|
w[58] = amd_bytealign (w[48], w[49], offset);
|
|
w[57] = amd_bytealign (w[47], w[48], offset);
|
|
w[56] = amd_bytealign (w[46], w[47], offset);
|
|
w[55] = amd_bytealign (w[45], w[46], offset);
|
|
w[54] = amd_bytealign (w[44], w[45], offset);
|
|
w[53] = amd_bytealign (w[43], w[44], offset);
|
|
w[52] = amd_bytealign (w[42], w[43], offset);
|
|
w[51] = amd_bytealign (w[41], w[42], offset);
|
|
w[50] = amd_bytealign (w[40], w[41], offset);
|
|
w[49] = amd_bytealign (w[39], w[40], offset);
|
|
w[48] = amd_bytealign (w[38], w[39], offset);
|
|
w[47] = amd_bytealign (w[37], w[38], offset);
|
|
w[46] = amd_bytealign (w[36], w[37], offset);
|
|
w[45] = amd_bytealign (w[35], w[36], offset);
|
|
w[44] = amd_bytealign (w[34], w[35], offset);
|
|
w[43] = amd_bytealign (w[33], w[34], offset);
|
|
w[42] = amd_bytealign (w[32], w[33], offset);
|
|
w[41] = amd_bytealign (w[31], w[32], offset);
|
|
w[40] = amd_bytealign (w[30], w[31], offset);
|
|
w[39] = amd_bytealign (w[29], w[30], offset);
|
|
w[38] = amd_bytealign (w[28], w[29], offset);
|
|
w[37] = amd_bytealign (w[27], w[28], offset);
|
|
w[36] = amd_bytealign (w[26], w[27], offset);
|
|
w[35] = amd_bytealign (w[25], w[26], offset);
|
|
w[34] = amd_bytealign (w[24], w[25], offset);
|
|
w[33] = amd_bytealign (w[23], w[24], offset);
|
|
w[32] = amd_bytealign (w[22], w[23], offset);
|
|
w[31] = amd_bytealign (w[21], w[22], offset);
|
|
w[30] = amd_bytealign (w[20], w[21], offset);
|
|
w[29] = amd_bytealign (w[19], w[20], offset);
|
|
w[28] = amd_bytealign (w[18], w[19], offset);
|
|
w[27] = amd_bytealign (w[17], w[18], offset);
|
|
w[26] = amd_bytealign (w[16], w[17], offset);
|
|
w[25] = amd_bytealign (w[15], w[16], offset);
|
|
w[24] = amd_bytealign (w[14], w[15], offset);
|
|
w[23] = amd_bytealign (w[13], w[14], offset);
|
|
w[22] = amd_bytealign (w[12], w[13], offset);
|
|
w[21] = amd_bytealign (w[11], w[12], offset);
|
|
w[20] = amd_bytealign (w[10], w[11], offset);
|
|
w[19] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[18] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[17] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[16] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[15] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[14] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[13] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[12] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[11] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[10] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 9] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = amd_bytealign (w[52], w[53], offset);
|
|
w[62] = amd_bytealign (w[51], w[52], offset);
|
|
w[61] = amd_bytealign (w[50], w[51], offset);
|
|
w[60] = amd_bytealign (w[49], w[50], offset);
|
|
w[59] = amd_bytealign (w[48], w[49], offset);
|
|
w[58] = amd_bytealign (w[47], w[48], offset);
|
|
w[57] = amd_bytealign (w[46], w[47], offset);
|
|
w[56] = amd_bytealign (w[45], w[46], offset);
|
|
w[55] = amd_bytealign (w[44], w[45], offset);
|
|
w[54] = amd_bytealign (w[43], w[44], offset);
|
|
w[53] = amd_bytealign (w[42], w[43], offset);
|
|
w[52] = amd_bytealign (w[41], w[42], offset);
|
|
w[51] = amd_bytealign (w[40], w[41], offset);
|
|
w[50] = amd_bytealign (w[39], w[40], offset);
|
|
w[49] = amd_bytealign (w[38], w[39], offset);
|
|
w[48] = amd_bytealign (w[37], w[38], offset);
|
|
w[47] = amd_bytealign (w[36], w[37], offset);
|
|
w[46] = amd_bytealign (w[35], w[36], offset);
|
|
w[45] = amd_bytealign (w[34], w[35], offset);
|
|
w[44] = amd_bytealign (w[33], w[34], offset);
|
|
w[43] = amd_bytealign (w[32], w[33], offset);
|
|
w[42] = amd_bytealign (w[31], w[32], offset);
|
|
w[41] = amd_bytealign (w[30], w[31], offset);
|
|
w[40] = amd_bytealign (w[29], w[30], offset);
|
|
w[39] = amd_bytealign (w[28], w[29], offset);
|
|
w[38] = amd_bytealign (w[27], w[28], offset);
|
|
w[37] = amd_bytealign (w[26], w[27], offset);
|
|
w[36] = amd_bytealign (w[25], w[26], offset);
|
|
w[35] = amd_bytealign (w[24], w[25], offset);
|
|
w[34] = amd_bytealign (w[23], w[24], offset);
|
|
w[33] = amd_bytealign (w[22], w[23], offset);
|
|
w[32] = amd_bytealign (w[21], w[22], offset);
|
|
w[31] = amd_bytealign (w[20], w[21], offset);
|
|
w[30] = amd_bytealign (w[19], w[20], offset);
|
|
w[29] = amd_bytealign (w[18], w[19], offset);
|
|
w[28] = amd_bytealign (w[17], w[18], offset);
|
|
w[27] = amd_bytealign (w[16], w[17], offset);
|
|
w[26] = amd_bytealign (w[15], w[16], offset);
|
|
w[25] = amd_bytealign (w[14], w[15], offset);
|
|
w[24] = amd_bytealign (w[13], w[14], offset);
|
|
w[23] = amd_bytealign (w[12], w[13], offset);
|
|
w[22] = amd_bytealign (w[11], w[12], offset);
|
|
w[21] = amd_bytealign (w[10], w[11], offset);
|
|
w[20] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[19] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[18] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[17] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[16] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[15] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[14] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[13] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[12] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[11] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[10] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = amd_bytealign (w[51], w[52], offset);
|
|
w[62] = amd_bytealign (w[50], w[51], offset);
|
|
w[61] = amd_bytealign (w[49], w[50], offset);
|
|
w[60] = amd_bytealign (w[48], w[49], offset);
|
|
w[59] = amd_bytealign (w[47], w[48], offset);
|
|
w[58] = amd_bytealign (w[46], w[47], offset);
|
|
w[57] = amd_bytealign (w[45], w[46], offset);
|
|
w[56] = amd_bytealign (w[44], w[45], offset);
|
|
w[55] = amd_bytealign (w[43], w[44], offset);
|
|
w[54] = amd_bytealign (w[42], w[43], offset);
|
|
w[53] = amd_bytealign (w[41], w[42], offset);
|
|
w[52] = amd_bytealign (w[40], w[41], offset);
|
|
w[51] = amd_bytealign (w[39], w[40], offset);
|
|
w[50] = amd_bytealign (w[38], w[39], offset);
|
|
w[49] = amd_bytealign (w[37], w[38], offset);
|
|
w[48] = amd_bytealign (w[36], w[37], offset);
|
|
w[47] = amd_bytealign (w[35], w[36], offset);
|
|
w[46] = amd_bytealign (w[34], w[35], offset);
|
|
w[45] = amd_bytealign (w[33], w[34], offset);
|
|
w[44] = amd_bytealign (w[32], w[33], offset);
|
|
w[43] = amd_bytealign (w[31], w[32], offset);
|
|
w[42] = amd_bytealign (w[30], w[31], offset);
|
|
w[41] = amd_bytealign (w[29], w[30], offset);
|
|
w[40] = amd_bytealign (w[28], w[29], offset);
|
|
w[39] = amd_bytealign (w[27], w[28], offset);
|
|
w[38] = amd_bytealign (w[26], w[27], offset);
|
|
w[37] = amd_bytealign (w[25], w[26], offset);
|
|
w[36] = amd_bytealign (w[24], w[25], offset);
|
|
w[35] = amd_bytealign (w[23], w[24], offset);
|
|
w[34] = amd_bytealign (w[22], w[23], offset);
|
|
w[33] = amd_bytealign (w[21], w[22], offset);
|
|
w[32] = amd_bytealign (w[20], w[21], offset);
|
|
w[31] = amd_bytealign (w[19], w[20], offset);
|
|
w[30] = amd_bytealign (w[18], w[19], offset);
|
|
w[29] = amd_bytealign (w[17], w[18], offset);
|
|
w[28] = amd_bytealign (w[16], w[17], offset);
|
|
w[27] = amd_bytealign (w[15], w[16], offset);
|
|
w[26] = amd_bytealign (w[14], w[15], offset);
|
|
w[25] = amd_bytealign (w[13], w[14], offset);
|
|
w[24] = amd_bytealign (w[12], w[13], offset);
|
|
w[23] = amd_bytealign (w[11], w[12], offset);
|
|
w[22] = amd_bytealign (w[10], w[11], offset);
|
|
w[21] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[20] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[19] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[18] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[17] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[16] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[15] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[14] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[13] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[12] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[11] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = amd_bytealign (w[50], w[51], offset);
|
|
w[62] = amd_bytealign (w[49], w[50], offset);
|
|
w[61] = amd_bytealign (w[48], w[49], offset);
|
|
w[60] = amd_bytealign (w[47], w[48], offset);
|
|
w[59] = amd_bytealign (w[46], w[47], offset);
|
|
w[58] = amd_bytealign (w[45], w[46], offset);
|
|
w[57] = amd_bytealign (w[44], w[45], offset);
|
|
w[56] = amd_bytealign (w[43], w[44], offset);
|
|
w[55] = amd_bytealign (w[42], w[43], offset);
|
|
w[54] = amd_bytealign (w[41], w[42], offset);
|
|
w[53] = amd_bytealign (w[40], w[41], offset);
|
|
w[52] = amd_bytealign (w[39], w[40], offset);
|
|
w[51] = amd_bytealign (w[38], w[39], offset);
|
|
w[50] = amd_bytealign (w[37], w[38], offset);
|
|
w[49] = amd_bytealign (w[36], w[37], offset);
|
|
w[48] = amd_bytealign (w[35], w[36], offset);
|
|
w[47] = amd_bytealign (w[34], w[35], offset);
|
|
w[46] = amd_bytealign (w[33], w[34], offset);
|
|
w[45] = amd_bytealign (w[32], w[33], offset);
|
|
w[44] = amd_bytealign (w[31], w[32], offset);
|
|
w[43] = amd_bytealign (w[30], w[31], offset);
|
|
w[42] = amd_bytealign (w[29], w[30], offset);
|
|
w[41] = amd_bytealign (w[28], w[29], offset);
|
|
w[40] = amd_bytealign (w[27], w[28], offset);
|
|
w[39] = amd_bytealign (w[26], w[27], offset);
|
|
w[38] = amd_bytealign (w[25], w[26], offset);
|
|
w[37] = amd_bytealign (w[24], w[25], offset);
|
|
w[36] = amd_bytealign (w[23], w[24], offset);
|
|
w[35] = amd_bytealign (w[22], w[23], offset);
|
|
w[34] = amd_bytealign (w[21], w[22], offset);
|
|
w[33] = amd_bytealign (w[20], w[21], offset);
|
|
w[32] = amd_bytealign (w[19], w[20], offset);
|
|
w[31] = amd_bytealign (w[18], w[19], offset);
|
|
w[30] = amd_bytealign (w[17], w[18], offset);
|
|
w[29] = amd_bytealign (w[16], w[17], offset);
|
|
w[28] = amd_bytealign (w[15], w[16], offset);
|
|
w[27] = amd_bytealign (w[14], w[15], offset);
|
|
w[26] = amd_bytealign (w[13], w[14], offset);
|
|
w[25] = amd_bytealign (w[12], w[13], offset);
|
|
w[24] = amd_bytealign (w[11], w[12], offset);
|
|
w[23] = amd_bytealign (w[10], w[11], offset);
|
|
w[22] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[21] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[20] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[19] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[18] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[17] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[16] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[15] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[14] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[13] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[12] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = amd_bytealign (w[49], w[50], offset);
|
|
w[62] = amd_bytealign (w[48], w[49], offset);
|
|
w[61] = amd_bytealign (w[47], w[48], offset);
|
|
w[60] = amd_bytealign (w[46], w[47], offset);
|
|
w[59] = amd_bytealign (w[45], w[46], offset);
|
|
w[58] = amd_bytealign (w[44], w[45], offset);
|
|
w[57] = amd_bytealign (w[43], w[44], offset);
|
|
w[56] = amd_bytealign (w[42], w[43], offset);
|
|
w[55] = amd_bytealign (w[41], w[42], offset);
|
|
w[54] = amd_bytealign (w[40], w[41], offset);
|
|
w[53] = amd_bytealign (w[39], w[40], offset);
|
|
w[52] = amd_bytealign (w[38], w[39], offset);
|
|
w[51] = amd_bytealign (w[37], w[38], offset);
|
|
w[50] = amd_bytealign (w[36], w[37], offset);
|
|
w[49] = amd_bytealign (w[35], w[36], offset);
|
|
w[48] = amd_bytealign (w[34], w[35], offset);
|
|
w[47] = amd_bytealign (w[33], w[34], offset);
|
|
w[46] = amd_bytealign (w[32], w[33], offset);
|
|
w[45] = amd_bytealign (w[31], w[32], offset);
|
|
w[44] = amd_bytealign (w[30], w[31], offset);
|
|
w[43] = amd_bytealign (w[29], w[30], offset);
|
|
w[42] = amd_bytealign (w[28], w[29], offset);
|
|
w[41] = amd_bytealign (w[27], w[28], offset);
|
|
w[40] = amd_bytealign (w[26], w[27], offset);
|
|
w[39] = amd_bytealign (w[25], w[26], offset);
|
|
w[38] = amd_bytealign (w[24], w[25], offset);
|
|
w[37] = amd_bytealign (w[23], w[24], offset);
|
|
w[36] = amd_bytealign (w[22], w[23], offset);
|
|
w[35] = amd_bytealign (w[21], w[22], offset);
|
|
w[34] = amd_bytealign (w[20], w[21], offset);
|
|
w[33] = amd_bytealign (w[19], w[20], offset);
|
|
w[32] = amd_bytealign (w[18], w[19], offset);
|
|
w[31] = amd_bytealign (w[17], w[18], offset);
|
|
w[30] = amd_bytealign (w[16], w[17], offset);
|
|
w[29] = amd_bytealign (w[15], w[16], offset);
|
|
w[28] = amd_bytealign (w[14], w[15], offset);
|
|
w[27] = amd_bytealign (w[13], w[14], offset);
|
|
w[26] = amd_bytealign (w[12], w[13], offset);
|
|
w[25] = amd_bytealign (w[11], w[12], offset);
|
|
w[24] = amd_bytealign (w[10], w[11], offset);
|
|
w[23] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[22] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[21] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[20] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[19] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[18] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[17] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[16] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[15] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[14] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[13] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = amd_bytealign (w[48], w[49], offset);
|
|
w[62] = amd_bytealign (w[47], w[48], offset);
|
|
w[61] = amd_bytealign (w[46], w[47], offset);
|
|
w[60] = amd_bytealign (w[45], w[46], offset);
|
|
w[59] = amd_bytealign (w[44], w[45], offset);
|
|
w[58] = amd_bytealign (w[43], w[44], offset);
|
|
w[57] = amd_bytealign (w[42], w[43], offset);
|
|
w[56] = amd_bytealign (w[41], w[42], offset);
|
|
w[55] = amd_bytealign (w[40], w[41], offset);
|
|
w[54] = amd_bytealign (w[39], w[40], offset);
|
|
w[53] = amd_bytealign (w[38], w[39], offset);
|
|
w[52] = amd_bytealign (w[37], w[38], offset);
|
|
w[51] = amd_bytealign (w[36], w[37], offset);
|
|
w[50] = amd_bytealign (w[35], w[36], offset);
|
|
w[49] = amd_bytealign (w[34], w[35], offset);
|
|
w[48] = amd_bytealign (w[33], w[34], offset);
|
|
w[47] = amd_bytealign (w[32], w[33], offset);
|
|
w[46] = amd_bytealign (w[31], w[32], offset);
|
|
w[45] = amd_bytealign (w[30], w[31], offset);
|
|
w[44] = amd_bytealign (w[29], w[30], offset);
|
|
w[43] = amd_bytealign (w[28], w[29], offset);
|
|
w[42] = amd_bytealign (w[27], w[28], offset);
|
|
w[41] = amd_bytealign (w[26], w[27], offset);
|
|
w[40] = amd_bytealign (w[25], w[26], offset);
|
|
w[39] = amd_bytealign (w[24], w[25], offset);
|
|
w[38] = amd_bytealign (w[23], w[24], offset);
|
|
w[37] = amd_bytealign (w[22], w[23], offset);
|
|
w[36] = amd_bytealign (w[21], w[22], offset);
|
|
w[35] = amd_bytealign (w[20], w[21], offset);
|
|
w[34] = amd_bytealign (w[19], w[20], offset);
|
|
w[33] = amd_bytealign (w[18], w[19], offset);
|
|
w[32] = amd_bytealign (w[17], w[18], offset);
|
|
w[31] = amd_bytealign (w[16], w[17], offset);
|
|
w[30] = amd_bytealign (w[15], w[16], offset);
|
|
w[29] = amd_bytealign (w[14], w[15], offset);
|
|
w[28] = amd_bytealign (w[13], w[14], offset);
|
|
w[27] = amd_bytealign (w[12], w[13], offset);
|
|
w[26] = amd_bytealign (w[11], w[12], offset);
|
|
w[25] = amd_bytealign (w[10], w[11], offset);
|
|
w[24] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[23] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[22] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[21] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[20] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[19] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[18] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[17] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[16] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[15] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[14] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = amd_bytealign (w[47], w[48], offset);
|
|
w[62] = amd_bytealign (w[46], w[47], offset);
|
|
w[61] = amd_bytealign (w[45], w[46], offset);
|
|
w[60] = amd_bytealign (w[44], w[45], offset);
|
|
w[59] = amd_bytealign (w[43], w[44], offset);
|
|
w[58] = amd_bytealign (w[42], w[43], offset);
|
|
w[57] = amd_bytealign (w[41], w[42], offset);
|
|
w[56] = amd_bytealign (w[40], w[41], offset);
|
|
w[55] = amd_bytealign (w[39], w[40], offset);
|
|
w[54] = amd_bytealign (w[38], w[39], offset);
|
|
w[53] = amd_bytealign (w[37], w[38], offset);
|
|
w[52] = amd_bytealign (w[36], w[37], offset);
|
|
w[51] = amd_bytealign (w[35], w[36], offset);
|
|
w[50] = amd_bytealign (w[34], w[35], offset);
|
|
w[49] = amd_bytealign (w[33], w[34], offset);
|
|
w[48] = amd_bytealign (w[32], w[33], offset);
|
|
w[47] = amd_bytealign (w[31], w[32], offset);
|
|
w[46] = amd_bytealign (w[30], w[31], offset);
|
|
w[45] = amd_bytealign (w[29], w[30], offset);
|
|
w[44] = amd_bytealign (w[28], w[29], offset);
|
|
w[43] = amd_bytealign (w[27], w[28], offset);
|
|
w[42] = amd_bytealign (w[26], w[27], offset);
|
|
w[41] = amd_bytealign (w[25], w[26], offset);
|
|
w[40] = amd_bytealign (w[24], w[25], offset);
|
|
w[39] = amd_bytealign (w[23], w[24], offset);
|
|
w[38] = amd_bytealign (w[22], w[23], offset);
|
|
w[37] = amd_bytealign (w[21], w[22], offset);
|
|
w[36] = amd_bytealign (w[20], w[21], offset);
|
|
w[35] = amd_bytealign (w[19], w[20], offset);
|
|
w[34] = amd_bytealign (w[18], w[19], offset);
|
|
w[33] = amd_bytealign (w[17], w[18], offset);
|
|
w[32] = amd_bytealign (w[16], w[17], offset);
|
|
w[31] = amd_bytealign (w[15], w[16], offset);
|
|
w[30] = amd_bytealign (w[14], w[15], offset);
|
|
w[29] = amd_bytealign (w[13], w[14], offset);
|
|
w[28] = amd_bytealign (w[12], w[13], offset);
|
|
w[27] = amd_bytealign (w[11], w[12], offset);
|
|
w[26] = amd_bytealign (w[10], w[11], offset);
|
|
w[25] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[24] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[23] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[22] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[21] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[20] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[19] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[18] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[17] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[16] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[15] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = amd_bytealign (w[46], w[47], offset);
|
|
w[62] = amd_bytealign (w[45], w[46], offset);
|
|
w[61] = amd_bytealign (w[44], w[45], offset);
|
|
w[60] = amd_bytealign (w[43], w[44], offset);
|
|
w[59] = amd_bytealign (w[42], w[43], offset);
|
|
w[58] = amd_bytealign (w[41], w[42], offset);
|
|
w[57] = amd_bytealign (w[40], w[41], offset);
|
|
w[56] = amd_bytealign (w[39], w[40], offset);
|
|
w[55] = amd_bytealign (w[38], w[39], offset);
|
|
w[54] = amd_bytealign (w[37], w[38], offset);
|
|
w[53] = amd_bytealign (w[36], w[37], offset);
|
|
w[52] = amd_bytealign (w[35], w[36], offset);
|
|
w[51] = amd_bytealign (w[34], w[35], offset);
|
|
w[50] = amd_bytealign (w[33], w[34], offset);
|
|
w[49] = amd_bytealign (w[32], w[33], offset);
|
|
w[48] = amd_bytealign (w[31], w[32], offset);
|
|
w[47] = amd_bytealign (w[30], w[31], offset);
|
|
w[46] = amd_bytealign (w[29], w[30], offset);
|
|
w[45] = amd_bytealign (w[28], w[29], offset);
|
|
w[44] = amd_bytealign (w[27], w[28], offset);
|
|
w[43] = amd_bytealign (w[26], w[27], offset);
|
|
w[42] = amd_bytealign (w[25], w[26], offset);
|
|
w[41] = amd_bytealign (w[24], w[25], offset);
|
|
w[40] = amd_bytealign (w[23], w[24], offset);
|
|
w[39] = amd_bytealign (w[22], w[23], offset);
|
|
w[38] = amd_bytealign (w[21], w[22], offset);
|
|
w[37] = amd_bytealign (w[20], w[21], offset);
|
|
w[36] = amd_bytealign (w[19], w[20], offset);
|
|
w[35] = amd_bytealign (w[18], w[19], offset);
|
|
w[34] = amd_bytealign (w[17], w[18], offset);
|
|
w[33] = amd_bytealign (w[16], w[17], offset);
|
|
w[32] = amd_bytealign (w[15], w[16], offset);
|
|
w[31] = amd_bytealign (w[14], w[15], offset);
|
|
w[30] = amd_bytealign (w[13], w[14], offset);
|
|
w[29] = amd_bytealign (w[12], w[13], offset);
|
|
w[28] = amd_bytealign (w[11], w[12], offset);
|
|
w[27] = amd_bytealign (w[10], w[11], offset);
|
|
w[26] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[25] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[24] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[23] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[22] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[21] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[20] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[19] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[18] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[17] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[16] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = amd_bytealign (w[45], w[46], offset);
|
|
w[62] = amd_bytealign (w[44], w[45], offset);
|
|
w[61] = amd_bytealign (w[43], w[44], offset);
|
|
w[60] = amd_bytealign (w[42], w[43], offset);
|
|
w[59] = amd_bytealign (w[41], w[42], offset);
|
|
w[58] = amd_bytealign (w[40], w[41], offset);
|
|
w[57] = amd_bytealign (w[39], w[40], offset);
|
|
w[56] = amd_bytealign (w[38], w[39], offset);
|
|
w[55] = amd_bytealign (w[37], w[38], offset);
|
|
w[54] = amd_bytealign (w[36], w[37], offset);
|
|
w[53] = amd_bytealign (w[35], w[36], offset);
|
|
w[52] = amd_bytealign (w[34], w[35], offset);
|
|
w[51] = amd_bytealign (w[33], w[34], offset);
|
|
w[50] = amd_bytealign (w[32], w[33], offset);
|
|
w[49] = amd_bytealign (w[31], w[32], offset);
|
|
w[48] = amd_bytealign (w[30], w[31], offset);
|
|
w[47] = amd_bytealign (w[29], w[30], offset);
|
|
w[46] = amd_bytealign (w[28], w[29], offset);
|
|
w[45] = amd_bytealign (w[27], w[28], offset);
|
|
w[44] = amd_bytealign (w[26], w[27], offset);
|
|
w[43] = amd_bytealign (w[25], w[26], offset);
|
|
w[42] = amd_bytealign (w[24], w[25], offset);
|
|
w[41] = amd_bytealign (w[23], w[24], offset);
|
|
w[40] = amd_bytealign (w[22], w[23], offset);
|
|
w[39] = amd_bytealign (w[21], w[22], offset);
|
|
w[38] = amd_bytealign (w[20], w[21], offset);
|
|
w[37] = amd_bytealign (w[19], w[20], offset);
|
|
w[36] = amd_bytealign (w[18], w[19], offset);
|
|
w[35] = amd_bytealign (w[17], w[18], offset);
|
|
w[34] = amd_bytealign (w[16], w[17], offset);
|
|
w[33] = amd_bytealign (w[15], w[16], offset);
|
|
w[32] = amd_bytealign (w[14], w[15], offset);
|
|
w[31] = amd_bytealign (w[13], w[14], offset);
|
|
w[30] = amd_bytealign (w[12], w[13], offset);
|
|
w[29] = amd_bytealign (w[11], w[12], offset);
|
|
w[28] = amd_bytealign (w[10], w[11], offset);
|
|
w[27] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[26] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[25] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[24] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[23] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[22] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[21] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[20] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[19] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[18] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[17] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = amd_bytealign (w[44], w[45], offset);
|
|
w[62] = amd_bytealign (w[43], w[44], offset);
|
|
w[61] = amd_bytealign (w[42], w[43], offset);
|
|
w[60] = amd_bytealign (w[41], w[42], offset);
|
|
w[59] = amd_bytealign (w[40], w[41], offset);
|
|
w[58] = amd_bytealign (w[39], w[40], offset);
|
|
w[57] = amd_bytealign (w[38], w[39], offset);
|
|
w[56] = amd_bytealign (w[37], w[38], offset);
|
|
w[55] = amd_bytealign (w[36], w[37], offset);
|
|
w[54] = amd_bytealign (w[35], w[36], offset);
|
|
w[53] = amd_bytealign (w[34], w[35], offset);
|
|
w[52] = amd_bytealign (w[33], w[34], offset);
|
|
w[51] = amd_bytealign (w[32], w[33], offset);
|
|
w[50] = amd_bytealign (w[31], w[32], offset);
|
|
w[49] = amd_bytealign (w[30], w[31], offset);
|
|
w[48] = amd_bytealign (w[29], w[30], offset);
|
|
w[47] = amd_bytealign (w[28], w[29], offset);
|
|
w[46] = amd_bytealign (w[27], w[28], offset);
|
|
w[45] = amd_bytealign (w[26], w[27], offset);
|
|
w[44] = amd_bytealign (w[25], w[26], offset);
|
|
w[43] = amd_bytealign (w[24], w[25], offset);
|
|
w[42] = amd_bytealign (w[23], w[24], offset);
|
|
w[41] = amd_bytealign (w[22], w[23], offset);
|
|
w[40] = amd_bytealign (w[21], w[22], offset);
|
|
w[39] = amd_bytealign (w[20], w[21], offset);
|
|
w[38] = amd_bytealign (w[19], w[20], offset);
|
|
w[37] = amd_bytealign (w[18], w[19], offset);
|
|
w[36] = amd_bytealign (w[17], w[18], offset);
|
|
w[35] = amd_bytealign (w[16], w[17], offset);
|
|
w[34] = amd_bytealign (w[15], w[16], offset);
|
|
w[33] = amd_bytealign (w[14], w[15], offset);
|
|
w[32] = amd_bytealign (w[13], w[14], offset);
|
|
w[31] = amd_bytealign (w[12], w[13], offset);
|
|
w[30] = amd_bytealign (w[11], w[12], offset);
|
|
w[29] = amd_bytealign (w[10], w[11], offset);
|
|
w[28] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[27] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[26] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[25] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[24] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[23] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[22] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[21] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[20] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[19] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[18] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = amd_bytealign (w[43], w[44], offset);
|
|
w[62] = amd_bytealign (w[42], w[43], offset);
|
|
w[61] = amd_bytealign (w[41], w[42], offset);
|
|
w[60] = amd_bytealign (w[40], w[41], offset);
|
|
w[59] = amd_bytealign (w[39], w[40], offset);
|
|
w[58] = amd_bytealign (w[38], w[39], offset);
|
|
w[57] = amd_bytealign (w[37], w[38], offset);
|
|
w[56] = amd_bytealign (w[36], w[37], offset);
|
|
w[55] = amd_bytealign (w[35], w[36], offset);
|
|
w[54] = amd_bytealign (w[34], w[35], offset);
|
|
w[53] = amd_bytealign (w[33], w[34], offset);
|
|
w[52] = amd_bytealign (w[32], w[33], offset);
|
|
w[51] = amd_bytealign (w[31], w[32], offset);
|
|
w[50] = amd_bytealign (w[30], w[31], offset);
|
|
w[49] = amd_bytealign (w[29], w[30], offset);
|
|
w[48] = amd_bytealign (w[28], w[29], offset);
|
|
w[47] = amd_bytealign (w[27], w[28], offset);
|
|
w[46] = amd_bytealign (w[26], w[27], offset);
|
|
w[45] = amd_bytealign (w[25], w[26], offset);
|
|
w[44] = amd_bytealign (w[24], w[25], offset);
|
|
w[43] = amd_bytealign (w[23], w[24], offset);
|
|
w[42] = amd_bytealign (w[22], w[23], offset);
|
|
w[41] = amd_bytealign (w[21], w[22], offset);
|
|
w[40] = amd_bytealign (w[20], w[21], offset);
|
|
w[39] = amd_bytealign (w[19], w[20], offset);
|
|
w[38] = amd_bytealign (w[18], w[19], offset);
|
|
w[37] = amd_bytealign (w[17], w[18], offset);
|
|
w[36] = amd_bytealign (w[16], w[17], offset);
|
|
w[35] = amd_bytealign (w[15], w[16], offset);
|
|
w[34] = amd_bytealign (w[14], w[15], offset);
|
|
w[33] = amd_bytealign (w[13], w[14], offset);
|
|
w[32] = amd_bytealign (w[12], w[13], offset);
|
|
w[31] = amd_bytealign (w[11], w[12], offset);
|
|
w[30] = amd_bytealign (w[10], w[11], offset);
|
|
w[29] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[28] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[27] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[26] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[25] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[24] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[23] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[22] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[21] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[20] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[19] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = amd_bytealign (w[42], w[43], offset);
|
|
w[62] = amd_bytealign (w[41], w[42], offset);
|
|
w[61] = amd_bytealign (w[40], w[41], offset);
|
|
w[60] = amd_bytealign (w[39], w[40], offset);
|
|
w[59] = amd_bytealign (w[38], w[39], offset);
|
|
w[58] = amd_bytealign (w[37], w[38], offset);
|
|
w[57] = amd_bytealign (w[36], w[37], offset);
|
|
w[56] = amd_bytealign (w[35], w[36], offset);
|
|
w[55] = amd_bytealign (w[34], w[35], offset);
|
|
w[54] = amd_bytealign (w[33], w[34], offset);
|
|
w[53] = amd_bytealign (w[32], w[33], offset);
|
|
w[52] = amd_bytealign (w[31], w[32], offset);
|
|
w[51] = amd_bytealign (w[30], w[31], offset);
|
|
w[50] = amd_bytealign (w[29], w[30], offset);
|
|
w[49] = amd_bytealign (w[28], w[29], offset);
|
|
w[48] = amd_bytealign (w[27], w[28], offset);
|
|
w[47] = amd_bytealign (w[26], w[27], offset);
|
|
w[46] = amd_bytealign (w[25], w[26], offset);
|
|
w[45] = amd_bytealign (w[24], w[25], offset);
|
|
w[44] = amd_bytealign (w[23], w[24], offset);
|
|
w[43] = amd_bytealign (w[22], w[23], offset);
|
|
w[42] = amd_bytealign (w[21], w[22], offset);
|
|
w[41] = amd_bytealign (w[20], w[21], offset);
|
|
w[40] = amd_bytealign (w[19], w[20], offset);
|
|
w[39] = amd_bytealign (w[18], w[19], offset);
|
|
w[38] = amd_bytealign (w[17], w[18], offset);
|
|
w[37] = amd_bytealign (w[16], w[17], offset);
|
|
w[36] = amd_bytealign (w[15], w[16], offset);
|
|
w[35] = amd_bytealign (w[14], w[15], offset);
|
|
w[34] = amd_bytealign (w[13], w[14], offset);
|
|
w[33] = amd_bytealign (w[12], w[13], offset);
|
|
w[32] = amd_bytealign (w[11], w[12], offset);
|
|
w[31] = amd_bytealign (w[10], w[11], offset);
|
|
w[30] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[29] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[28] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[27] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[26] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[25] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[24] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[23] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[22] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[21] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[20] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = amd_bytealign (w[41], w[42], offset);
|
|
w[62] = amd_bytealign (w[40], w[41], offset);
|
|
w[61] = amd_bytealign (w[39], w[40], offset);
|
|
w[60] = amd_bytealign (w[38], w[39], offset);
|
|
w[59] = amd_bytealign (w[37], w[38], offset);
|
|
w[58] = amd_bytealign (w[36], w[37], offset);
|
|
w[57] = amd_bytealign (w[35], w[36], offset);
|
|
w[56] = amd_bytealign (w[34], w[35], offset);
|
|
w[55] = amd_bytealign (w[33], w[34], offset);
|
|
w[54] = amd_bytealign (w[32], w[33], offset);
|
|
w[53] = amd_bytealign (w[31], w[32], offset);
|
|
w[52] = amd_bytealign (w[30], w[31], offset);
|
|
w[51] = amd_bytealign (w[29], w[30], offset);
|
|
w[50] = amd_bytealign (w[28], w[29], offset);
|
|
w[49] = amd_bytealign (w[27], w[28], offset);
|
|
w[48] = amd_bytealign (w[26], w[27], offset);
|
|
w[47] = amd_bytealign (w[25], w[26], offset);
|
|
w[46] = amd_bytealign (w[24], w[25], offset);
|
|
w[45] = amd_bytealign (w[23], w[24], offset);
|
|
w[44] = amd_bytealign (w[22], w[23], offset);
|
|
w[43] = amd_bytealign (w[21], w[22], offset);
|
|
w[42] = amd_bytealign (w[20], w[21], offset);
|
|
w[41] = amd_bytealign (w[19], w[20], offset);
|
|
w[40] = amd_bytealign (w[18], w[19], offset);
|
|
w[39] = amd_bytealign (w[17], w[18], offset);
|
|
w[38] = amd_bytealign (w[16], w[17], offset);
|
|
w[37] = amd_bytealign (w[15], w[16], offset);
|
|
w[36] = amd_bytealign (w[14], w[15], offset);
|
|
w[35] = amd_bytealign (w[13], w[14], offset);
|
|
w[34] = amd_bytealign (w[12], w[13], offset);
|
|
w[33] = amd_bytealign (w[11], w[12], offset);
|
|
w[32] = amd_bytealign (w[10], w[11], offset);
|
|
w[31] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[30] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[29] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[28] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[27] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[26] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[25] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[24] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[23] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[22] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[21] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = amd_bytealign (w[40], w[41], offset);
|
|
w[62] = amd_bytealign (w[39], w[40], offset);
|
|
w[61] = amd_bytealign (w[38], w[39], offset);
|
|
w[60] = amd_bytealign (w[37], w[38], offset);
|
|
w[59] = amd_bytealign (w[36], w[37], offset);
|
|
w[58] = amd_bytealign (w[35], w[36], offset);
|
|
w[57] = amd_bytealign (w[34], w[35], offset);
|
|
w[56] = amd_bytealign (w[33], w[34], offset);
|
|
w[55] = amd_bytealign (w[32], w[33], offset);
|
|
w[54] = amd_bytealign (w[31], w[32], offset);
|
|
w[53] = amd_bytealign (w[30], w[31], offset);
|
|
w[52] = amd_bytealign (w[29], w[30], offset);
|
|
w[51] = amd_bytealign (w[28], w[29], offset);
|
|
w[50] = amd_bytealign (w[27], w[28], offset);
|
|
w[49] = amd_bytealign (w[26], w[27], offset);
|
|
w[48] = amd_bytealign (w[25], w[26], offset);
|
|
w[47] = amd_bytealign (w[24], w[25], offset);
|
|
w[46] = amd_bytealign (w[23], w[24], offset);
|
|
w[45] = amd_bytealign (w[22], w[23], offset);
|
|
w[44] = amd_bytealign (w[21], w[22], offset);
|
|
w[43] = amd_bytealign (w[20], w[21], offset);
|
|
w[42] = amd_bytealign (w[19], w[20], offset);
|
|
w[41] = amd_bytealign (w[18], w[19], offset);
|
|
w[40] = amd_bytealign (w[17], w[18], offset);
|
|
w[39] = amd_bytealign (w[16], w[17], offset);
|
|
w[38] = amd_bytealign (w[15], w[16], offset);
|
|
w[37] = amd_bytealign (w[14], w[15], offset);
|
|
w[36] = amd_bytealign (w[13], w[14], offset);
|
|
w[35] = amd_bytealign (w[12], w[13], offset);
|
|
w[34] = amd_bytealign (w[11], w[12], offset);
|
|
w[33] = amd_bytealign (w[10], w[11], offset);
|
|
w[32] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[31] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[30] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[29] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[28] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[27] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[26] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[25] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[24] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[23] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[22] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = amd_bytealign (w[39], w[40], offset);
|
|
w[62] = amd_bytealign (w[38], w[39], offset);
|
|
w[61] = amd_bytealign (w[37], w[38], offset);
|
|
w[60] = amd_bytealign (w[36], w[37], offset);
|
|
w[59] = amd_bytealign (w[35], w[36], offset);
|
|
w[58] = amd_bytealign (w[34], w[35], offset);
|
|
w[57] = amd_bytealign (w[33], w[34], offset);
|
|
w[56] = amd_bytealign (w[32], w[33], offset);
|
|
w[55] = amd_bytealign (w[31], w[32], offset);
|
|
w[54] = amd_bytealign (w[30], w[31], offset);
|
|
w[53] = amd_bytealign (w[29], w[30], offset);
|
|
w[52] = amd_bytealign (w[28], w[29], offset);
|
|
w[51] = amd_bytealign (w[27], w[28], offset);
|
|
w[50] = amd_bytealign (w[26], w[27], offset);
|
|
w[49] = amd_bytealign (w[25], w[26], offset);
|
|
w[48] = amd_bytealign (w[24], w[25], offset);
|
|
w[47] = amd_bytealign (w[23], w[24], offset);
|
|
w[46] = amd_bytealign (w[22], w[23], offset);
|
|
w[45] = amd_bytealign (w[21], w[22], offset);
|
|
w[44] = amd_bytealign (w[20], w[21], offset);
|
|
w[43] = amd_bytealign (w[19], w[20], offset);
|
|
w[42] = amd_bytealign (w[18], w[19], offset);
|
|
w[41] = amd_bytealign (w[17], w[18], offset);
|
|
w[40] = amd_bytealign (w[16], w[17], offset);
|
|
w[39] = amd_bytealign (w[15], w[16], offset);
|
|
w[38] = amd_bytealign (w[14], w[15], offset);
|
|
w[37] = amd_bytealign (w[13], w[14], offset);
|
|
w[36] = amd_bytealign (w[12], w[13], offset);
|
|
w[35] = amd_bytealign (w[11], w[12], offset);
|
|
w[34] = amd_bytealign (w[10], w[11], offset);
|
|
w[33] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[32] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[31] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[30] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[29] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[28] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[27] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[26] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[25] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[24] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[23] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = amd_bytealign (w[38], w[39], offset);
|
|
w[62] = amd_bytealign (w[37], w[38], offset);
|
|
w[61] = amd_bytealign (w[36], w[37], offset);
|
|
w[60] = amd_bytealign (w[35], w[36], offset);
|
|
w[59] = amd_bytealign (w[34], w[35], offset);
|
|
w[58] = amd_bytealign (w[33], w[34], offset);
|
|
w[57] = amd_bytealign (w[32], w[33], offset);
|
|
w[56] = amd_bytealign (w[31], w[32], offset);
|
|
w[55] = amd_bytealign (w[30], w[31], offset);
|
|
w[54] = amd_bytealign (w[29], w[30], offset);
|
|
w[53] = amd_bytealign (w[28], w[29], offset);
|
|
w[52] = amd_bytealign (w[27], w[28], offset);
|
|
w[51] = amd_bytealign (w[26], w[27], offset);
|
|
w[50] = amd_bytealign (w[25], w[26], offset);
|
|
w[49] = amd_bytealign (w[24], w[25], offset);
|
|
w[48] = amd_bytealign (w[23], w[24], offset);
|
|
w[47] = amd_bytealign (w[22], w[23], offset);
|
|
w[46] = amd_bytealign (w[21], w[22], offset);
|
|
w[45] = amd_bytealign (w[20], w[21], offset);
|
|
w[44] = amd_bytealign (w[19], w[20], offset);
|
|
w[43] = amd_bytealign (w[18], w[19], offset);
|
|
w[42] = amd_bytealign (w[17], w[18], offset);
|
|
w[41] = amd_bytealign (w[16], w[17], offset);
|
|
w[40] = amd_bytealign (w[15], w[16], offset);
|
|
w[39] = amd_bytealign (w[14], w[15], offset);
|
|
w[38] = amd_bytealign (w[13], w[14], offset);
|
|
w[37] = amd_bytealign (w[12], w[13], offset);
|
|
w[36] = amd_bytealign (w[11], w[12], offset);
|
|
w[35] = amd_bytealign (w[10], w[11], offset);
|
|
w[34] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[33] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[32] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[31] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[30] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[29] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[28] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[27] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[26] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[25] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[24] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = amd_bytealign (w[37], w[38], offset);
|
|
w[62] = amd_bytealign (w[36], w[37], offset);
|
|
w[61] = amd_bytealign (w[35], w[36], offset);
|
|
w[60] = amd_bytealign (w[34], w[35], offset);
|
|
w[59] = amd_bytealign (w[33], w[34], offset);
|
|
w[58] = amd_bytealign (w[32], w[33], offset);
|
|
w[57] = amd_bytealign (w[31], w[32], offset);
|
|
w[56] = amd_bytealign (w[30], w[31], offset);
|
|
w[55] = amd_bytealign (w[29], w[30], offset);
|
|
w[54] = amd_bytealign (w[28], w[29], offset);
|
|
w[53] = amd_bytealign (w[27], w[28], offset);
|
|
w[52] = amd_bytealign (w[26], w[27], offset);
|
|
w[51] = amd_bytealign (w[25], w[26], offset);
|
|
w[50] = amd_bytealign (w[24], w[25], offset);
|
|
w[49] = amd_bytealign (w[23], w[24], offset);
|
|
w[48] = amd_bytealign (w[22], w[23], offset);
|
|
w[47] = amd_bytealign (w[21], w[22], offset);
|
|
w[46] = amd_bytealign (w[20], w[21], offset);
|
|
w[45] = amd_bytealign (w[19], w[20], offset);
|
|
w[44] = amd_bytealign (w[18], w[19], offset);
|
|
w[43] = amd_bytealign (w[17], w[18], offset);
|
|
w[42] = amd_bytealign (w[16], w[17], offset);
|
|
w[41] = amd_bytealign (w[15], w[16], offset);
|
|
w[40] = amd_bytealign (w[14], w[15], offset);
|
|
w[39] = amd_bytealign (w[13], w[14], offset);
|
|
w[38] = amd_bytealign (w[12], w[13], offset);
|
|
w[37] = amd_bytealign (w[11], w[12], offset);
|
|
w[36] = amd_bytealign (w[10], w[11], offset);
|
|
w[35] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[34] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[33] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[32] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[31] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[30] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[29] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[28] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[27] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[26] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[25] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = amd_bytealign (w[36], w[37], offset);
|
|
w[62] = amd_bytealign (w[35], w[36], offset);
|
|
w[61] = amd_bytealign (w[34], w[35], offset);
|
|
w[60] = amd_bytealign (w[33], w[34], offset);
|
|
w[59] = amd_bytealign (w[32], w[33], offset);
|
|
w[58] = amd_bytealign (w[31], w[32], offset);
|
|
w[57] = amd_bytealign (w[30], w[31], offset);
|
|
w[56] = amd_bytealign (w[29], w[30], offset);
|
|
w[55] = amd_bytealign (w[28], w[29], offset);
|
|
w[54] = amd_bytealign (w[27], w[28], offset);
|
|
w[53] = amd_bytealign (w[26], w[27], offset);
|
|
w[52] = amd_bytealign (w[25], w[26], offset);
|
|
w[51] = amd_bytealign (w[24], w[25], offset);
|
|
w[50] = amd_bytealign (w[23], w[24], offset);
|
|
w[49] = amd_bytealign (w[22], w[23], offset);
|
|
w[48] = amd_bytealign (w[21], w[22], offset);
|
|
w[47] = amd_bytealign (w[20], w[21], offset);
|
|
w[46] = amd_bytealign (w[19], w[20], offset);
|
|
w[45] = amd_bytealign (w[18], w[19], offset);
|
|
w[44] = amd_bytealign (w[17], w[18], offset);
|
|
w[43] = amd_bytealign (w[16], w[17], offset);
|
|
w[42] = amd_bytealign (w[15], w[16], offset);
|
|
w[41] = amd_bytealign (w[14], w[15], offset);
|
|
w[40] = amd_bytealign (w[13], w[14], offset);
|
|
w[39] = amd_bytealign (w[12], w[13], offset);
|
|
w[38] = amd_bytealign (w[11], w[12], offset);
|
|
w[37] = amd_bytealign (w[10], w[11], offset);
|
|
w[36] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[35] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[34] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[33] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[32] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[31] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[30] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[29] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[28] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[27] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[26] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = amd_bytealign (w[35], w[36], offset);
|
|
w[62] = amd_bytealign (w[34], w[35], offset);
|
|
w[61] = amd_bytealign (w[33], w[34], offset);
|
|
w[60] = amd_bytealign (w[32], w[33], offset);
|
|
w[59] = amd_bytealign (w[31], w[32], offset);
|
|
w[58] = amd_bytealign (w[30], w[31], offset);
|
|
w[57] = amd_bytealign (w[29], w[30], offset);
|
|
w[56] = amd_bytealign (w[28], w[29], offset);
|
|
w[55] = amd_bytealign (w[27], w[28], offset);
|
|
w[54] = amd_bytealign (w[26], w[27], offset);
|
|
w[53] = amd_bytealign (w[25], w[26], offset);
|
|
w[52] = amd_bytealign (w[24], w[25], offset);
|
|
w[51] = amd_bytealign (w[23], w[24], offset);
|
|
w[50] = amd_bytealign (w[22], w[23], offset);
|
|
w[49] = amd_bytealign (w[21], w[22], offset);
|
|
w[48] = amd_bytealign (w[20], w[21], offset);
|
|
w[47] = amd_bytealign (w[19], w[20], offset);
|
|
w[46] = amd_bytealign (w[18], w[19], offset);
|
|
w[45] = amd_bytealign (w[17], w[18], offset);
|
|
w[44] = amd_bytealign (w[16], w[17], offset);
|
|
w[43] = amd_bytealign (w[15], w[16], offset);
|
|
w[42] = amd_bytealign (w[14], w[15], offset);
|
|
w[41] = amd_bytealign (w[13], w[14], offset);
|
|
w[40] = amd_bytealign (w[12], w[13], offset);
|
|
w[39] = amd_bytealign (w[11], w[12], offset);
|
|
w[38] = amd_bytealign (w[10], w[11], offset);
|
|
w[37] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[36] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[35] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[34] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[33] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[32] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[31] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[30] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[29] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[28] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[27] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = amd_bytealign (w[34], w[35], offset);
|
|
w[62] = amd_bytealign (w[33], w[34], offset);
|
|
w[61] = amd_bytealign (w[32], w[33], offset);
|
|
w[60] = amd_bytealign (w[31], w[32], offset);
|
|
w[59] = amd_bytealign (w[30], w[31], offset);
|
|
w[58] = amd_bytealign (w[29], w[30], offset);
|
|
w[57] = amd_bytealign (w[28], w[29], offset);
|
|
w[56] = amd_bytealign (w[27], w[28], offset);
|
|
w[55] = amd_bytealign (w[26], w[27], offset);
|
|
w[54] = amd_bytealign (w[25], w[26], offset);
|
|
w[53] = amd_bytealign (w[24], w[25], offset);
|
|
w[52] = amd_bytealign (w[23], w[24], offset);
|
|
w[51] = amd_bytealign (w[22], w[23], offset);
|
|
w[50] = amd_bytealign (w[21], w[22], offset);
|
|
w[49] = amd_bytealign (w[20], w[21], offset);
|
|
w[48] = amd_bytealign (w[19], w[20], offset);
|
|
w[47] = amd_bytealign (w[18], w[19], offset);
|
|
w[46] = amd_bytealign (w[17], w[18], offset);
|
|
w[45] = amd_bytealign (w[16], w[17], offset);
|
|
w[44] = amd_bytealign (w[15], w[16], offset);
|
|
w[43] = amd_bytealign (w[14], w[15], offset);
|
|
w[42] = amd_bytealign (w[13], w[14], offset);
|
|
w[41] = amd_bytealign (w[12], w[13], offset);
|
|
w[40] = amd_bytealign (w[11], w[12], offset);
|
|
w[39] = amd_bytealign (w[10], w[11], offset);
|
|
w[38] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[37] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[36] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[35] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[34] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[33] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[32] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[31] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[30] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[29] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[28] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = amd_bytealign (w[33], w[34], offset);
|
|
w[62] = amd_bytealign (w[32], w[33], offset);
|
|
w[61] = amd_bytealign (w[31], w[32], offset);
|
|
w[60] = amd_bytealign (w[30], w[31], offset);
|
|
w[59] = amd_bytealign (w[29], w[30], offset);
|
|
w[58] = amd_bytealign (w[28], w[29], offset);
|
|
w[57] = amd_bytealign (w[27], w[28], offset);
|
|
w[56] = amd_bytealign (w[26], w[27], offset);
|
|
w[55] = amd_bytealign (w[25], w[26], offset);
|
|
w[54] = amd_bytealign (w[24], w[25], offset);
|
|
w[53] = amd_bytealign (w[23], w[24], offset);
|
|
w[52] = amd_bytealign (w[22], w[23], offset);
|
|
w[51] = amd_bytealign (w[21], w[22], offset);
|
|
w[50] = amd_bytealign (w[20], w[21], offset);
|
|
w[49] = amd_bytealign (w[19], w[20], offset);
|
|
w[48] = amd_bytealign (w[18], w[19], offset);
|
|
w[47] = amd_bytealign (w[17], w[18], offset);
|
|
w[46] = amd_bytealign (w[16], w[17], offset);
|
|
w[45] = amd_bytealign (w[15], w[16], offset);
|
|
w[44] = amd_bytealign (w[14], w[15], offset);
|
|
w[43] = amd_bytealign (w[13], w[14], offset);
|
|
w[42] = amd_bytealign (w[12], w[13], offset);
|
|
w[41] = amd_bytealign (w[11], w[12], offset);
|
|
w[40] = amd_bytealign (w[10], w[11], offset);
|
|
w[39] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[38] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[37] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[36] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[35] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[34] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[33] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[32] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[31] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[30] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[29] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = amd_bytealign (w[32], w[33], offset);
|
|
w[62] = amd_bytealign (w[31], w[32], offset);
|
|
w[61] = amd_bytealign (w[30], w[31], offset);
|
|
w[60] = amd_bytealign (w[29], w[30], offset);
|
|
w[59] = amd_bytealign (w[28], w[29], offset);
|
|
w[58] = amd_bytealign (w[27], w[28], offset);
|
|
w[57] = amd_bytealign (w[26], w[27], offset);
|
|
w[56] = amd_bytealign (w[25], w[26], offset);
|
|
w[55] = amd_bytealign (w[24], w[25], offset);
|
|
w[54] = amd_bytealign (w[23], w[24], offset);
|
|
w[53] = amd_bytealign (w[22], w[23], offset);
|
|
w[52] = amd_bytealign (w[21], w[22], offset);
|
|
w[51] = amd_bytealign (w[20], w[21], offset);
|
|
w[50] = amd_bytealign (w[19], w[20], offset);
|
|
w[49] = amd_bytealign (w[18], w[19], offset);
|
|
w[48] = amd_bytealign (w[17], w[18], offset);
|
|
w[47] = amd_bytealign (w[16], w[17], offset);
|
|
w[46] = amd_bytealign (w[15], w[16], offset);
|
|
w[45] = amd_bytealign (w[14], w[15], offset);
|
|
w[44] = amd_bytealign (w[13], w[14], offset);
|
|
w[43] = amd_bytealign (w[12], w[13], offset);
|
|
w[42] = amd_bytealign (w[11], w[12], offset);
|
|
w[41] = amd_bytealign (w[10], w[11], offset);
|
|
w[40] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[39] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[38] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[37] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[36] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[35] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[34] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[33] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[32] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[31] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[30] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = amd_bytealign (w[31], w[32], offset);
|
|
w[62] = amd_bytealign (w[30], w[31], offset);
|
|
w[61] = amd_bytealign (w[29], w[30], offset);
|
|
w[60] = amd_bytealign (w[28], w[29], offset);
|
|
w[59] = amd_bytealign (w[27], w[28], offset);
|
|
w[58] = amd_bytealign (w[26], w[27], offset);
|
|
w[57] = amd_bytealign (w[25], w[26], offset);
|
|
w[56] = amd_bytealign (w[24], w[25], offset);
|
|
w[55] = amd_bytealign (w[23], w[24], offset);
|
|
w[54] = amd_bytealign (w[22], w[23], offset);
|
|
w[53] = amd_bytealign (w[21], w[22], offset);
|
|
w[52] = amd_bytealign (w[20], w[21], offset);
|
|
w[51] = amd_bytealign (w[19], w[20], offset);
|
|
w[50] = amd_bytealign (w[18], w[19], offset);
|
|
w[49] = amd_bytealign (w[17], w[18], offset);
|
|
w[48] = amd_bytealign (w[16], w[17], offset);
|
|
w[47] = amd_bytealign (w[15], w[16], offset);
|
|
w[46] = amd_bytealign (w[14], w[15], offset);
|
|
w[45] = amd_bytealign (w[13], w[14], offset);
|
|
w[44] = amd_bytealign (w[12], w[13], offset);
|
|
w[43] = amd_bytealign (w[11], w[12], offset);
|
|
w[42] = amd_bytealign (w[10], w[11], offset);
|
|
w[41] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[40] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[39] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[38] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[37] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[36] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[35] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[34] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[33] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[32] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[31] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = amd_bytealign (w[30], w[31], offset);
|
|
w[62] = amd_bytealign (w[29], w[30], offset);
|
|
w[61] = amd_bytealign (w[28], w[29], offset);
|
|
w[60] = amd_bytealign (w[27], w[28], offset);
|
|
w[59] = amd_bytealign (w[26], w[27], offset);
|
|
w[58] = amd_bytealign (w[25], w[26], offset);
|
|
w[57] = amd_bytealign (w[24], w[25], offset);
|
|
w[56] = amd_bytealign (w[23], w[24], offset);
|
|
w[55] = amd_bytealign (w[22], w[23], offset);
|
|
w[54] = amd_bytealign (w[21], w[22], offset);
|
|
w[53] = amd_bytealign (w[20], w[21], offset);
|
|
w[52] = amd_bytealign (w[19], w[20], offset);
|
|
w[51] = amd_bytealign (w[18], w[19], offset);
|
|
w[50] = amd_bytealign (w[17], w[18], offset);
|
|
w[49] = amd_bytealign (w[16], w[17], offset);
|
|
w[48] = amd_bytealign (w[15], w[16], offset);
|
|
w[47] = amd_bytealign (w[14], w[15], offset);
|
|
w[46] = amd_bytealign (w[13], w[14], offset);
|
|
w[45] = amd_bytealign (w[12], w[13], offset);
|
|
w[44] = amd_bytealign (w[11], w[12], offset);
|
|
w[43] = amd_bytealign (w[10], w[11], offset);
|
|
w[42] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[41] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[40] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[39] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[38] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[37] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[36] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[35] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[34] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[33] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[32] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = amd_bytealign (w[29], w[30], offset);
|
|
w[62] = amd_bytealign (w[28], w[29], offset);
|
|
w[61] = amd_bytealign (w[27], w[28], offset);
|
|
w[60] = amd_bytealign (w[26], w[27], offset);
|
|
w[59] = amd_bytealign (w[25], w[26], offset);
|
|
w[58] = amd_bytealign (w[24], w[25], offset);
|
|
w[57] = amd_bytealign (w[23], w[24], offset);
|
|
w[56] = amd_bytealign (w[22], w[23], offset);
|
|
w[55] = amd_bytealign (w[21], w[22], offset);
|
|
w[54] = amd_bytealign (w[20], w[21], offset);
|
|
w[53] = amd_bytealign (w[19], w[20], offset);
|
|
w[52] = amd_bytealign (w[18], w[19], offset);
|
|
w[51] = amd_bytealign (w[17], w[18], offset);
|
|
w[50] = amd_bytealign (w[16], w[17], offset);
|
|
w[49] = amd_bytealign (w[15], w[16], offset);
|
|
w[48] = amd_bytealign (w[14], w[15], offset);
|
|
w[47] = amd_bytealign (w[13], w[14], offset);
|
|
w[46] = amd_bytealign (w[12], w[13], offset);
|
|
w[45] = amd_bytealign (w[11], w[12], offset);
|
|
w[44] = amd_bytealign (w[10], w[11], offset);
|
|
w[43] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[42] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[41] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[40] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[39] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[38] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[37] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[36] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[35] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[34] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[33] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = amd_bytealign (w[28], w[29], offset);
|
|
w[62] = amd_bytealign (w[27], w[28], offset);
|
|
w[61] = amd_bytealign (w[26], w[27], offset);
|
|
w[60] = amd_bytealign (w[25], w[26], offset);
|
|
w[59] = amd_bytealign (w[24], w[25], offset);
|
|
w[58] = amd_bytealign (w[23], w[24], offset);
|
|
w[57] = amd_bytealign (w[22], w[23], offset);
|
|
w[56] = amd_bytealign (w[21], w[22], offset);
|
|
w[55] = amd_bytealign (w[20], w[21], offset);
|
|
w[54] = amd_bytealign (w[19], w[20], offset);
|
|
w[53] = amd_bytealign (w[18], w[19], offset);
|
|
w[52] = amd_bytealign (w[17], w[18], offset);
|
|
w[51] = amd_bytealign (w[16], w[17], offset);
|
|
w[50] = amd_bytealign (w[15], w[16], offset);
|
|
w[49] = amd_bytealign (w[14], w[15], offset);
|
|
w[48] = amd_bytealign (w[13], w[14], offset);
|
|
w[47] = amd_bytealign (w[12], w[13], offset);
|
|
w[46] = amd_bytealign (w[11], w[12], offset);
|
|
w[45] = amd_bytealign (w[10], w[11], offset);
|
|
w[44] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[43] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[42] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[41] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[40] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[39] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[38] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[37] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[36] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[35] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[34] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = amd_bytealign (w[27], w[28], offset);
|
|
w[62] = amd_bytealign (w[26], w[27], offset);
|
|
w[61] = amd_bytealign (w[25], w[26], offset);
|
|
w[60] = amd_bytealign (w[24], w[25], offset);
|
|
w[59] = amd_bytealign (w[23], w[24], offset);
|
|
w[58] = amd_bytealign (w[22], w[23], offset);
|
|
w[57] = amd_bytealign (w[21], w[22], offset);
|
|
w[56] = amd_bytealign (w[20], w[21], offset);
|
|
w[55] = amd_bytealign (w[19], w[20], offset);
|
|
w[54] = amd_bytealign (w[18], w[19], offset);
|
|
w[53] = amd_bytealign (w[17], w[18], offset);
|
|
w[52] = amd_bytealign (w[16], w[17], offset);
|
|
w[51] = amd_bytealign (w[15], w[16], offset);
|
|
w[50] = amd_bytealign (w[14], w[15], offset);
|
|
w[49] = amd_bytealign (w[13], w[14], offset);
|
|
w[48] = amd_bytealign (w[12], w[13], offset);
|
|
w[47] = amd_bytealign (w[11], w[12], offset);
|
|
w[46] = amd_bytealign (w[10], w[11], offset);
|
|
w[45] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[44] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[43] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[42] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[41] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[40] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[39] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[38] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[37] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[36] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[35] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = amd_bytealign (w[26], w[27], offset);
|
|
w[62] = amd_bytealign (w[25], w[26], offset);
|
|
w[61] = amd_bytealign (w[24], w[25], offset);
|
|
w[60] = amd_bytealign (w[23], w[24], offset);
|
|
w[59] = amd_bytealign (w[22], w[23], offset);
|
|
w[58] = amd_bytealign (w[21], w[22], offset);
|
|
w[57] = amd_bytealign (w[20], w[21], offset);
|
|
w[56] = amd_bytealign (w[19], w[20], offset);
|
|
w[55] = amd_bytealign (w[18], w[19], offset);
|
|
w[54] = amd_bytealign (w[17], w[18], offset);
|
|
w[53] = amd_bytealign (w[16], w[17], offset);
|
|
w[52] = amd_bytealign (w[15], w[16], offset);
|
|
w[51] = amd_bytealign (w[14], w[15], offset);
|
|
w[50] = amd_bytealign (w[13], w[14], offset);
|
|
w[49] = amd_bytealign (w[12], w[13], offset);
|
|
w[48] = amd_bytealign (w[11], w[12], offset);
|
|
w[47] = amd_bytealign (w[10], w[11], offset);
|
|
w[46] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[45] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[44] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[43] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[42] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[41] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[40] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[39] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[38] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[37] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[36] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = amd_bytealign (w[25], w[26], offset);
|
|
w[62] = amd_bytealign (w[24], w[25], offset);
|
|
w[61] = amd_bytealign (w[23], w[24], offset);
|
|
w[60] = amd_bytealign (w[22], w[23], offset);
|
|
w[59] = amd_bytealign (w[21], w[22], offset);
|
|
w[58] = amd_bytealign (w[20], w[21], offset);
|
|
w[57] = amd_bytealign (w[19], w[20], offset);
|
|
w[56] = amd_bytealign (w[18], w[19], offset);
|
|
w[55] = amd_bytealign (w[17], w[18], offset);
|
|
w[54] = amd_bytealign (w[16], w[17], offset);
|
|
w[53] = amd_bytealign (w[15], w[16], offset);
|
|
w[52] = amd_bytealign (w[14], w[15], offset);
|
|
w[51] = amd_bytealign (w[13], w[14], offset);
|
|
w[50] = amd_bytealign (w[12], w[13], offset);
|
|
w[49] = amd_bytealign (w[11], w[12], offset);
|
|
w[48] = amd_bytealign (w[10], w[11], offset);
|
|
w[47] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[46] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[45] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[44] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[43] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[42] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[41] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[40] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[39] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[38] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[37] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = amd_bytealign (w[24], w[25], offset);
|
|
w[62] = amd_bytealign (w[23], w[24], offset);
|
|
w[61] = amd_bytealign (w[22], w[23], offset);
|
|
w[60] = amd_bytealign (w[21], w[22], offset);
|
|
w[59] = amd_bytealign (w[20], w[21], offset);
|
|
w[58] = amd_bytealign (w[19], w[20], offset);
|
|
w[57] = amd_bytealign (w[18], w[19], offset);
|
|
w[56] = amd_bytealign (w[17], w[18], offset);
|
|
w[55] = amd_bytealign (w[16], w[17], offset);
|
|
w[54] = amd_bytealign (w[15], w[16], offset);
|
|
w[53] = amd_bytealign (w[14], w[15], offset);
|
|
w[52] = amd_bytealign (w[13], w[14], offset);
|
|
w[51] = amd_bytealign (w[12], w[13], offset);
|
|
w[50] = amd_bytealign (w[11], w[12], offset);
|
|
w[49] = amd_bytealign (w[10], w[11], offset);
|
|
w[48] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[47] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[46] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[45] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[44] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[43] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[42] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[41] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[40] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[39] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[38] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = amd_bytealign (w[23], w[24], offset);
|
|
w[62] = amd_bytealign (w[22], w[23], offset);
|
|
w[61] = amd_bytealign (w[21], w[22], offset);
|
|
w[60] = amd_bytealign (w[20], w[21], offset);
|
|
w[59] = amd_bytealign (w[19], w[20], offset);
|
|
w[58] = amd_bytealign (w[18], w[19], offset);
|
|
w[57] = amd_bytealign (w[17], w[18], offset);
|
|
w[56] = amd_bytealign (w[16], w[17], offset);
|
|
w[55] = amd_bytealign (w[15], w[16], offset);
|
|
w[54] = amd_bytealign (w[14], w[15], offset);
|
|
w[53] = amd_bytealign (w[13], w[14], offset);
|
|
w[52] = amd_bytealign (w[12], w[13], offset);
|
|
w[51] = amd_bytealign (w[11], w[12], offset);
|
|
w[50] = amd_bytealign (w[10], w[11], offset);
|
|
w[49] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[48] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[47] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[46] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[45] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[44] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[43] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[42] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[41] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[40] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[39] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = amd_bytealign (w[22], w[23], offset);
|
|
w[62] = amd_bytealign (w[21], w[22], offset);
|
|
w[61] = amd_bytealign (w[20], w[21], offset);
|
|
w[60] = amd_bytealign (w[19], w[20], offset);
|
|
w[59] = amd_bytealign (w[18], w[19], offset);
|
|
w[58] = amd_bytealign (w[17], w[18], offset);
|
|
w[57] = amd_bytealign (w[16], w[17], offset);
|
|
w[56] = amd_bytealign (w[15], w[16], offset);
|
|
w[55] = amd_bytealign (w[14], w[15], offset);
|
|
w[54] = amd_bytealign (w[13], w[14], offset);
|
|
w[53] = amd_bytealign (w[12], w[13], offset);
|
|
w[52] = amd_bytealign (w[11], w[12], offset);
|
|
w[51] = amd_bytealign (w[10], w[11], offset);
|
|
w[50] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[49] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[48] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[47] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[46] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[45] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[44] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[43] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[42] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[41] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[40] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = amd_bytealign (w[21], w[22], offset);
|
|
w[62] = amd_bytealign (w[20], w[21], offset);
|
|
w[61] = amd_bytealign (w[19], w[20], offset);
|
|
w[60] = amd_bytealign (w[18], w[19], offset);
|
|
w[59] = amd_bytealign (w[17], w[18], offset);
|
|
w[58] = amd_bytealign (w[16], w[17], offset);
|
|
w[57] = amd_bytealign (w[15], w[16], offset);
|
|
w[56] = amd_bytealign (w[14], w[15], offset);
|
|
w[55] = amd_bytealign (w[13], w[14], offset);
|
|
w[54] = amd_bytealign (w[12], w[13], offset);
|
|
w[53] = amd_bytealign (w[11], w[12], offset);
|
|
w[52] = amd_bytealign (w[10], w[11], offset);
|
|
w[51] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[50] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[49] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[48] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[47] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[46] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[45] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[44] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[43] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[42] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[41] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = amd_bytealign (w[20], w[21], offset);
|
|
w[62] = amd_bytealign (w[19], w[20], offset);
|
|
w[61] = amd_bytealign (w[18], w[19], offset);
|
|
w[60] = amd_bytealign (w[17], w[18], offset);
|
|
w[59] = amd_bytealign (w[16], w[17], offset);
|
|
w[58] = amd_bytealign (w[15], w[16], offset);
|
|
w[57] = amd_bytealign (w[14], w[15], offset);
|
|
w[56] = amd_bytealign (w[13], w[14], offset);
|
|
w[55] = amd_bytealign (w[12], w[13], offset);
|
|
w[54] = amd_bytealign (w[11], w[12], offset);
|
|
w[53] = amd_bytealign (w[10], w[11], offset);
|
|
w[52] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[51] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[50] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[49] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[48] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[47] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[46] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[45] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[44] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[43] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[42] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = amd_bytealign (w[19], w[20], offset);
|
|
w[62] = amd_bytealign (w[18], w[19], offset);
|
|
w[61] = amd_bytealign (w[17], w[18], offset);
|
|
w[60] = amd_bytealign (w[16], w[17], offset);
|
|
w[59] = amd_bytealign (w[15], w[16], offset);
|
|
w[58] = amd_bytealign (w[14], w[15], offset);
|
|
w[57] = amd_bytealign (w[13], w[14], offset);
|
|
w[56] = amd_bytealign (w[12], w[13], offset);
|
|
w[55] = amd_bytealign (w[11], w[12], offset);
|
|
w[54] = amd_bytealign (w[10], w[11], offset);
|
|
w[53] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[52] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[51] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[50] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[49] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[48] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[47] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[46] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[45] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[44] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[43] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = amd_bytealign (w[18], w[19], offset);
|
|
w[62] = amd_bytealign (w[17], w[18], offset);
|
|
w[61] = amd_bytealign (w[16], w[17], offset);
|
|
w[60] = amd_bytealign (w[15], w[16], offset);
|
|
w[59] = amd_bytealign (w[14], w[15], offset);
|
|
w[58] = amd_bytealign (w[13], w[14], offset);
|
|
w[57] = amd_bytealign (w[12], w[13], offset);
|
|
w[56] = amd_bytealign (w[11], w[12], offset);
|
|
w[55] = amd_bytealign (w[10], w[11], offset);
|
|
w[54] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[53] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[52] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[51] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[50] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[49] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[48] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[47] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[46] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[45] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[44] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = amd_bytealign (w[17], w[18], offset);
|
|
w[62] = amd_bytealign (w[16], w[17], offset);
|
|
w[61] = amd_bytealign (w[15], w[16], offset);
|
|
w[60] = amd_bytealign (w[14], w[15], offset);
|
|
w[59] = amd_bytealign (w[13], w[14], offset);
|
|
w[58] = amd_bytealign (w[12], w[13], offset);
|
|
w[57] = amd_bytealign (w[11], w[12], offset);
|
|
w[56] = amd_bytealign (w[10], w[11], offset);
|
|
w[55] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[54] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[53] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[52] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[51] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[50] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[49] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[48] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[47] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[46] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[45] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = amd_bytealign (w[16], w[17], offset);
|
|
w[62] = amd_bytealign (w[15], w[16], offset);
|
|
w[61] = amd_bytealign (w[14], w[15], offset);
|
|
w[60] = amd_bytealign (w[13], w[14], offset);
|
|
w[59] = amd_bytealign (w[12], w[13], offset);
|
|
w[58] = amd_bytealign (w[11], w[12], offset);
|
|
w[57] = amd_bytealign (w[10], w[11], offset);
|
|
w[56] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[55] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[54] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[53] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[52] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[51] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[50] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[49] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[48] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[47] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[46] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = amd_bytealign (w[15], w[16], offset);
|
|
w[62] = amd_bytealign (w[14], w[15], offset);
|
|
w[61] = amd_bytealign (w[13], w[14], offset);
|
|
w[60] = amd_bytealign (w[12], w[13], offset);
|
|
w[59] = amd_bytealign (w[11], w[12], offset);
|
|
w[58] = amd_bytealign (w[10], w[11], offset);
|
|
w[57] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[56] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[55] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[54] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[53] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[52] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[51] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[50] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[49] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[48] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[47] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = amd_bytealign (w[14], w[15], offset);
|
|
w[62] = amd_bytealign (w[13], w[14], offset);
|
|
w[61] = amd_bytealign (w[12], w[13], offset);
|
|
w[60] = amd_bytealign (w[11], w[12], offset);
|
|
w[59] = amd_bytealign (w[10], w[11], offset);
|
|
w[58] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[57] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[56] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[55] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[54] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[53] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[52] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[51] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[50] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[49] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[48] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = amd_bytealign (w[13], w[14], offset);
|
|
w[62] = amd_bytealign (w[12], w[13], offset);
|
|
w[61] = amd_bytealign (w[11], w[12], offset);
|
|
w[60] = amd_bytealign (w[10], w[11], offset);
|
|
w[59] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[58] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[57] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[56] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[55] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[54] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[53] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[52] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[51] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[50] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[49] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = amd_bytealign (w[12], w[13], offset);
|
|
w[62] = amd_bytealign (w[11], w[12], offset);
|
|
w[61] = amd_bytealign (w[10], w[11], offset);
|
|
w[60] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[59] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[58] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[57] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[56] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[55] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[54] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[53] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[52] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[51] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[50] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = amd_bytealign (w[11], w[12], offset);
|
|
w[62] = amd_bytealign (w[10], w[11], offset);
|
|
w[61] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[60] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[59] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[58] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[57] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[56] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[55] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[54] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[53] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[52] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[51] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = amd_bytealign (w[10], w[11], offset);
|
|
w[62] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[61] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[60] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[59] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[58] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[57] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[56] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[55] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[54] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[53] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[52] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[62] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[61] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[60] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[59] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[58] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[57] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[56] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[55] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[54] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[53] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[62] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[61] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[60] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[59] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[58] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[57] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[56] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[55] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[54] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[62] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[61] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[60] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[59] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[58] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[57] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[56] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[55] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[62] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[61] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[60] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[59] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[58] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[57] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[56] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[62] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[61] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[60] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[59] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[58] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[57] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[62] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[61] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[60] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[59] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[58] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[62] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[61] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[60] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[59] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[62] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[61] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[60] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[62] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[61] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[62] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
#pragma unroll
|
|
for (int i = 0; i < 64; i++) w[i] = swap32 (w[i]);
|
|
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = __byte_perm (w[62], w[63], selector);
|
|
w[62] = __byte_perm (w[61], w[62], selector);
|
|
w[61] = __byte_perm (w[60], w[61], selector);
|
|
w[60] = __byte_perm (w[59], w[60], selector);
|
|
w[59] = __byte_perm (w[58], w[59], selector);
|
|
w[58] = __byte_perm (w[57], w[58], selector);
|
|
w[57] = __byte_perm (w[56], w[57], selector);
|
|
w[56] = __byte_perm (w[55], w[56], selector);
|
|
w[55] = __byte_perm (w[54], w[55], selector);
|
|
w[54] = __byte_perm (w[53], w[54], selector);
|
|
w[53] = __byte_perm (w[52], w[53], selector);
|
|
w[52] = __byte_perm (w[51], w[52], selector);
|
|
w[51] = __byte_perm (w[50], w[51], selector);
|
|
w[50] = __byte_perm (w[49], w[50], selector);
|
|
w[49] = __byte_perm (w[48], w[49], selector);
|
|
w[48] = __byte_perm (w[47], w[48], selector);
|
|
w[47] = __byte_perm (w[46], w[47], selector);
|
|
w[46] = __byte_perm (w[45], w[46], selector);
|
|
w[45] = __byte_perm (w[44], w[45], selector);
|
|
w[44] = __byte_perm (w[43], w[44], selector);
|
|
w[43] = __byte_perm (w[42], w[43], selector);
|
|
w[42] = __byte_perm (w[41], w[42], selector);
|
|
w[41] = __byte_perm (w[40], w[41], selector);
|
|
w[40] = __byte_perm (w[39], w[40], selector);
|
|
w[39] = __byte_perm (w[38], w[39], selector);
|
|
w[38] = __byte_perm (w[37], w[38], selector);
|
|
w[37] = __byte_perm (w[36], w[37], selector);
|
|
w[36] = __byte_perm (w[35], w[36], selector);
|
|
w[35] = __byte_perm (w[34], w[35], selector);
|
|
w[34] = __byte_perm (w[33], w[34], selector);
|
|
w[33] = __byte_perm (w[32], w[33], selector);
|
|
w[32] = __byte_perm (w[31], w[32], selector);
|
|
w[31] = __byte_perm (w[30], w[31], selector);
|
|
w[30] = __byte_perm (w[29], w[30], selector);
|
|
w[29] = __byte_perm (w[28], w[29], selector);
|
|
w[28] = __byte_perm (w[27], w[28], selector);
|
|
w[27] = __byte_perm (w[26], w[27], selector);
|
|
w[26] = __byte_perm (w[25], w[26], selector);
|
|
w[25] = __byte_perm (w[24], w[25], selector);
|
|
w[24] = __byte_perm (w[23], w[24], selector);
|
|
w[23] = __byte_perm (w[22], w[23], selector);
|
|
w[22] = __byte_perm (w[21], w[22], selector);
|
|
w[21] = __byte_perm (w[20], w[21], selector);
|
|
w[20] = __byte_perm (w[19], w[20], selector);
|
|
w[19] = __byte_perm (w[18], w[19], selector);
|
|
w[18] = __byte_perm (w[17], w[18], selector);
|
|
w[17] = __byte_perm (w[16], w[17], selector);
|
|
w[16] = __byte_perm (w[15], w[16], selector);
|
|
w[15] = __byte_perm (w[14], w[15], selector);
|
|
w[14] = __byte_perm (w[13], w[14], selector);
|
|
w[13] = __byte_perm (w[12], w[13], selector);
|
|
w[12] = __byte_perm (w[11], w[12], selector);
|
|
w[11] = __byte_perm (w[10], w[11], selector);
|
|
w[10] = __byte_perm (w[ 9], w[10], selector);
|
|
w[ 9] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[ 8] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[ 7] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[ 6] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[ 5] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 4] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 3] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 2] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 1] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 0] = __byte_perm ( 0, w[ 0], selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = __byte_perm (w[61], w[62], selector);
|
|
w[62] = __byte_perm (w[60], w[61], selector);
|
|
w[61] = __byte_perm (w[59], w[60], selector);
|
|
w[60] = __byte_perm (w[58], w[59], selector);
|
|
w[59] = __byte_perm (w[57], w[58], selector);
|
|
w[58] = __byte_perm (w[56], w[57], selector);
|
|
w[57] = __byte_perm (w[55], w[56], selector);
|
|
w[56] = __byte_perm (w[54], w[55], selector);
|
|
w[55] = __byte_perm (w[53], w[54], selector);
|
|
w[54] = __byte_perm (w[52], w[53], selector);
|
|
w[53] = __byte_perm (w[51], w[52], selector);
|
|
w[52] = __byte_perm (w[50], w[51], selector);
|
|
w[51] = __byte_perm (w[49], w[50], selector);
|
|
w[50] = __byte_perm (w[48], w[49], selector);
|
|
w[49] = __byte_perm (w[47], w[48], selector);
|
|
w[48] = __byte_perm (w[46], w[47], selector);
|
|
w[47] = __byte_perm (w[45], w[46], selector);
|
|
w[46] = __byte_perm (w[44], w[45], selector);
|
|
w[45] = __byte_perm (w[43], w[44], selector);
|
|
w[44] = __byte_perm (w[42], w[43], selector);
|
|
w[43] = __byte_perm (w[41], w[42], selector);
|
|
w[42] = __byte_perm (w[40], w[41], selector);
|
|
w[41] = __byte_perm (w[39], w[40], selector);
|
|
w[40] = __byte_perm (w[38], w[39], selector);
|
|
w[39] = __byte_perm (w[37], w[38], selector);
|
|
w[38] = __byte_perm (w[36], w[37], selector);
|
|
w[37] = __byte_perm (w[35], w[36], selector);
|
|
w[36] = __byte_perm (w[34], w[35], selector);
|
|
w[35] = __byte_perm (w[33], w[34], selector);
|
|
w[34] = __byte_perm (w[32], w[33], selector);
|
|
w[33] = __byte_perm (w[31], w[32], selector);
|
|
w[32] = __byte_perm (w[30], w[31], selector);
|
|
w[31] = __byte_perm (w[29], w[30], selector);
|
|
w[30] = __byte_perm (w[28], w[29], selector);
|
|
w[29] = __byte_perm (w[27], w[28], selector);
|
|
w[28] = __byte_perm (w[26], w[27], selector);
|
|
w[27] = __byte_perm (w[25], w[26], selector);
|
|
w[26] = __byte_perm (w[24], w[25], selector);
|
|
w[25] = __byte_perm (w[23], w[24], selector);
|
|
w[24] = __byte_perm (w[22], w[23], selector);
|
|
w[23] = __byte_perm (w[21], w[22], selector);
|
|
w[22] = __byte_perm (w[20], w[21], selector);
|
|
w[21] = __byte_perm (w[19], w[20], selector);
|
|
w[20] = __byte_perm (w[18], w[19], selector);
|
|
w[19] = __byte_perm (w[17], w[18], selector);
|
|
w[18] = __byte_perm (w[16], w[17], selector);
|
|
w[17] = __byte_perm (w[15], w[16], selector);
|
|
w[16] = __byte_perm (w[14], w[15], selector);
|
|
w[15] = __byte_perm (w[13], w[14], selector);
|
|
w[14] = __byte_perm (w[12], w[13], selector);
|
|
w[13] = __byte_perm (w[11], w[12], selector);
|
|
w[12] = __byte_perm (w[10], w[11], selector);
|
|
w[11] = __byte_perm (w[ 9], w[10], selector);
|
|
w[10] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[ 9] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[ 8] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[ 7] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[ 6] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 5] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 4] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 3] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 2] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 1] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = __byte_perm (w[60], w[61], selector);
|
|
w[62] = __byte_perm (w[59], w[60], selector);
|
|
w[61] = __byte_perm (w[58], w[59], selector);
|
|
w[60] = __byte_perm (w[57], w[58], selector);
|
|
w[59] = __byte_perm (w[56], w[57], selector);
|
|
w[58] = __byte_perm (w[55], w[56], selector);
|
|
w[57] = __byte_perm (w[54], w[55], selector);
|
|
w[56] = __byte_perm (w[53], w[54], selector);
|
|
w[55] = __byte_perm (w[52], w[53], selector);
|
|
w[54] = __byte_perm (w[51], w[52], selector);
|
|
w[53] = __byte_perm (w[50], w[51], selector);
|
|
w[52] = __byte_perm (w[49], w[50], selector);
|
|
w[51] = __byte_perm (w[48], w[49], selector);
|
|
w[50] = __byte_perm (w[47], w[48], selector);
|
|
w[49] = __byte_perm (w[46], w[47], selector);
|
|
w[48] = __byte_perm (w[45], w[46], selector);
|
|
w[47] = __byte_perm (w[44], w[45], selector);
|
|
w[46] = __byte_perm (w[43], w[44], selector);
|
|
w[45] = __byte_perm (w[42], w[43], selector);
|
|
w[44] = __byte_perm (w[41], w[42], selector);
|
|
w[43] = __byte_perm (w[40], w[41], selector);
|
|
w[42] = __byte_perm (w[39], w[40], selector);
|
|
w[41] = __byte_perm (w[38], w[39], selector);
|
|
w[40] = __byte_perm (w[37], w[38], selector);
|
|
w[39] = __byte_perm (w[36], w[37], selector);
|
|
w[38] = __byte_perm (w[35], w[36], selector);
|
|
w[37] = __byte_perm (w[34], w[35], selector);
|
|
w[36] = __byte_perm (w[33], w[34], selector);
|
|
w[35] = __byte_perm (w[32], w[33], selector);
|
|
w[34] = __byte_perm (w[31], w[32], selector);
|
|
w[33] = __byte_perm (w[30], w[31], selector);
|
|
w[32] = __byte_perm (w[29], w[30], selector);
|
|
w[31] = __byte_perm (w[28], w[29], selector);
|
|
w[30] = __byte_perm (w[27], w[28], selector);
|
|
w[29] = __byte_perm (w[26], w[27], selector);
|
|
w[28] = __byte_perm (w[25], w[26], selector);
|
|
w[27] = __byte_perm (w[24], w[25], selector);
|
|
w[26] = __byte_perm (w[23], w[24], selector);
|
|
w[25] = __byte_perm (w[22], w[23], selector);
|
|
w[24] = __byte_perm (w[21], w[22], selector);
|
|
w[23] = __byte_perm (w[20], w[21], selector);
|
|
w[22] = __byte_perm (w[19], w[20], selector);
|
|
w[21] = __byte_perm (w[18], w[19], selector);
|
|
w[20] = __byte_perm (w[17], w[18], selector);
|
|
w[19] = __byte_perm (w[16], w[17], selector);
|
|
w[18] = __byte_perm (w[15], w[16], selector);
|
|
w[17] = __byte_perm (w[14], w[15], selector);
|
|
w[16] = __byte_perm (w[13], w[14], selector);
|
|
w[15] = __byte_perm (w[12], w[13], selector);
|
|
w[14] = __byte_perm (w[11], w[12], selector);
|
|
w[13] = __byte_perm (w[10], w[11], selector);
|
|
w[12] = __byte_perm (w[ 9], w[10], selector);
|
|
w[11] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[10] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[ 9] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[ 8] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[ 7] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 6] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 5] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 4] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 3] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 2] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = __byte_perm (w[59], w[60], selector);
|
|
w[62] = __byte_perm (w[58], w[59], selector);
|
|
w[61] = __byte_perm (w[57], w[58], selector);
|
|
w[60] = __byte_perm (w[56], w[57], selector);
|
|
w[59] = __byte_perm (w[55], w[56], selector);
|
|
w[58] = __byte_perm (w[54], w[55], selector);
|
|
w[57] = __byte_perm (w[53], w[54], selector);
|
|
w[56] = __byte_perm (w[52], w[53], selector);
|
|
w[55] = __byte_perm (w[51], w[52], selector);
|
|
w[54] = __byte_perm (w[50], w[51], selector);
|
|
w[53] = __byte_perm (w[49], w[50], selector);
|
|
w[52] = __byte_perm (w[48], w[49], selector);
|
|
w[51] = __byte_perm (w[47], w[48], selector);
|
|
w[50] = __byte_perm (w[46], w[47], selector);
|
|
w[49] = __byte_perm (w[45], w[46], selector);
|
|
w[48] = __byte_perm (w[44], w[45], selector);
|
|
w[47] = __byte_perm (w[43], w[44], selector);
|
|
w[46] = __byte_perm (w[42], w[43], selector);
|
|
w[45] = __byte_perm (w[41], w[42], selector);
|
|
w[44] = __byte_perm (w[40], w[41], selector);
|
|
w[43] = __byte_perm (w[39], w[40], selector);
|
|
w[42] = __byte_perm (w[38], w[39], selector);
|
|
w[41] = __byte_perm (w[37], w[38], selector);
|
|
w[40] = __byte_perm (w[36], w[37], selector);
|
|
w[39] = __byte_perm (w[35], w[36], selector);
|
|
w[38] = __byte_perm (w[34], w[35], selector);
|
|
w[37] = __byte_perm (w[33], w[34], selector);
|
|
w[36] = __byte_perm (w[32], w[33], selector);
|
|
w[35] = __byte_perm (w[31], w[32], selector);
|
|
w[34] = __byte_perm (w[30], w[31], selector);
|
|
w[33] = __byte_perm (w[29], w[30], selector);
|
|
w[32] = __byte_perm (w[28], w[29], selector);
|
|
w[31] = __byte_perm (w[27], w[28], selector);
|
|
w[30] = __byte_perm (w[26], w[27], selector);
|
|
w[29] = __byte_perm (w[25], w[26], selector);
|
|
w[28] = __byte_perm (w[24], w[25], selector);
|
|
w[27] = __byte_perm (w[23], w[24], selector);
|
|
w[26] = __byte_perm (w[22], w[23], selector);
|
|
w[25] = __byte_perm (w[21], w[22], selector);
|
|
w[24] = __byte_perm (w[20], w[21], selector);
|
|
w[23] = __byte_perm (w[19], w[20], selector);
|
|
w[22] = __byte_perm (w[18], w[19], selector);
|
|
w[21] = __byte_perm (w[17], w[18], selector);
|
|
w[20] = __byte_perm (w[16], w[17], selector);
|
|
w[19] = __byte_perm (w[15], w[16], selector);
|
|
w[18] = __byte_perm (w[14], w[15], selector);
|
|
w[17] = __byte_perm (w[13], w[14], selector);
|
|
w[16] = __byte_perm (w[12], w[13], selector);
|
|
w[15] = __byte_perm (w[11], w[12], selector);
|
|
w[14] = __byte_perm (w[10], w[11], selector);
|
|
w[13] = __byte_perm (w[ 9], w[10], selector);
|
|
w[12] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[11] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[10] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[ 9] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[ 8] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 7] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 6] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 5] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 4] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 3] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = __byte_perm (w[58], w[59], selector);
|
|
w[62] = __byte_perm (w[57], w[58], selector);
|
|
w[61] = __byte_perm (w[56], w[57], selector);
|
|
w[60] = __byte_perm (w[55], w[56], selector);
|
|
w[59] = __byte_perm (w[54], w[55], selector);
|
|
w[58] = __byte_perm (w[53], w[54], selector);
|
|
w[57] = __byte_perm (w[52], w[53], selector);
|
|
w[56] = __byte_perm (w[51], w[52], selector);
|
|
w[55] = __byte_perm (w[50], w[51], selector);
|
|
w[54] = __byte_perm (w[49], w[50], selector);
|
|
w[53] = __byte_perm (w[48], w[49], selector);
|
|
w[52] = __byte_perm (w[47], w[48], selector);
|
|
w[51] = __byte_perm (w[46], w[47], selector);
|
|
w[50] = __byte_perm (w[45], w[46], selector);
|
|
w[49] = __byte_perm (w[44], w[45], selector);
|
|
w[48] = __byte_perm (w[43], w[44], selector);
|
|
w[47] = __byte_perm (w[42], w[43], selector);
|
|
w[46] = __byte_perm (w[41], w[42], selector);
|
|
w[45] = __byte_perm (w[40], w[41], selector);
|
|
w[44] = __byte_perm (w[39], w[40], selector);
|
|
w[43] = __byte_perm (w[38], w[39], selector);
|
|
w[42] = __byte_perm (w[37], w[38], selector);
|
|
w[41] = __byte_perm (w[36], w[37], selector);
|
|
w[40] = __byte_perm (w[35], w[36], selector);
|
|
w[39] = __byte_perm (w[34], w[35], selector);
|
|
w[38] = __byte_perm (w[33], w[34], selector);
|
|
w[37] = __byte_perm (w[32], w[33], selector);
|
|
w[36] = __byte_perm (w[31], w[32], selector);
|
|
w[35] = __byte_perm (w[30], w[31], selector);
|
|
w[34] = __byte_perm (w[29], w[30], selector);
|
|
w[33] = __byte_perm (w[28], w[29], selector);
|
|
w[32] = __byte_perm (w[27], w[28], selector);
|
|
w[31] = __byte_perm (w[26], w[27], selector);
|
|
w[30] = __byte_perm (w[25], w[26], selector);
|
|
w[29] = __byte_perm (w[24], w[25], selector);
|
|
w[28] = __byte_perm (w[23], w[24], selector);
|
|
w[27] = __byte_perm (w[22], w[23], selector);
|
|
w[26] = __byte_perm (w[21], w[22], selector);
|
|
w[25] = __byte_perm (w[20], w[21], selector);
|
|
w[24] = __byte_perm (w[19], w[20], selector);
|
|
w[23] = __byte_perm (w[18], w[19], selector);
|
|
w[22] = __byte_perm (w[17], w[18], selector);
|
|
w[21] = __byte_perm (w[16], w[17], selector);
|
|
w[20] = __byte_perm (w[15], w[16], selector);
|
|
w[19] = __byte_perm (w[14], w[15], selector);
|
|
w[18] = __byte_perm (w[13], w[14], selector);
|
|
w[17] = __byte_perm (w[12], w[13], selector);
|
|
w[16] = __byte_perm (w[11], w[12], selector);
|
|
w[15] = __byte_perm (w[10], w[11], selector);
|
|
w[14] = __byte_perm (w[ 9], w[10], selector);
|
|
w[13] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[12] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[11] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[10] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[ 9] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 8] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 7] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 6] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 5] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 4] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = __byte_perm (w[57], w[58], selector);
|
|
w[62] = __byte_perm (w[56], w[57], selector);
|
|
w[61] = __byte_perm (w[55], w[56], selector);
|
|
w[60] = __byte_perm (w[54], w[55], selector);
|
|
w[59] = __byte_perm (w[53], w[54], selector);
|
|
w[58] = __byte_perm (w[52], w[53], selector);
|
|
w[57] = __byte_perm (w[51], w[52], selector);
|
|
w[56] = __byte_perm (w[50], w[51], selector);
|
|
w[55] = __byte_perm (w[49], w[50], selector);
|
|
w[54] = __byte_perm (w[48], w[49], selector);
|
|
w[53] = __byte_perm (w[47], w[48], selector);
|
|
w[52] = __byte_perm (w[46], w[47], selector);
|
|
w[51] = __byte_perm (w[45], w[46], selector);
|
|
w[50] = __byte_perm (w[44], w[45], selector);
|
|
w[49] = __byte_perm (w[43], w[44], selector);
|
|
w[48] = __byte_perm (w[42], w[43], selector);
|
|
w[47] = __byte_perm (w[41], w[42], selector);
|
|
w[46] = __byte_perm (w[40], w[41], selector);
|
|
w[45] = __byte_perm (w[39], w[40], selector);
|
|
w[44] = __byte_perm (w[38], w[39], selector);
|
|
w[43] = __byte_perm (w[37], w[38], selector);
|
|
w[42] = __byte_perm (w[36], w[37], selector);
|
|
w[41] = __byte_perm (w[35], w[36], selector);
|
|
w[40] = __byte_perm (w[34], w[35], selector);
|
|
w[39] = __byte_perm (w[33], w[34], selector);
|
|
w[38] = __byte_perm (w[32], w[33], selector);
|
|
w[37] = __byte_perm (w[31], w[32], selector);
|
|
w[36] = __byte_perm (w[30], w[31], selector);
|
|
w[35] = __byte_perm (w[29], w[30], selector);
|
|
w[34] = __byte_perm (w[28], w[29], selector);
|
|
w[33] = __byte_perm (w[27], w[28], selector);
|
|
w[32] = __byte_perm (w[26], w[27], selector);
|
|
w[31] = __byte_perm (w[25], w[26], selector);
|
|
w[30] = __byte_perm (w[24], w[25], selector);
|
|
w[29] = __byte_perm (w[23], w[24], selector);
|
|
w[28] = __byte_perm (w[22], w[23], selector);
|
|
w[27] = __byte_perm (w[21], w[22], selector);
|
|
w[26] = __byte_perm (w[20], w[21], selector);
|
|
w[25] = __byte_perm (w[19], w[20], selector);
|
|
w[24] = __byte_perm (w[18], w[19], selector);
|
|
w[23] = __byte_perm (w[17], w[18], selector);
|
|
w[22] = __byte_perm (w[16], w[17], selector);
|
|
w[21] = __byte_perm (w[15], w[16], selector);
|
|
w[20] = __byte_perm (w[14], w[15], selector);
|
|
w[19] = __byte_perm (w[13], w[14], selector);
|
|
w[18] = __byte_perm (w[12], w[13], selector);
|
|
w[17] = __byte_perm (w[11], w[12], selector);
|
|
w[16] = __byte_perm (w[10], w[11], selector);
|
|
w[15] = __byte_perm (w[ 9], w[10], selector);
|
|
w[14] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[13] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[12] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[11] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[10] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[ 9] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 8] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 7] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 6] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 5] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = __byte_perm (w[56], w[57], selector);
|
|
w[62] = __byte_perm (w[55], w[56], selector);
|
|
w[61] = __byte_perm (w[54], w[55], selector);
|
|
w[60] = __byte_perm (w[53], w[54], selector);
|
|
w[59] = __byte_perm (w[52], w[53], selector);
|
|
w[58] = __byte_perm (w[51], w[52], selector);
|
|
w[57] = __byte_perm (w[50], w[51], selector);
|
|
w[56] = __byte_perm (w[49], w[50], selector);
|
|
w[55] = __byte_perm (w[48], w[49], selector);
|
|
w[54] = __byte_perm (w[47], w[48], selector);
|
|
w[53] = __byte_perm (w[46], w[47], selector);
|
|
w[52] = __byte_perm (w[45], w[46], selector);
|
|
w[51] = __byte_perm (w[44], w[45], selector);
|
|
w[50] = __byte_perm (w[43], w[44], selector);
|
|
w[49] = __byte_perm (w[42], w[43], selector);
|
|
w[48] = __byte_perm (w[41], w[42], selector);
|
|
w[47] = __byte_perm (w[40], w[41], selector);
|
|
w[46] = __byte_perm (w[39], w[40], selector);
|
|
w[45] = __byte_perm (w[38], w[39], selector);
|
|
w[44] = __byte_perm (w[37], w[38], selector);
|
|
w[43] = __byte_perm (w[36], w[37], selector);
|
|
w[42] = __byte_perm (w[35], w[36], selector);
|
|
w[41] = __byte_perm (w[34], w[35], selector);
|
|
w[40] = __byte_perm (w[33], w[34], selector);
|
|
w[39] = __byte_perm (w[32], w[33], selector);
|
|
w[38] = __byte_perm (w[31], w[32], selector);
|
|
w[37] = __byte_perm (w[30], w[31], selector);
|
|
w[36] = __byte_perm (w[29], w[30], selector);
|
|
w[35] = __byte_perm (w[28], w[29], selector);
|
|
w[34] = __byte_perm (w[27], w[28], selector);
|
|
w[33] = __byte_perm (w[26], w[27], selector);
|
|
w[32] = __byte_perm (w[25], w[26], selector);
|
|
w[31] = __byte_perm (w[24], w[25], selector);
|
|
w[30] = __byte_perm (w[23], w[24], selector);
|
|
w[29] = __byte_perm (w[22], w[23], selector);
|
|
w[28] = __byte_perm (w[21], w[22], selector);
|
|
w[27] = __byte_perm (w[20], w[21], selector);
|
|
w[26] = __byte_perm (w[19], w[20], selector);
|
|
w[25] = __byte_perm (w[18], w[19], selector);
|
|
w[24] = __byte_perm (w[17], w[18], selector);
|
|
w[23] = __byte_perm (w[16], w[17], selector);
|
|
w[22] = __byte_perm (w[15], w[16], selector);
|
|
w[21] = __byte_perm (w[14], w[15], selector);
|
|
w[20] = __byte_perm (w[13], w[14], selector);
|
|
w[19] = __byte_perm (w[12], w[13], selector);
|
|
w[18] = __byte_perm (w[11], w[12], selector);
|
|
w[17] = __byte_perm (w[10], w[11], selector);
|
|
w[16] = __byte_perm (w[ 9], w[10], selector);
|
|
w[15] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[14] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[13] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[12] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[11] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[10] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[ 9] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 8] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 7] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 6] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = __byte_perm (w[55], w[56], selector);
|
|
w[62] = __byte_perm (w[54], w[55], selector);
|
|
w[61] = __byte_perm (w[53], w[54], selector);
|
|
w[60] = __byte_perm (w[52], w[53], selector);
|
|
w[59] = __byte_perm (w[51], w[52], selector);
|
|
w[58] = __byte_perm (w[50], w[51], selector);
|
|
w[57] = __byte_perm (w[49], w[50], selector);
|
|
w[56] = __byte_perm (w[48], w[49], selector);
|
|
w[55] = __byte_perm (w[47], w[48], selector);
|
|
w[54] = __byte_perm (w[46], w[47], selector);
|
|
w[53] = __byte_perm (w[45], w[46], selector);
|
|
w[52] = __byte_perm (w[44], w[45], selector);
|
|
w[51] = __byte_perm (w[43], w[44], selector);
|
|
w[50] = __byte_perm (w[42], w[43], selector);
|
|
w[49] = __byte_perm (w[41], w[42], selector);
|
|
w[48] = __byte_perm (w[40], w[41], selector);
|
|
w[47] = __byte_perm (w[39], w[40], selector);
|
|
w[46] = __byte_perm (w[38], w[39], selector);
|
|
w[45] = __byte_perm (w[37], w[38], selector);
|
|
w[44] = __byte_perm (w[36], w[37], selector);
|
|
w[43] = __byte_perm (w[35], w[36], selector);
|
|
w[42] = __byte_perm (w[34], w[35], selector);
|
|
w[41] = __byte_perm (w[33], w[34], selector);
|
|
w[40] = __byte_perm (w[32], w[33], selector);
|
|
w[39] = __byte_perm (w[31], w[32], selector);
|
|
w[38] = __byte_perm (w[30], w[31], selector);
|
|
w[37] = __byte_perm (w[29], w[30], selector);
|
|
w[36] = __byte_perm (w[28], w[29], selector);
|
|
w[35] = __byte_perm (w[27], w[28], selector);
|
|
w[34] = __byte_perm (w[26], w[27], selector);
|
|
w[33] = __byte_perm (w[25], w[26], selector);
|
|
w[32] = __byte_perm (w[24], w[25], selector);
|
|
w[31] = __byte_perm (w[23], w[24], selector);
|
|
w[30] = __byte_perm (w[22], w[23], selector);
|
|
w[29] = __byte_perm (w[21], w[22], selector);
|
|
w[28] = __byte_perm (w[20], w[21], selector);
|
|
w[27] = __byte_perm (w[19], w[20], selector);
|
|
w[26] = __byte_perm (w[18], w[19], selector);
|
|
w[25] = __byte_perm (w[17], w[18], selector);
|
|
w[24] = __byte_perm (w[16], w[17], selector);
|
|
w[23] = __byte_perm (w[15], w[16], selector);
|
|
w[22] = __byte_perm (w[14], w[15], selector);
|
|
w[21] = __byte_perm (w[13], w[14], selector);
|
|
w[20] = __byte_perm (w[12], w[13], selector);
|
|
w[19] = __byte_perm (w[11], w[12], selector);
|
|
w[18] = __byte_perm (w[10], w[11], selector);
|
|
w[17] = __byte_perm (w[ 9], w[10], selector);
|
|
w[16] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[15] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[14] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[13] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[12] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[11] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[10] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[ 9] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 8] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 7] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = __byte_perm (w[54], w[55], selector);
|
|
w[62] = __byte_perm (w[53], w[54], selector);
|
|
w[61] = __byte_perm (w[52], w[53], selector);
|
|
w[60] = __byte_perm (w[51], w[52], selector);
|
|
w[59] = __byte_perm (w[50], w[51], selector);
|
|
w[58] = __byte_perm (w[49], w[50], selector);
|
|
w[57] = __byte_perm (w[48], w[49], selector);
|
|
w[56] = __byte_perm (w[47], w[48], selector);
|
|
w[55] = __byte_perm (w[46], w[47], selector);
|
|
w[54] = __byte_perm (w[45], w[46], selector);
|
|
w[53] = __byte_perm (w[44], w[45], selector);
|
|
w[52] = __byte_perm (w[43], w[44], selector);
|
|
w[51] = __byte_perm (w[42], w[43], selector);
|
|
w[50] = __byte_perm (w[41], w[42], selector);
|
|
w[49] = __byte_perm (w[40], w[41], selector);
|
|
w[48] = __byte_perm (w[39], w[40], selector);
|
|
w[47] = __byte_perm (w[38], w[39], selector);
|
|
w[46] = __byte_perm (w[37], w[38], selector);
|
|
w[45] = __byte_perm (w[36], w[37], selector);
|
|
w[44] = __byte_perm (w[35], w[36], selector);
|
|
w[43] = __byte_perm (w[34], w[35], selector);
|
|
w[42] = __byte_perm (w[33], w[34], selector);
|
|
w[41] = __byte_perm (w[32], w[33], selector);
|
|
w[40] = __byte_perm (w[31], w[32], selector);
|
|
w[39] = __byte_perm (w[30], w[31], selector);
|
|
w[38] = __byte_perm (w[29], w[30], selector);
|
|
w[37] = __byte_perm (w[28], w[29], selector);
|
|
w[36] = __byte_perm (w[27], w[28], selector);
|
|
w[35] = __byte_perm (w[26], w[27], selector);
|
|
w[34] = __byte_perm (w[25], w[26], selector);
|
|
w[33] = __byte_perm (w[24], w[25], selector);
|
|
w[32] = __byte_perm (w[23], w[24], selector);
|
|
w[31] = __byte_perm (w[22], w[23], selector);
|
|
w[30] = __byte_perm (w[21], w[22], selector);
|
|
w[29] = __byte_perm (w[20], w[21], selector);
|
|
w[28] = __byte_perm (w[19], w[20], selector);
|
|
w[27] = __byte_perm (w[18], w[19], selector);
|
|
w[26] = __byte_perm (w[17], w[18], selector);
|
|
w[25] = __byte_perm (w[16], w[17], selector);
|
|
w[24] = __byte_perm (w[15], w[16], selector);
|
|
w[23] = __byte_perm (w[14], w[15], selector);
|
|
w[22] = __byte_perm (w[13], w[14], selector);
|
|
w[21] = __byte_perm (w[12], w[13], selector);
|
|
w[20] = __byte_perm (w[11], w[12], selector);
|
|
w[19] = __byte_perm (w[10], w[11], selector);
|
|
w[18] = __byte_perm (w[ 9], w[10], selector);
|
|
w[17] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[16] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[15] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[14] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[13] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[12] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[11] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[10] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[ 9] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 8] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = __byte_perm (w[53], w[54], selector);
|
|
w[62] = __byte_perm (w[52], w[53], selector);
|
|
w[61] = __byte_perm (w[51], w[52], selector);
|
|
w[60] = __byte_perm (w[50], w[51], selector);
|
|
w[59] = __byte_perm (w[49], w[50], selector);
|
|
w[58] = __byte_perm (w[48], w[49], selector);
|
|
w[57] = __byte_perm (w[47], w[48], selector);
|
|
w[56] = __byte_perm (w[46], w[47], selector);
|
|
w[55] = __byte_perm (w[45], w[46], selector);
|
|
w[54] = __byte_perm (w[44], w[45], selector);
|
|
w[53] = __byte_perm (w[43], w[44], selector);
|
|
w[52] = __byte_perm (w[42], w[43], selector);
|
|
w[51] = __byte_perm (w[41], w[42], selector);
|
|
w[50] = __byte_perm (w[40], w[41], selector);
|
|
w[49] = __byte_perm (w[39], w[40], selector);
|
|
w[48] = __byte_perm (w[38], w[39], selector);
|
|
w[47] = __byte_perm (w[37], w[38], selector);
|
|
w[46] = __byte_perm (w[36], w[37], selector);
|
|
w[45] = __byte_perm (w[35], w[36], selector);
|
|
w[44] = __byte_perm (w[34], w[35], selector);
|
|
w[43] = __byte_perm (w[33], w[34], selector);
|
|
w[42] = __byte_perm (w[32], w[33], selector);
|
|
w[41] = __byte_perm (w[31], w[32], selector);
|
|
w[40] = __byte_perm (w[30], w[31], selector);
|
|
w[39] = __byte_perm (w[29], w[30], selector);
|
|
w[38] = __byte_perm (w[28], w[29], selector);
|
|
w[37] = __byte_perm (w[27], w[28], selector);
|
|
w[36] = __byte_perm (w[26], w[27], selector);
|
|
w[35] = __byte_perm (w[25], w[26], selector);
|
|
w[34] = __byte_perm (w[24], w[25], selector);
|
|
w[33] = __byte_perm (w[23], w[24], selector);
|
|
w[32] = __byte_perm (w[22], w[23], selector);
|
|
w[31] = __byte_perm (w[21], w[22], selector);
|
|
w[30] = __byte_perm (w[20], w[21], selector);
|
|
w[29] = __byte_perm (w[19], w[20], selector);
|
|
w[28] = __byte_perm (w[18], w[19], selector);
|
|
w[27] = __byte_perm (w[17], w[18], selector);
|
|
w[26] = __byte_perm (w[16], w[17], selector);
|
|
w[25] = __byte_perm (w[15], w[16], selector);
|
|
w[24] = __byte_perm (w[14], w[15], selector);
|
|
w[23] = __byte_perm (w[13], w[14], selector);
|
|
w[22] = __byte_perm (w[12], w[13], selector);
|
|
w[21] = __byte_perm (w[11], w[12], selector);
|
|
w[20] = __byte_perm (w[10], w[11], selector);
|
|
w[19] = __byte_perm (w[ 9], w[10], selector);
|
|
w[18] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[17] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[16] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[15] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[14] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[13] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[12] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[11] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[10] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[ 9] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = __byte_perm (w[52], w[53], selector);
|
|
w[62] = __byte_perm (w[51], w[52], selector);
|
|
w[61] = __byte_perm (w[50], w[51], selector);
|
|
w[60] = __byte_perm (w[49], w[50], selector);
|
|
w[59] = __byte_perm (w[48], w[49], selector);
|
|
w[58] = __byte_perm (w[47], w[48], selector);
|
|
w[57] = __byte_perm (w[46], w[47], selector);
|
|
w[56] = __byte_perm (w[45], w[46], selector);
|
|
w[55] = __byte_perm (w[44], w[45], selector);
|
|
w[54] = __byte_perm (w[43], w[44], selector);
|
|
w[53] = __byte_perm (w[42], w[43], selector);
|
|
w[52] = __byte_perm (w[41], w[42], selector);
|
|
w[51] = __byte_perm (w[40], w[41], selector);
|
|
w[50] = __byte_perm (w[39], w[40], selector);
|
|
w[49] = __byte_perm (w[38], w[39], selector);
|
|
w[48] = __byte_perm (w[37], w[38], selector);
|
|
w[47] = __byte_perm (w[36], w[37], selector);
|
|
w[46] = __byte_perm (w[35], w[36], selector);
|
|
w[45] = __byte_perm (w[34], w[35], selector);
|
|
w[44] = __byte_perm (w[33], w[34], selector);
|
|
w[43] = __byte_perm (w[32], w[33], selector);
|
|
w[42] = __byte_perm (w[31], w[32], selector);
|
|
w[41] = __byte_perm (w[30], w[31], selector);
|
|
w[40] = __byte_perm (w[29], w[30], selector);
|
|
w[39] = __byte_perm (w[28], w[29], selector);
|
|
w[38] = __byte_perm (w[27], w[28], selector);
|
|
w[37] = __byte_perm (w[26], w[27], selector);
|
|
w[36] = __byte_perm (w[25], w[26], selector);
|
|
w[35] = __byte_perm (w[24], w[25], selector);
|
|
w[34] = __byte_perm (w[23], w[24], selector);
|
|
w[33] = __byte_perm (w[22], w[23], selector);
|
|
w[32] = __byte_perm (w[21], w[22], selector);
|
|
w[31] = __byte_perm (w[20], w[21], selector);
|
|
w[30] = __byte_perm (w[19], w[20], selector);
|
|
w[29] = __byte_perm (w[18], w[19], selector);
|
|
w[28] = __byte_perm (w[17], w[18], selector);
|
|
w[27] = __byte_perm (w[16], w[17], selector);
|
|
w[26] = __byte_perm (w[15], w[16], selector);
|
|
w[25] = __byte_perm (w[14], w[15], selector);
|
|
w[24] = __byte_perm (w[13], w[14], selector);
|
|
w[23] = __byte_perm (w[12], w[13], selector);
|
|
w[22] = __byte_perm (w[11], w[12], selector);
|
|
w[21] = __byte_perm (w[10], w[11], selector);
|
|
w[20] = __byte_perm (w[ 9], w[10], selector);
|
|
w[19] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[18] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[17] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[16] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[15] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[14] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[13] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[12] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[11] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[10] = __byte_perm ( 0, w[ 0], selector);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = __byte_perm (w[51], w[52], selector);
|
|
w[62] = __byte_perm (w[50], w[51], selector);
|
|
w[61] = __byte_perm (w[49], w[50], selector);
|
|
w[60] = __byte_perm (w[48], w[49], selector);
|
|
w[59] = __byte_perm (w[47], w[48], selector);
|
|
w[58] = __byte_perm (w[46], w[47], selector);
|
|
w[57] = __byte_perm (w[45], w[46], selector);
|
|
w[56] = __byte_perm (w[44], w[45], selector);
|
|
w[55] = __byte_perm (w[43], w[44], selector);
|
|
w[54] = __byte_perm (w[42], w[43], selector);
|
|
w[53] = __byte_perm (w[41], w[42], selector);
|
|
w[52] = __byte_perm (w[40], w[41], selector);
|
|
w[51] = __byte_perm (w[39], w[40], selector);
|
|
w[50] = __byte_perm (w[38], w[39], selector);
|
|
w[49] = __byte_perm (w[37], w[38], selector);
|
|
w[48] = __byte_perm (w[36], w[37], selector);
|
|
w[47] = __byte_perm (w[35], w[36], selector);
|
|
w[46] = __byte_perm (w[34], w[35], selector);
|
|
w[45] = __byte_perm (w[33], w[34], selector);
|
|
w[44] = __byte_perm (w[32], w[33], selector);
|
|
w[43] = __byte_perm (w[31], w[32], selector);
|
|
w[42] = __byte_perm (w[30], w[31], selector);
|
|
w[41] = __byte_perm (w[29], w[30], selector);
|
|
w[40] = __byte_perm (w[28], w[29], selector);
|
|
w[39] = __byte_perm (w[27], w[28], selector);
|
|
w[38] = __byte_perm (w[26], w[27], selector);
|
|
w[37] = __byte_perm (w[25], w[26], selector);
|
|
w[36] = __byte_perm (w[24], w[25], selector);
|
|
w[35] = __byte_perm (w[23], w[24], selector);
|
|
w[34] = __byte_perm (w[22], w[23], selector);
|
|
w[33] = __byte_perm (w[21], w[22], selector);
|
|
w[32] = __byte_perm (w[20], w[21], selector);
|
|
w[31] = __byte_perm (w[19], w[20], selector);
|
|
w[30] = __byte_perm (w[18], w[19], selector);
|
|
w[29] = __byte_perm (w[17], w[18], selector);
|
|
w[28] = __byte_perm (w[16], w[17], selector);
|
|
w[27] = __byte_perm (w[15], w[16], selector);
|
|
w[26] = __byte_perm (w[14], w[15], selector);
|
|
w[25] = __byte_perm (w[13], w[14], selector);
|
|
w[24] = __byte_perm (w[12], w[13], selector);
|
|
w[23] = __byte_perm (w[11], w[12], selector);
|
|
w[22] = __byte_perm (w[10], w[11], selector);
|
|
w[21] = __byte_perm (w[ 9], w[10], selector);
|
|
w[20] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[19] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[18] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[17] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[16] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[15] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[14] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[13] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[12] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[11] = __byte_perm ( 0, w[ 0], selector);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = __byte_perm (w[50], w[51], selector);
|
|
w[62] = __byte_perm (w[49], w[50], selector);
|
|
w[61] = __byte_perm (w[48], w[49], selector);
|
|
w[60] = __byte_perm (w[47], w[48], selector);
|
|
w[59] = __byte_perm (w[46], w[47], selector);
|
|
w[58] = __byte_perm (w[45], w[46], selector);
|
|
w[57] = __byte_perm (w[44], w[45], selector);
|
|
w[56] = __byte_perm (w[43], w[44], selector);
|
|
w[55] = __byte_perm (w[42], w[43], selector);
|
|
w[54] = __byte_perm (w[41], w[42], selector);
|
|
w[53] = __byte_perm (w[40], w[41], selector);
|
|
w[52] = __byte_perm (w[39], w[40], selector);
|
|
w[51] = __byte_perm (w[38], w[39], selector);
|
|
w[50] = __byte_perm (w[37], w[38], selector);
|
|
w[49] = __byte_perm (w[36], w[37], selector);
|
|
w[48] = __byte_perm (w[35], w[36], selector);
|
|
w[47] = __byte_perm (w[34], w[35], selector);
|
|
w[46] = __byte_perm (w[33], w[34], selector);
|
|
w[45] = __byte_perm (w[32], w[33], selector);
|
|
w[44] = __byte_perm (w[31], w[32], selector);
|
|
w[43] = __byte_perm (w[30], w[31], selector);
|
|
w[42] = __byte_perm (w[29], w[30], selector);
|
|
w[41] = __byte_perm (w[28], w[29], selector);
|
|
w[40] = __byte_perm (w[27], w[28], selector);
|
|
w[39] = __byte_perm (w[26], w[27], selector);
|
|
w[38] = __byte_perm (w[25], w[26], selector);
|
|
w[37] = __byte_perm (w[24], w[25], selector);
|
|
w[36] = __byte_perm (w[23], w[24], selector);
|
|
w[35] = __byte_perm (w[22], w[23], selector);
|
|
w[34] = __byte_perm (w[21], w[22], selector);
|
|
w[33] = __byte_perm (w[20], w[21], selector);
|
|
w[32] = __byte_perm (w[19], w[20], selector);
|
|
w[31] = __byte_perm (w[18], w[19], selector);
|
|
w[30] = __byte_perm (w[17], w[18], selector);
|
|
w[29] = __byte_perm (w[16], w[17], selector);
|
|
w[28] = __byte_perm (w[15], w[16], selector);
|
|
w[27] = __byte_perm (w[14], w[15], selector);
|
|
w[26] = __byte_perm (w[13], w[14], selector);
|
|
w[25] = __byte_perm (w[12], w[13], selector);
|
|
w[24] = __byte_perm (w[11], w[12], selector);
|
|
w[23] = __byte_perm (w[10], w[11], selector);
|
|
w[22] = __byte_perm (w[ 9], w[10], selector);
|
|
w[21] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[20] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[19] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[18] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[17] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[16] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[15] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[14] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[13] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[12] = __byte_perm ( 0, w[ 0], selector);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = __byte_perm (w[49], w[50], selector);
|
|
w[62] = __byte_perm (w[48], w[49], selector);
|
|
w[61] = __byte_perm (w[47], w[48], selector);
|
|
w[60] = __byte_perm (w[46], w[47], selector);
|
|
w[59] = __byte_perm (w[45], w[46], selector);
|
|
w[58] = __byte_perm (w[44], w[45], selector);
|
|
w[57] = __byte_perm (w[43], w[44], selector);
|
|
w[56] = __byte_perm (w[42], w[43], selector);
|
|
w[55] = __byte_perm (w[41], w[42], selector);
|
|
w[54] = __byte_perm (w[40], w[41], selector);
|
|
w[53] = __byte_perm (w[39], w[40], selector);
|
|
w[52] = __byte_perm (w[38], w[39], selector);
|
|
w[51] = __byte_perm (w[37], w[38], selector);
|
|
w[50] = __byte_perm (w[36], w[37], selector);
|
|
w[49] = __byte_perm (w[35], w[36], selector);
|
|
w[48] = __byte_perm (w[34], w[35], selector);
|
|
w[47] = __byte_perm (w[33], w[34], selector);
|
|
w[46] = __byte_perm (w[32], w[33], selector);
|
|
w[45] = __byte_perm (w[31], w[32], selector);
|
|
w[44] = __byte_perm (w[30], w[31], selector);
|
|
w[43] = __byte_perm (w[29], w[30], selector);
|
|
w[42] = __byte_perm (w[28], w[29], selector);
|
|
w[41] = __byte_perm (w[27], w[28], selector);
|
|
w[40] = __byte_perm (w[26], w[27], selector);
|
|
w[39] = __byte_perm (w[25], w[26], selector);
|
|
w[38] = __byte_perm (w[24], w[25], selector);
|
|
w[37] = __byte_perm (w[23], w[24], selector);
|
|
w[36] = __byte_perm (w[22], w[23], selector);
|
|
w[35] = __byte_perm (w[21], w[22], selector);
|
|
w[34] = __byte_perm (w[20], w[21], selector);
|
|
w[33] = __byte_perm (w[19], w[20], selector);
|
|
w[32] = __byte_perm (w[18], w[19], selector);
|
|
w[31] = __byte_perm (w[17], w[18], selector);
|
|
w[30] = __byte_perm (w[16], w[17], selector);
|
|
w[29] = __byte_perm (w[15], w[16], selector);
|
|
w[28] = __byte_perm (w[14], w[15], selector);
|
|
w[27] = __byte_perm (w[13], w[14], selector);
|
|
w[26] = __byte_perm (w[12], w[13], selector);
|
|
w[25] = __byte_perm (w[11], w[12], selector);
|
|
w[24] = __byte_perm (w[10], w[11], selector);
|
|
w[23] = __byte_perm (w[ 9], w[10], selector);
|
|
w[22] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[21] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[20] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[19] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[18] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[17] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[16] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[15] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[14] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[13] = __byte_perm ( 0, w[ 0], selector);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = __byte_perm (w[48], w[49], selector);
|
|
w[62] = __byte_perm (w[47], w[48], selector);
|
|
w[61] = __byte_perm (w[46], w[47], selector);
|
|
w[60] = __byte_perm (w[45], w[46], selector);
|
|
w[59] = __byte_perm (w[44], w[45], selector);
|
|
w[58] = __byte_perm (w[43], w[44], selector);
|
|
w[57] = __byte_perm (w[42], w[43], selector);
|
|
w[56] = __byte_perm (w[41], w[42], selector);
|
|
w[55] = __byte_perm (w[40], w[41], selector);
|
|
w[54] = __byte_perm (w[39], w[40], selector);
|
|
w[53] = __byte_perm (w[38], w[39], selector);
|
|
w[52] = __byte_perm (w[37], w[38], selector);
|
|
w[51] = __byte_perm (w[36], w[37], selector);
|
|
w[50] = __byte_perm (w[35], w[36], selector);
|
|
w[49] = __byte_perm (w[34], w[35], selector);
|
|
w[48] = __byte_perm (w[33], w[34], selector);
|
|
w[47] = __byte_perm (w[32], w[33], selector);
|
|
w[46] = __byte_perm (w[31], w[32], selector);
|
|
w[45] = __byte_perm (w[30], w[31], selector);
|
|
w[44] = __byte_perm (w[29], w[30], selector);
|
|
w[43] = __byte_perm (w[28], w[29], selector);
|
|
w[42] = __byte_perm (w[27], w[28], selector);
|
|
w[41] = __byte_perm (w[26], w[27], selector);
|
|
w[40] = __byte_perm (w[25], w[26], selector);
|
|
w[39] = __byte_perm (w[24], w[25], selector);
|
|
w[38] = __byte_perm (w[23], w[24], selector);
|
|
w[37] = __byte_perm (w[22], w[23], selector);
|
|
w[36] = __byte_perm (w[21], w[22], selector);
|
|
w[35] = __byte_perm (w[20], w[21], selector);
|
|
w[34] = __byte_perm (w[19], w[20], selector);
|
|
w[33] = __byte_perm (w[18], w[19], selector);
|
|
w[32] = __byte_perm (w[17], w[18], selector);
|
|
w[31] = __byte_perm (w[16], w[17], selector);
|
|
w[30] = __byte_perm (w[15], w[16], selector);
|
|
w[29] = __byte_perm (w[14], w[15], selector);
|
|
w[28] = __byte_perm (w[13], w[14], selector);
|
|
w[27] = __byte_perm (w[12], w[13], selector);
|
|
w[26] = __byte_perm (w[11], w[12], selector);
|
|
w[25] = __byte_perm (w[10], w[11], selector);
|
|
w[24] = __byte_perm (w[ 9], w[10], selector);
|
|
w[23] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[22] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[21] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[20] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[19] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[18] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[17] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[16] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[15] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[14] = __byte_perm ( 0, w[ 0], selector);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = __byte_perm (w[47], w[48], selector);
|
|
w[62] = __byte_perm (w[46], w[47], selector);
|
|
w[61] = __byte_perm (w[45], w[46], selector);
|
|
w[60] = __byte_perm (w[44], w[45], selector);
|
|
w[59] = __byte_perm (w[43], w[44], selector);
|
|
w[58] = __byte_perm (w[42], w[43], selector);
|
|
w[57] = __byte_perm (w[41], w[42], selector);
|
|
w[56] = __byte_perm (w[40], w[41], selector);
|
|
w[55] = __byte_perm (w[39], w[40], selector);
|
|
w[54] = __byte_perm (w[38], w[39], selector);
|
|
w[53] = __byte_perm (w[37], w[38], selector);
|
|
w[52] = __byte_perm (w[36], w[37], selector);
|
|
w[51] = __byte_perm (w[35], w[36], selector);
|
|
w[50] = __byte_perm (w[34], w[35], selector);
|
|
w[49] = __byte_perm (w[33], w[34], selector);
|
|
w[48] = __byte_perm (w[32], w[33], selector);
|
|
w[47] = __byte_perm (w[31], w[32], selector);
|
|
w[46] = __byte_perm (w[30], w[31], selector);
|
|
w[45] = __byte_perm (w[29], w[30], selector);
|
|
w[44] = __byte_perm (w[28], w[29], selector);
|
|
w[43] = __byte_perm (w[27], w[28], selector);
|
|
w[42] = __byte_perm (w[26], w[27], selector);
|
|
w[41] = __byte_perm (w[25], w[26], selector);
|
|
w[40] = __byte_perm (w[24], w[25], selector);
|
|
w[39] = __byte_perm (w[23], w[24], selector);
|
|
w[38] = __byte_perm (w[22], w[23], selector);
|
|
w[37] = __byte_perm (w[21], w[22], selector);
|
|
w[36] = __byte_perm (w[20], w[21], selector);
|
|
w[35] = __byte_perm (w[19], w[20], selector);
|
|
w[34] = __byte_perm (w[18], w[19], selector);
|
|
w[33] = __byte_perm (w[17], w[18], selector);
|
|
w[32] = __byte_perm (w[16], w[17], selector);
|
|
w[31] = __byte_perm (w[15], w[16], selector);
|
|
w[30] = __byte_perm (w[14], w[15], selector);
|
|
w[29] = __byte_perm (w[13], w[14], selector);
|
|
w[28] = __byte_perm (w[12], w[13], selector);
|
|
w[27] = __byte_perm (w[11], w[12], selector);
|
|
w[26] = __byte_perm (w[10], w[11], selector);
|
|
w[25] = __byte_perm (w[ 9], w[10], selector);
|
|
w[24] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[23] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[22] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[21] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[20] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[19] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[18] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[17] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[16] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[15] = __byte_perm ( 0, w[ 0], selector);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = __byte_perm (w[46], w[47], selector);
|
|
w[62] = __byte_perm (w[45], w[46], selector);
|
|
w[61] = __byte_perm (w[44], w[45], selector);
|
|
w[60] = __byte_perm (w[43], w[44], selector);
|
|
w[59] = __byte_perm (w[42], w[43], selector);
|
|
w[58] = __byte_perm (w[41], w[42], selector);
|
|
w[57] = __byte_perm (w[40], w[41], selector);
|
|
w[56] = __byte_perm (w[39], w[40], selector);
|
|
w[55] = __byte_perm (w[38], w[39], selector);
|
|
w[54] = __byte_perm (w[37], w[38], selector);
|
|
w[53] = __byte_perm (w[36], w[37], selector);
|
|
w[52] = __byte_perm (w[35], w[36], selector);
|
|
w[51] = __byte_perm (w[34], w[35], selector);
|
|
w[50] = __byte_perm (w[33], w[34], selector);
|
|
w[49] = __byte_perm (w[32], w[33], selector);
|
|
w[48] = __byte_perm (w[31], w[32], selector);
|
|
w[47] = __byte_perm (w[30], w[31], selector);
|
|
w[46] = __byte_perm (w[29], w[30], selector);
|
|
w[45] = __byte_perm (w[28], w[29], selector);
|
|
w[44] = __byte_perm (w[27], w[28], selector);
|
|
w[43] = __byte_perm (w[26], w[27], selector);
|
|
w[42] = __byte_perm (w[25], w[26], selector);
|
|
w[41] = __byte_perm (w[24], w[25], selector);
|
|
w[40] = __byte_perm (w[23], w[24], selector);
|
|
w[39] = __byte_perm (w[22], w[23], selector);
|
|
w[38] = __byte_perm (w[21], w[22], selector);
|
|
w[37] = __byte_perm (w[20], w[21], selector);
|
|
w[36] = __byte_perm (w[19], w[20], selector);
|
|
w[35] = __byte_perm (w[18], w[19], selector);
|
|
w[34] = __byte_perm (w[17], w[18], selector);
|
|
w[33] = __byte_perm (w[16], w[17], selector);
|
|
w[32] = __byte_perm (w[15], w[16], selector);
|
|
w[31] = __byte_perm (w[14], w[15], selector);
|
|
w[30] = __byte_perm (w[13], w[14], selector);
|
|
w[29] = __byte_perm (w[12], w[13], selector);
|
|
w[28] = __byte_perm (w[11], w[12], selector);
|
|
w[27] = __byte_perm (w[10], w[11], selector);
|
|
w[26] = __byte_perm (w[ 9], w[10], selector);
|
|
w[25] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[24] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[23] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[22] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[21] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[20] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[19] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[18] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[17] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[16] = __byte_perm ( 0, w[ 0], selector);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = __byte_perm (w[45], w[46], selector);
|
|
w[62] = __byte_perm (w[44], w[45], selector);
|
|
w[61] = __byte_perm (w[43], w[44], selector);
|
|
w[60] = __byte_perm (w[42], w[43], selector);
|
|
w[59] = __byte_perm (w[41], w[42], selector);
|
|
w[58] = __byte_perm (w[40], w[41], selector);
|
|
w[57] = __byte_perm (w[39], w[40], selector);
|
|
w[56] = __byte_perm (w[38], w[39], selector);
|
|
w[55] = __byte_perm (w[37], w[38], selector);
|
|
w[54] = __byte_perm (w[36], w[37], selector);
|
|
w[53] = __byte_perm (w[35], w[36], selector);
|
|
w[52] = __byte_perm (w[34], w[35], selector);
|
|
w[51] = __byte_perm (w[33], w[34], selector);
|
|
w[50] = __byte_perm (w[32], w[33], selector);
|
|
w[49] = __byte_perm (w[31], w[32], selector);
|
|
w[48] = __byte_perm (w[30], w[31], selector);
|
|
w[47] = __byte_perm (w[29], w[30], selector);
|
|
w[46] = __byte_perm (w[28], w[29], selector);
|
|
w[45] = __byte_perm (w[27], w[28], selector);
|
|
w[44] = __byte_perm (w[26], w[27], selector);
|
|
w[43] = __byte_perm (w[25], w[26], selector);
|
|
w[42] = __byte_perm (w[24], w[25], selector);
|
|
w[41] = __byte_perm (w[23], w[24], selector);
|
|
w[40] = __byte_perm (w[22], w[23], selector);
|
|
w[39] = __byte_perm (w[21], w[22], selector);
|
|
w[38] = __byte_perm (w[20], w[21], selector);
|
|
w[37] = __byte_perm (w[19], w[20], selector);
|
|
w[36] = __byte_perm (w[18], w[19], selector);
|
|
w[35] = __byte_perm (w[17], w[18], selector);
|
|
w[34] = __byte_perm (w[16], w[17], selector);
|
|
w[33] = __byte_perm (w[15], w[16], selector);
|
|
w[32] = __byte_perm (w[14], w[15], selector);
|
|
w[31] = __byte_perm (w[13], w[14], selector);
|
|
w[30] = __byte_perm (w[12], w[13], selector);
|
|
w[29] = __byte_perm (w[11], w[12], selector);
|
|
w[28] = __byte_perm (w[10], w[11], selector);
|
|
w[27] = __byte_perm (w[ 9], w[10], selector);
|
|
w[26] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[25] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[24] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[23] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[22] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[21] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[20] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[19] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[18] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[17] = __byte_perm ( 0, w[ 0], selector);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = __byte_perm (w[44], w[45], selector);
|
|
w[62] = __byte_perm (w[43], w[44], selector);
|
|
w[61] = __byte_perm (w[42], w[43], selector);
|
|
w[60] = __byte_perm (w[41], w[42], selector);
|
|
w[59] = __byte_perm (w[40], w[41], selector);
|
|
w[58] = __byte_perm (w[39], w[40], selector);
|
|
w[57] = __byte_perm (w[38], w[39], selector);
|
|
w[56] = __byte_perm (w[37], w[38], selector);
|
|
w[55] = __byte_perm (w[36], w[37], selector);
|
|
w[54] = __byte_perm (w[35], w[36], selector);
|
|
w[53] = __byte_perm (w[34], w[35], selector);
|
|
w[52] = __byte_perm (w[33], w[34], selector);
|
|
w[51] = __byte_perm (w[32], w[33], selector);
|
|
w[50] = __byte_perm (w[31], w[32], selector);
|
|
w[49] = __byte_perm (w[30], w[31], selector);
|
|
w[48] = __byte_perm (w[29], w[30], selector);
|
|
w[47] = __byte_perm (w[28], w[29], selector);
|
|
w[46] = __byte_perm (w[27], w[28], selector);
|
|
w[45] = __byte_perm (w[26], w[27], selector);
|
|
w[44] = __byte_perm (w[25], w[26], selector);
|
|
w[43] = __byte_perm (w[24], w[25], selector);
|
|
w[42] = __byte_perm (w[23], w[24], selector);
|
|
w[41] = __byte_perm (w[22], w[23], selector);
|
|
w[40] = __byte_perm (w[21], w[22], selector);
|
|
w[39] = __byte_perm (w[20], w[21], selector);
|
|
w[38] = __byte_perm (w[19], w[20], selector);
|
|
w[37] = __byte_perm (w[18], w[19], selector);
|
|
w[36] = __byte_perm (w[17], w[18], selector);
|
|
w[35] = __byte_perm (w[16], w[17], selector);
|
|
w[34] = __byte_perm (w[15], w[16], selector);
|
|
w[33] = __byte_perm (w[14], w[15], selector);
|
|
w[32] = __byte_perm (w[13], w[14], selector);
|
|
w[31] = __byte_perm (w[12], w[13], selector);
|
|
w[30] = __byte_perm (w[11], w[12], selector);
|
|
w[29] = __byte_perm (w[10], w[11], selector);
|
|
w[28] = __byte_perm (w[ 9], w[10], selector);
|
|
w[27] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[26] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[25] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[24] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[23] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[22] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[21] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[20] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[19] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[18] = __byte_perm ( 0, w[ 0], selector);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = __byte_perm (w[43], w[44], selector);
|
|
w[62] = __byte_perm (w[42], w[43], selector);
|
|
w[61] = __byte_perm (w[41], w[42], selector);
|
|
w[60] = __byte_perm (w[40], w[41], selector);
|
|
w[59] = __byte_perm (w[39], w[40], selector);
|
|
w[58] = __byte_perm (w[38], w[39], selector);
|
|
w[57] = __byte_perm (w[37], w[38], selector);
|
|
w[56] = __byte_perm (w[36], w[37], selector);
|
|
w[55] = __byte_perm (w[35], w[36], selector);
|
|
w[54] = __byte_perm (w[34], w[35], selector);
|
|
w[53] = __byte_perm (w[33], w[34], selector);
|
|
w[52] = __byte_perm (w[32], w[33], selector);
|
|
w[51] = __byte_perm (w[31], w[32], selector);
|
|
w[50] = __byte_perm (w[30], w[31], selector);
|
|
w[49] = __byte_perm (w[29], w[30], selector);
|
|
w[48] = __byte_perm (w[28], w[29], selector);
|
|
w[47] = __byte_perm (w[27], w[28], selector);
|
|
w[46] = __byte_perm (w[26], w[27], selector);
|
|
w[45] = __byte_perm (w[25], w[26], selector);
|
|
w[44] = __byte_perm (w[24], w[25], selector);
|
|
w[43] = __byte_perm (w[23], w[24], selector);
|
|
w[42] = __byte_perm (w[22], w[23], selector);
|
|
w[41] = __byte_perm (w[21], w[22], selector);
|
|
w[40] = __byte_perm (w[20], w[21], selector);
|
|
w[39] = __byte_perm (w[19], w[20], selector);
|
|
w[38] = __byte_perm (w[18], w[19], selector);
|
|
w[37] = __byte_perm (w[17], w[18], selector);
|
|
w[36] = __byte_perm (w[16], w[17], selector);
|
|
w[35] = __byte_perm (w[15], w[16], selector);
|
|
w[34] = __byte_perm (w[14], w[15], selector);
|
|
w[33] = __byte_perm (w[13], w[14], selector);
|
|
w[32] = __byte_perm (w[12], w[13], selector);
|
|
w[31] = __byte_perm (w[11], w[12], selector);
|
|
w[30] = __byte_perm (w[10], w[11], selector);
|
|
w[29] = __byte_perm (w[ 9], w[10], selector);
|
|
w[28] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[27] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[26] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[25] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[24] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[23] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[22] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[21] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[20] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[19] = __byte_perm ( 0, w[ 0], selector);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = __byte_perm (w[42], w[43], selector);
|
|
w[62] = __byte_perm (w[41], w[42], selector);
|
|
w[61] = __byte_perm (w[40], w[41], selector);
|
|
w[60] = __byte_perm (w[39], w[40], selector);
|
|
w[59] = __byte_perm (w[38], w[39], selector);
|
|
w[58] = __byte_perm (w[37], w[38], selector);
|
|
w[57] = __byte_perm (w[36], w[37], selector);
|
|
w[56] = __byte_perm (w[35], w[36], selector);
|
|
w[55] = __byte_perm (w[34], w[35], selector);
|
|
w[54] = __byte_perm (w[33], w[34], selector);
|
|
w[53] = __byte_perm (w[32], w[33], selector);
|
|
w[52] = __byte_perm (w[31], w[32], selector);
|
|
w[51] = __byte_perm (w[30], w[31], selector);
|
|
w[50] = __byte_perm (w[29], w[30], selector);
|
|
w[49] = __byte_perm (w[28], w[29], selector);
|
|
w[48] = __byte_perm (w[27], w[28], selector);
|
|
w[47] = __byte_perm (w[26], w[27], selector);
|
|
w[46] = __byte_perm (w[25], w[26], selector);
|
|
w[45] = __byte_perm (w[24], w[25], selector);
|
|
w[44] = __byte_perm (w[23], w[24], selector);
|
|
w[43] = __byte_perm (w[22], w[23], selector);
|
|
w[42] = __byte_perm (w[21], w[22], selector);
|
|
w[41] = __byte_perm (w[20], w[21], selector);
|
|
w[40] = __byte_perm (w[19], w[20], selector);
|
|
w[39] = __byte_perm (w[18], w[19], selector);
|
|
w[38] = __byte_perm (w[17], w[18], selector);
|
|
w[37] = __byte_perm (w[16], w[17], selector);
|
|
w[36] = __byte_perm (w[15], w[16], selector);
|
|
w[35] = __byte_perm (w[14], w[15], selector);
|
|
w[34] = __byte_perm (w[13], w[14], selector);
|
|
w[33] = __byte_perm (w[12], w[13], selector);
|
|
w[32] = __byte_perm (w[11], w[12], selector);
|
|
w[31] = __byte_perm (w[10], w[11], selector);
|
|
w[30] = __byte_perm (w[ 9], w[10], selector);
|
|
w[29] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[28] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[27] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[26] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[25] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[24] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[23] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[22] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[21] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[20] = __byte_perm ( 0, w[ 0], selector);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = __byte_perm (w[41], w[42], selector);
|
|
w[62] = __byte_perm (w[40], w[41], selector);
|
|
w[61] = __byte_perm (w[39], w[40], selector);
|
|
w[60] = __byte_perm (w[38], w[39], selector);
|
|
w[59] = __byte_perm (w[37], w[38], selector);
|
|
w[58] = __byte_perm (w[36], w[37], selector);
|
|
w[57] = __byte_perm (w[35], w[36], selector);
|
|
w[56] = __byte_perm (w[34], w[35], selector);
|
|
w[55] = __byte_perm (w[33], w[34], selector);
|
|
w[54] = __byte_perm (w[32], w[33], selector);
|
|
w[53] = __byte_perm (w[31], w[32], selector);
|
|
w[52] = __byte_perm (w[30], w[31], selector);
|
|
w[51] = __byte_perm (w[29], w[30], selector);
|
|
w[50] = __byte_perm (w[28], w[29], selector);
|
|
w[49] = __byte_perm (w[27], w[28], selector);
|
|
w[48] = __byte_perm (w[26], w[27], selector);
|
|
w[47] = __byte_perm (w[25], w[26], selector);
|
|
w[46] = __byte_perm (w[24], w[25], selector);
|
|
w[45] = __byte_perm (w[23], w[24], selector);
|
|
w[44] = __byte_perm (w[22], w[23], selector);
|
|
w[43] = __byte_perm (w[21], w[22], selector);
|
|
w[42] = __byte_perm (w[20], w[21], selector);
|
|
w[41] = __byte_perm (w[19], w[20], selector);
|
|
w[40] = __byte_perm (w[18], w[19], selector);
|
|
w[39] = __byte_perm (w[17], w[18], selector);
|
|
w[38] = __byte_perm (w[16], w[17], selector);
|
|
w[37] = __byte_perm (w[15], w[16], selector);
|
|
w[36] = __byte_perm (w[14], w[15], selector);
|
|
w[35] = __byte_perm (w[13], w[14], selector);
|
|
w[34] = __byte_perm (w[12], w[13], selector);
|
|
w[33] = __byte_perm (w[11], w[12], selector);
|
|
w[32] = __byte_perm (w[10], w[11], selector);
|
|
w[31] = __byte_perm (w[ 9], w[10], selector);
|
|
w[30] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[29] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[28] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[27] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[26] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[25] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[24] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[23] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[22] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[21] = __byte_perm ( 0, w[ 0], selector);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = __byte_perm (w[40], w[41], selector);
|
|
w[62] = __byte_perm (w[39], w[40], selector);
|
|
w[61] = __byte_perm (w[38], w[39], selector);
|
|
w[60] = __byte_perm (w[37], w[38], selector);
|
|
w[59] = __byte_perm (w[36], w[37], selector);
|
|
w[58] = __byte_perm (w[35], w[36], selector);
|
|
w[57] = __byte_perm (w[34], w[35], selector);
|
|
w[56] = __byte_perm (w[33], w[34], selector);
|
|
w[55] = __byte_perm (w[32], w[33], selector);
|
|
w[54] = __byte_perm (w[31], w[32], selector);
|
|
w[53] = __byte_perm (w[30], w[31], selector);
|
|
w[52] = __byte_perm (w[29], w[30], selector);
|
|
w[51] = __byte_perm (w[28], w[29], selector);
|
|
w[50] = __byte_perm (w[27], w[28], selector);
|
|
w[49] = __byte_perm (w[26], w[27], selector);
|
|
w[48] = __byte_perm (w[25], w[26], selector);
|
|
w[47] = __byte_perm (w[24], w[25], selector);
|
|
w[46] = __byte_perm (w[23], w[24], selector);
|
|
w[45] = __byte_perm (w[22], w[23], selector);
|
|
w[44] = __byte_perm (w[21], w[22], selector);
|
|
w[43] = __byte_perm (w[20], w[21], selector);
|
|
w[42] = __byte_perm (w[19], w[20], selector);
|
|
w[41] = __byte_perm (w[18], w[19], selector);
|
|
w[40] = __byte_perm (w[17], w[18], selector);
|
|
w[39] = __byte_perm (w[16], w[17], selector);
|
|
w[38] = __byte_perm (w[15], w[16], selector);
|
|
w[37] = __byte_perm (w[14], w[15], selector);
|
|
w[36] = __byte_perm (w[13], w[14], selector);
|
|
w[35] = __byte_perm (w[12], w[13], selector);
|
|
w[34] = __byte_perm (w[11], w[12], selector);
|
|
w[33] = __byte_perm (w[10], w[11], selector);
|
|
w[32] = __byte_perm (w[ 9], w[10], selector);
|
|
w[31] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[30] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[29] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[28] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[27] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[26] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[25] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[24] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[23] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[22] = __byte_perm ( 0, w[ 0], selector);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = __byte_perm (w[39], w[40], selector);
|
|
w[62] = __byte_perm (w[38], w[39], selector);
|
|
w[61] = __byte_perm (w[37], w[38], selector);
|
|
w[60] = __byte_perm (w[36], w[37], selector);
|
|
w[59] = __byte_perm (w[35], w[36], selector);
|
|
w[58] = __byte_perm (w[34], w[35], selector);
|
|
w[57] = __byte_perm (w[33], w[34], selector);
|
|
w[56] = __byte_perm (w[32], w[33], selector);
|
|
w[55] = __byte_perm (w[31], w[32], selector);
|
|
w[54] = __byte_perm (w[30], w[31], selector);
|
|
w[53] = __byte_perm (w[29], w[30], selector);
|
|
w[52] = __byte_perm (w[28], w[29], selector);
|
|
w[51] = __byte_perm (w[27], w[28], selector);
|
|
w[50] = __byte_perm (w[26], w[27], selector);
|
|
w[49] = __byte_perm (w[25], w[26], selector);
|
|
w[48] = __byte_perm (w[24], w[25], selector);
|
|
w[47] = __byte_perm (w[23], w[24], selector);
|
|
w[46] = __byte_perm (w[22], w[23], selector);
|
|
w[45] = __byte_perm (w[21], w[22], selector);
|
|
w[44] = __byte_perm (w[20], w[21], selector);
|
|
w[43] = __byte_perm (w[19], w[20], selector);
|
|
w[42] = __byte_perm (w[18], w[19], selector);
|
|
w[41] = __byte_perm (w[17], w[18], selector);
|
|
w[40] = __byte_perm (w[16], w[17], selector);
|
|
w[39] = __byte_perm (w[15], w[16], selector);
|
|
w[38] = __byte_perm (w[14], w[15], selector);
|
|
w[37] = __byte_perm (w[13], w[14], selector);
|
|
w[36] = __byte_perm (w[12], w[13], selector);
|
|
w[35] = __byte_perm (w[11], w[12], selector);
|
|
w[34] = __byte_perm (w[10], w[11], selector);
|
|
w[33] = __byte_perm (w[ 9], w[10], selector);
|
|
w[32] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[31] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[30] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[29] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[28] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[27] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[26] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[25] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[24] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[23] = __byte_perm ( 0, w[ 0], selector);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = __byte_perm (w[38], w[39], selector);
|
|
w[62] = __byte_perm (w[37], w[38], selector);
|
|
w[61] = __byte_perm (w[36], w[37], selector);
|
|
w[60] = __byte_perm (w[35], w[36], selector);
|
|
w[59] = __byte_perm (w[34], w[35], selector);
|
|
w[58] = __byte_perm (w[33], w[34], selector);
|
|
w[57] = __byte_perm (w[32], w[33], selector);
|
|
w[56] = __byte_perm (w[31], w[32], selector);
|
|
w[55] = __byte_perm (w[30], w[31], selector);
|
|
w[54] = __byte_perm (w[29], w[30], selector);
|
|
w[53] = __byte_perm (w[28], w[29], selector);
|
|
w[52] = __byte_perm (w[27], w[28], selector);
|
|
w[51] = __byte_perm (w[26], w[27], selector);
|
|
w[50] = __byte_perm (w[25], w[26], selector);
|
|
w[49] = __byte_perm (w[24], w[25], selector);
|
|
w[48] = __byte_perm (w[23], w[24], selector);
|
|
w[47] = __byte_perm (w[22], w[23], selector);
|
|
w[46] = __byte_perm (w[21], w[22], selector);
|
|
w[45] = __byte_perm (w[20], w[21], selector);
|
|
w[44] = __byte_perm (w[19], w[20], selector);
|
|
w[43] = __byte_perm (w[18], w[19], selector);
|
|
w[42] = __byte_perm (w[17], w[18], selector);
|
|
w[41] = __byte_perm (w[16], w[17], selector);
|
|
w[40] = __byte_perm (w[15], w[16], selector);
|
|
w[39] = __byte_perm (w[14], w[15], selector);
|
|
w[38] = __byte_perm (w[13], w[14], selector);
|
|
w[37] = __byte_perm (w[12], w[13], selector);
|
|
w[36] = __byte_perm (w[11], w[12], selector);
|
|
w[35] = __byte_perm (w[10], w[11], selector);
|
|
w[34] = __byte_perm (w[ 9], w[10], selector);
|
|
w[33] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[32] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[31] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[30] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[29] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[28] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[27] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[26] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[25] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[24] = __byte_perm ( 0, w[ 0], selector);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = __byte_perm (w[37], w[38], selector);
|
|
w[62] = __byte_perm (w[36], w[37], selector);
|
|
w[61] = __byte_perm (w[35], w[36], selector);
|
|
w[60] = __byte_perm (w[34], w[35], selector);
|
|
w[59] = __byte_perm (w[33], w[34], selector);
|
|
w[58] = __byte_perm (w[32], w[33], selector);
|
|
w[57] = __byte_perm (w[31], w[32], selector);
|
|
w[56] = __byte_perm (w[30], w[31], selector);
|
|
w[55] = __byte_perm (w[29], w[30], selector);
|
|
w[54] = __byte_perm (w[28], w[29], selector);
|
|
w[53] = __byte_perm (w[27], w[28], selector);
|
|
w[52] = __byte_perm (w[26], w[27], selector);
|
|
w[51] = __byte_perm (w[25], w[26], selector);
|
|
w[50] = __byte_perm (w[24], w[25], selector);
|
|
w[49] = __byte_perm (w[23], w[24], selector);
|
|
w[48] = __byte_perm (w[22], w[23], selector);
|
|
w[47] = __byte_perm (w[21], w[22], selector);
|
|
w[46] = __byte_perm (w[20], w[21], selector);
|
|
w[45] = __byte_perm (w[19], w[20], selector);
|
|
w[44] = __byte_perm (w[18], w[19], selector);
|
|
w[43] = __byte_perm (w[17], w[18], selector);
|
|
w[42] = __byte_perm (w[16], w[17], selector);
|
|
w[41] = __byte_perm (w[15], w[16], selector);
|
|
w[40] = __byte_perm (w[14], w[15], selector);
|
|
w[39] = __byte_perm (w[13], w[14], selector);
|
|
w[38] = __byte_perm (w[12], w[13], selector);
|
|
w[37] = __byte_perm (w[11], w[12], selector);
|
|
w[36] = __byte_perm (w[10], w[11], selector);
|
|
w[35] = __byte_perm (w[ 9], w[10], selector);
|
|
w[34] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[33] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[32] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[31] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[30] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[29] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[28] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[27] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[26] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[25] = __byte_perm ( 0, w[ 0], selector);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = __byte_perm (w[36], w[37], selector);
|
|
w[62] = __byte_perm (w[35], w[36], selector);
|
|
w[61] = __byte_perm (w[34], w[35], selector);
|
|
w[60] = __byte_perm (w[33], w[34], selector);
|
|
w[59] = __byte_perm (w[32], w[33], selector);
|
|
w[58] = __byte_perm (w[31], w[32], selector);
|
|
w[57] = __byte_perm (w[30], w[31], selector);
|
|
w[56] = __byte_perm (w[29], w[30], selector);
|
|
w[55] = __byte_perm (w[28], w[29], selector);
|
|
w[54] = __byte_perm (w[27], w[28], selector);
|
|
w[53] = __byte_perm (w[26], w[27], selector);
|
|
w[52] = __byte_perm (w[25], w[26], selector);
|
|
w[51] = __byte_perm (w[24], w[25], selector);
|
|
w[50] = __byte_perm (w[23], w[24], selector);
|
|
w[49] = __byte_perm (w[22], w[23], selector);
|
|
w[48] = __byte_perm (w[21], w[22], selector);
|
|
w[47] = __byte_perm (w[20], w[21], selector);
|
|
w[46] = __byte_perm (w[19], w[20], selector);
|
|
w[45] = __byte_perm (w[18], w[19], selector);
|
|
w[44] = __byte_perm (w[17], w[18], selector);
|
|
w[43] = __byte_perm (w[16], w[17], selector);
|
|
w[42] = __byte_perm (w[15], w[16], selector);
|
|
w[41] = __byte_perm (w[14], w[15], selector);
|
|
w[40] = __byte_perm (w[13], w[14], selector);
|
|
w[39] = __byte_perm (w[12], w[13], selector);
|
|
w[38] = __byte_perm (w[11], w[12], selector);
|
|
w[37] = __byte_perm (w[10], w[11], selector);
|
|
w[36] = __byte_perm (w[ 9], w[10], selector);
|
|
w[35] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[34] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[33] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[32] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[31] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[30] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[29] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[28] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[27] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[26] = __byte_perm ( 0, w[ 0], selector);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = __byte_perm (w[35], w[36], selector);
|
|
w[62] = __byte_perm (w[34], w[35], selector);
|
|
w[61] = __byte_perm (w[33], w[34], selector);
|
|
w[60] = __byte_perm (w[32], w[33], selector);
|
|
w[59] = __byte_perm (w[31], w[32], selector);
|
|
w[58] = __byte_perm (w[30], w[31], selector);
|
|
w[57] = __byte_perm (w[29], w[30], selector);
|
|
w[56] = __byte_perm (w[28], w[29], selector);
|
|
w[55] = __byte_perm (w[27], w[28], selector);
|
|
w[54] = __byte_perm (w[26], w[27], selector);
|
|
w[53] = __byte_perm (w[25], w[26], selector);
|
|
w[52] = __byte_perm (w[24], w[25], selector);
|
|
w[51] = __byte_perm (w[23], w[24], selector);
|
|
w[50] = __byte_perm (w[22], w[23], selector);
|
|
w[49] = __byte_perm (w[21], w[22], selector);
|
|
w[48] = __byte_perm (w[20], w[21], selector);
|
|
w[47] = __byte_perm (w[19], w[20], selector);
|
|
w[46] = __byte_perm (w[18], w[19], selector);
|
|
w[45] = __byte_perm (w[17], w[18], selector);
|
|
w[44] = __byte_perm (w[16], w[17], selector);
|
|
w[43] = __byte_perm (w[15], w[16], selector);
|
|
w[42] = __byte_perm (w[14], w[15], selector);
|
|
w[41] = __byte_perm (w[13], w[14], selector);
|
|
w[40] = __byte_perm (w[12], w[13], selector);
|
|
w[39] = __byte_perm (w[11], w[12], selector);
|
|
w[38] = __byte_perm (w[10], w[11], selector);
|
|
w[37] = __byte_perm (w[ 9], w[10], selector);
|
|
w[36] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[35] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[34] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[33] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[32] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[31] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[30] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[29] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[28] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[27] = __byte_perm ( 0, w[ 0], selector);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = __byte_perm (w[34], w[35], selector);
|
|
w[62] = __byte_perm (w[33], w[34], selector);
|
|
w[61] = __byte_perm (w[32], w[33], selector);
|
|
w[60] = __byte_perm (w[31], w[32], selector);
|
|
w[59] = __byte_perm (w[30], w[31], selector);
|
|
w[58] = __byte_perm (w[29], w[30], selector);
|
|
w[57] = __byte_perm (w[28], w[29], selector);
|
|
w[56] = __byte_perm (w[27], w[28], selector);
|
|
w[55] = __byte_perm (w[26], w[27], selector);
|
|
w[54] = __byte_perm (w[25], w[26], selector);
|
|
w[53] = __byte_perm (w[24], w[25], selector);
|
|
w[52] = __byte_perm (w[23], w[24], selector);
|
|
w[51] = __byte_perm (w[22], w[23], selector);
|
|
w[50] = __byte_perm (w[21], w[22], selector);
|
|
w[49] = __byte_perm (w[20], w[21], selector);
|
|
w[48] = __byte_perm (w[19], w[20], selector);
|
|
w[47] = __byte_perm (w[18], w[19], selector);
|
|
w[46] = __byte_perm (w[17], w[18], selector);
|
|
w[45] = __byte_perm (w[16], w[17], selector);
|
|
w[44] = __byte_perm (w[15], w[16], selector);
|
|
w[43] = __byte_perm (w[14], w[15], selector);
|
|
w[42] = __byte_perm (w[13], w[14], selector);
|
|
w[41] = __byte_perm (w[12], w[13], selector);
|
|
w[40] = __byte_perm (w[11], w[12], selector);
|
|
w[39] = __byte_perm (w[10], w[11], selector);
|
|
w[38] = __byte_perm (w[ 9], w[10], selector);
|
|
w[37] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[36] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[35] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[34] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[33] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[32] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[31] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[30] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[29] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[28] = __byte_perm ( 0, w[ 0], selector);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = __byte_perm (w[33], w[34], selector);
|
|
w[62] = __byte_perm (w[32], w[33], selector);
|
|
w[61] = __byte_perm (w[31], w[32], selector);
|
|
w[60] = __byte_perm (w[30], w[31], selector);
|
|
w[59] = __byte_perm (w[29], w[30], selector);
|
|
w[58] = __byte_perm (w[28], w[29], selector);
|
|
w[57] = __byte_perm (w[27], w[28], selector);
|
|
w[56] = __byte_perm (w[26], w[27], selector);
|
|
w[55] = __byte_perm (w[25], w[26], selector);
|
|
w[54] = __byte_perm (w[24], w[25], selector);
|
|
w[53] = __byte_perm (w[23], w[24], selector);
|
|
w[52] = __byte_perm (w[22], w[23], selector);
|
|
w[51] = __byte_perm (w[21], w[22], selector);
|
|
w[50] = __byte_perm (w[20], w[21], selector);
|
|
w[49] = __byte_perm (w[19], w[20], selector);
|
|
w[48] = __byte_perm (w[18], w[19], selector);
|
|
w[47] = __byte_perm (w[17], w[18], selector);
|
|
w[46] = __byte_perm (w[16], w[17], selector);
|
|
w[45] = __byte_perm (w[15], w[16], selector);
|
|
w[44] = __byte_perm (w[14], w[15], selector);
|
|
w[43] = __byte_perm (w[13], w[14], selector);
|
|
w[42] = __byte_perm (w[12], w[13], selector);
|
|
w[41] = __byte_perm (w[11], w[12], selector);
|
|
w[40] = __byte_perm (w[10], w[11], selector);
|
|
w[39] = __byte_perm (w[ 9], w[10], selector);
|
|
w[38] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[37] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[36] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[35] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[34] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[33] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[32] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[31] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[30] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[29] = __byte_perm ( 0, w[ 0], selector);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = __byte_perm (w[32], w[33], selector);
|
|
w[62] = __byte_perm (w[31], w[32], selector);
|
|
w[61] = __byte_perm (w[30], w[31], selector);
|
|
w[60] = __byte_perm (w[29], w[30], selector);
|
|
w[59] = __byte_perm (w[28], w[29], selector);
|
|
w[58] = __byte_perm (w[27], w[28], selector);
|
|
w[57] = __byte_perm (w[26], w[27], selector);
|
|
w[56] = __byte_perm (w[25], w[26], selector);
|
|
w[55] = __byte_perm (w[24], w[25], selector);
|
|
w[54] = __byte_perm (w[23], w[24], selector);
|
|
w[53] = __byte_perm (w[22], w[23], selector);
|
|
w[52] = __byte_perm (w[21], w[22], selector);
|
|
w[51] = __byte_perm (w[20], w[21], selector);
|
|
w[50] = __byte_perm (w[19], w[20], selector);
|
|
w[49] = __byte_perm (w[18], w[19], selector);
|
|
w[48] = __byte_perm (w[17], w[18], selector);
|
|
w[47] = __byte_perm (w[16], w[17], selector);
|
|
w[46] = __byte_perm (w[15], w[16], selector);
|
|
w[45] = __byte_perm (w[14], w[15], selector);
|
|
w[44] = __byte_perm (w[13], w[14], selector);
|
|
w[43] = __byte_perm (w[12], w[13], selector);
|
|
w[42] = __byte_perm (w[11], w[12], selector);
|
|
w[41] = __byte_perm (w[10], w[11], selector);
|
|
w[40] = __byte_perm (w[ 9], w[10], selector);
|
|
w[39] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[38] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[37] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[36] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[35] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[34] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[33] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[32] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[31] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[30] = __byte_perm ( 0, w[ 0], selector);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = __byte_perm (w[31], w[32], selector);
|
|
w[62] = __byte_perm (w[30], w[31], selector);
|
|
w[61] = __byte_perm (w[29], w[30], selector);
|
|
w[60] = __byte_perm (w[28], w[29], selector);
|
|
w[59] = __byte_perm (w[27], w[28], selector);
|
|
w[58] = __byte_perm (w[26], w[27], selector);
|
|
w[57] = __byte_perm (w[25], w[26], selector);
|
|
w[56] = __byte_perm (w[24], w[25], selector);
|
|
w[55] = __byte_perm (w[23], w[24], selector);
|
|
w[54] = __byte_perm (w[22], w[23], selector);
|
|
w[53] = __byte_perm (w[21], w[22], selector);
|
|
w[52] = __byte_perm (w[20], w[21], selector);
|
|
w[51] = __byte_perm (w[19], w[20], selector);
|
|
w[50] = __byte_perm (w[18], w[19], selector);
|
|
w[49] = __byte_perm (w[17], w[18], selector);
|
|
w[48] = __byte_perm (w[16], w[17], selector);
|
|
w[47] = __byte_perm (w[15], w[16], selector);
|
|
w[46] = __byte_perm (w[14], w[15], selector);
|
|
w[45] = __byte_perm (w[13], w[14], selector);
|
|
w[44] = __byte_perm (w[12], w[13], selector);
|
|
w[43] = __byte_perm (w[11], w[12], selector);
|
|
w[42] = __byte_perm (w[10], w[11], selector);
|
|
w[41] = __byte_perm (w[ 9], w[10], selector);
|
|
w[40] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[39] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[38] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[37] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[36] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[35] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[34] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[33] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[32] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[31] = __byte_perm ( 0, w[ 0], selector);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = __byte_perm (w[30], w[31], selector);
|
|
w[62] = __byte_perm (w[29], w[30], selector);
|
|
w[61] = __byte_perm (w[28], w[29], selector);
|
|
w[60] = __byte_perm (w[27], w[28], selector);
|
|
w[59] = __byte_perm (w[26], w[27], selector);
|
|
w[58] = __byte_perm (w[25], w[26], selector);
|
|
w[57] = __byte_perm (w[24], w[25], selector);
|
|
w[56] = __byte_perm (w[23], w[24], selector);
|
|
w[55] = __byte_perm (w[22], w[23], selector);
|
|
w[54] = __byte_perm (w[21], w[22], selector);
|
|
w[53] = __byte_perm (w[20], w[21], selector);
|
|
w[52] = __byte_perm (w[19], w[20], selector);
|
|
w[51] = __byte_perm (w[18], w[19], selector);
|
|
w[50] = __byte_perm (w[17], w[18], selector);
|
|
w[49] = __byte_perm (w[16], w[17], selector);
|
|
w[48] = __byte_perm (w[15], w[16], selector);
|
|
w[47] = __byte_perm (w[14], w[15], selector);
|
|
w[46] = __byte_perm (w[13], w[14], selector);
|
|
w[45] = __byte_perm (w[12], w[13], selector);
|
|
w[44] = __byte_perm (w[11], w[12], selector);
|
|
w[43] = __byte_perm (w[10], w[11], selector);
|
|
w[42] = __byte_perm (w[ 9], w[10], selector);
|
|
w[41] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[40] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[39] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[38] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[37] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[36] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[35] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[34] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[33] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[32] = __byte_perm ( 0, w[ 0], selector);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = __byte_perm (w[29], w[30], selector);
|
|
w[62] = __byte_perm (w[28], w[29], selector);
|
|
w[61] = __byte_perm (w[27], w[28], selector);
|
|
w[60] = __byte_perm (w[26], w[27], selector);
|
|
w[59] = __byte_perm (w[25], w[26], selector);
|
|
w[58] = __byte_perm (w[24], w[25], selector);
|
|
w[57] = __byte_perm (w[23], w[24], selector);
|
|
w[56] = __byte_perm (w[22], w[23], selector);
|
|
w[55] = __byte_perm (w[21], w[22], selector);
|
|
w[54] = __byte_perm (w[20], w[21], selector);
|
|
w[53] = __byte_perm (w[19], w[20], selector);
|
|
w[52] = __byte_perm (w[18], w[19], selector);
|
|
w[51] = __byte_perm (w[17], w[18], selector);
|
|
w[50] = __byte_perm (w[16], w[17], selector);
|
|
w[49] = __byte_perm (w[15], w[16], selector);
|
|
w[48] = __byte_perm (w[14], w[15], selector);
|
|
w[47] = __byte_perm (w[13], w[14], selector);
|
|
w[46] = __byte_perm (w[12], w[13], selector);
|
|
w[45] = __byte_perm (w[11], w[12], selector);
|
|
w[44] = __byte_perm (w[10], w[11], selector);
|
|
w[43] = __byte_perm (w[ 9], w[10], selector);
|
|
w[42] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[41] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[40] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[39] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[38] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[37] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[36] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[35] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[34] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[33] = __byte_perm ( 0, w[ 0], selector);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = __byte_perm (w[28], w[29], selector);
|
|
w[62] = __byte_perm (w[27], w[28], selector);
|
|
w[61] = __byte_perm (w[26], w[27], selector);
|
|
w[60] = __byte_perm (w[25], w[26], selector);
|
|
w[59] = __byte_perm (w[24], w[25], selector);
|
|
w[58] = __byte_perm (w[23], w[24], selector);
|
|
w[57] = __byte_perm (w[22], w[23], selector);
|
|
w[56] = __byte_perm (w[21], w[22], selector);
|
|
w[55] = __byte_perm (w[20], w[21], selector);
|
|
w[54] = __byte_perm (w[19], w[20], selector);
|
|
w[53] = __byte_perm (w[18], w[19], selector);
|
|
w[52] = __byte_perm (w[17], w[18], selector);
|
|
w[51] = __byte_perm (w[16], w[17], selector);
|
|
w[50] = __byte_perm (w[15], w[16], selector);
|
|
w[49] = __byte_perm (w[14], w[15], selector);
|
|
w[48] = __byte_perm (w[13], w[14], selector);
|
|
w[47] = __byte_perm (w[12], w[13], selector);
|
|
w[46] = __byte_perm (w[11], w[12], selector);
|
|
w[45] = __byte_perm (w[10], w[11], selector);
|
|
w[44] = __byte_perm (w[ 9], w[10], selector);
|
|
w[43] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[42] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[41] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[40] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[39] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[38] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[37] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[36] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[35] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[34] = __byte_perm ( 0, w[ 0], selector);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = __byte_perm (w[27], w[28], selector);
|
|
w[62] = __byte_perm (w[26], w[27], selector);
|
|
w[61] = __byte_perm (w[25], w[26], selector);
|
|
w[60] = __byte_perm (w[24], w[25], selector);
|
|
w[59] = __byte_perm (w[23], w[24], selector);
|
|
w[58] = __byte_perm (w[22], w[23], selector);
|
|
w[57] = __byte_perm (w[21], w[22], selector);
|
|
w[56] = __byte_perm (w[20], w[21], selector);
|
|
w[55] = __byte_perm (w[19], w[20], selector);
|
|
w[54] = __byte_perm (w[18], w[19], selector);
|
|
w[53] = __byte_perm (w[17], w[18], selector);
|
|
w[52] = __byte_perm (w[16], w[17], selector);
|
|
w[51] = __byte_perm (w[15], w[16], selector);
|
|
w[50] = __byte_perm (w[14], w[15], selector);
|
|
w[49] = __byte_perm (w[13], w[14], selector);
|
|
w[48] = __byte_perm (w[12], w[13], selector);
|
|
w[47] = __byte_perm (w[11], w[12], selector);
|
|
w[46] = __byte_perm (w[10], w[11], selector);
|
|
w[45] = __byte_perm (w[ 9], w[10], selector);
|
|
w[44] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[43] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[42] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[41] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[40] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[39] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[38] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[37] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[36] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[35] = __byte_perm ( 0, w[ 0], selector);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = __byte_perm (w[26], w[27], selector);
|
|
w[62] = __byte_perm (w[25], w[26], selector);
|
|
w[61] = __byte_perm (w[24], w[25], selector);
|
|
w[60] = __byte_perm (w[23], w[24], selector);
|
|
w[59] = __byte_perm (w[22], w[23], selector);
|
|
w[58] = __byte_perm (w[21], w[22], selector);
|
|
w[57] = __byte_perm (w[20], w[21], selector);
|
|
w[56] = __byte_perm (w[19], w[20], selector);
|
|
w[55] = __byte_perm (w[18], w[19], selector);
|
|
w[54] = __byte_perm (w[17], w[18], selector);
|
|
w[53] = __byte_perm (w[16], w[17], selector);
|
|
w[52] = __byte_perm (w[15], w[16], selector);
|
|
w[51] = __byte_perm (w[14], w[15], selector);
|
|
w[50] = __byte_perm (w[13], w[14], selector);
|
|
w[49] = __byte_perm (w[12], w[13], selector);
|
|
w[48] = __byte_perm (w[11], w[12], selector);
|
|
w[47] = __byte_perm (w[10], w[11], selector);
|
|
w[46] = __byte_perm (w[ 9], w[10], selector);
|
|
w[45] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[44] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[43] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[42] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[41] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[40] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[39] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[38] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[37] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[36] = __byte_perm ( 0, w[ 0], selector);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = __byte_perm (w[25], w[26], selector);
|
|
w[62] = __byte_perm (w[24], w[25], selector);
|
|
w[61] = __byte_perm (w[23], w[24], selector);
|
|
w[60] = __byte_perm (w[22], w[23], selector);
|
|
w[59] = __byte_perm (w[21], w[22], selector);
|
|
w[58] = __byte_perm (w[20], w[21], selector);
|
|
w[57] = __byte_perm (w[19], w[20], selector);
|
|
w[56] = __byte_perm (w[18], w[19], selector);
|
|
w[55] = __byte_perm (w[17], w[18], selector);
|
|
w[54] = __byte_perm (w[16], w[17], selector);
|
|
w[53] = __byte_perm (w[15], w[16], selector);
|
|
w[52] = __byte_perm (w[14], w[15], selector);
|
|
w[51] = __byte_perm (w[13], w[14], selector);
|
|
w[50] = __byte_perm (w[12], w[13], selector);
|
|
w[49] = __byte_perm (w[11], w[12], selector);
|
|
w[48] = __byte_perm (w[10], w[11], selector);
|
|
w[47] = __byte_perm (w[ 9], w[10], selector);
|
|
w[46] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[45] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[44] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[43] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[42] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[41] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[40] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[39] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[38] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[37] = __byte_perm ( 0, w[ 0], selector);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = __byte_perm (w[24], w[25], selector);
|
|
w[62] = __byte_perm (w[23], w[24], selector);
|
|
w[61] = __byte_perm (w[22], w[23], selector);
|
|
w[60] = __byte_perm (w[21], w[22], selector);
|
|
w[59] = __byte_perm (w[20], w[21], selector);
|
|
w[58] = __byte_perm (w[19], w[20], selector);
|
|
w[57] = __byte_perm (w[18], w[19], selector);
|
|
w[56] = __byte_perm (w[17], w[18], selector);
|
|
w[55] = __byte_perm (w[16], w[17], selector);
|
|
w[54] = __byte_perm (w[15], w[16], selector);
|
|
w[53] = __byte_perm (w[14], w[15], selector);
|
|
w[52] = __byte_perm (w[13], w[14], selector);
|
|
w[51] = __byte_perm (w[12], w[13], selector);
|
|
w[50] = __byte_perm (w[11], w[12], selector);
|
|
w[49] = __byte_perm (w[10], w[11], selector);
|
|
w[48] = __byte_perm (w[ 9], w[10], selector);
|
|
w[47] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[46] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[45] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[44] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[43] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[42] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[41] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[40] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[39] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[38] = __byte_perm ( 0, w[ 0], selector);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = __byte_perm (w[23], w[24], selector);
|
|
w[62] = __byte_perm (w[22], w[23], selector);
|
|
w[61] = __byte_perm (w[21], w[22], selector);
|
|
w[60] = __byte_perm (w[20], w[21], selector);
|
|
w[59] = __byte_perm (w[19], w[20], selector);
|
|
w[58] = __byte_perm (w[18], w[19], selector);
|
|
w[57] = __byte_perm (w[17], w[18], selector);
|
|
w[56] = __byte_perm (w[16], w[17], selector);
|
|
w[55] = __byte_perm (w[15], w[16], selector);
|
|
w[54] = __byte_perm (w[14], w[15], selector);
|
|
w[53] = __byte_perm (w[13], w[14], selector);
|
|
w[52] = __byte_perm (w[12], w[13], selector);
|
|
w[51] = __byte_perm (w[11], w[12], selector);
|
|
w[50] = __byte_perm (w[10], w[11], selector);
|
|
w[49] = __byte_perm (w[ 9], w[10], selector);
|
|
w[48] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[47] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[46] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[45] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[44] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[43] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[42] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[41] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[40] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[39] = __byte_perm ( 0, w[ 0], selector);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = __byte_perm (w[22], w[23], selector);
|
|
w[62] = __byte_perm (w[21], w[22], selector);
|
|
w[61] = __byte_perm (w[20], w[21], selector);
|
|
w[60] = __byte_perm (w[19], w[20], selector);
|
|
w[59] = __byte_perm (w[18], w[19], selector);
|
|
w[58] = __byte_perm (w[17], w[18], selector);
|
|
w[57] = __byte_perm (w[16], w[17], selector);
|
|
w[56] = __byte_perm (w[15], w[16], selector);
|
|
w[55] = __byte_perm (w[14], w[15], selector);
|
|
w[54] = __byte_perm (w[13], w[14], selector);
|
|
w[53] = __byte_perm (w[12], w[13], selector);
|
|
w[52] = __byte_perm (w[11], w[12], selector);
|
|
w[51] = __byte_perm (w[10], w[11], selector);
|
|
w[50] = __byte_perm (w[ 9], w[10], selector);
|
|
w[49] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[48] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[47] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[46] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[45] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[44] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[43] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[42] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[41] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[40] = __byte_perm ( 0, w[ 0], selector);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = __byte_perm (w[21], w[22], selector);
|
|
w[62] = __byte_perm (w[20], w[21], selector);
|
|
w[61] = __byte_perm (w[19], w[20], selector);
|
|
w[60] = __byte_perm (w[18], w[19], selector);
|
|
w[59] = __byte_perm (w[17], w[18], selector);
|
|
w[58] = __byte_perm (w[16], w[17], selector);
|
|
w[57] = __byte_perm (w[15], w[16], selector);
|
|
w[56] = __byte_perm (w[14], w[15], selector);
|
|
w[55] = __byte_perm (w[13], w[14], selector);
|
|
w[54] = __byte_perm (w[12], w[13], selector);
|
|
w[53] = __byte_perm (w[11], w[12], selector);
|
|
w[52] = __byte_perm (w[10], w[11], selector);
|
|
w[51] = __byte_perm (w[ 9], w[10], selector);
|
|
w[50] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[49] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[48] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[47] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[46] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[45] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[44] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[43] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[42] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[41] = __byte_perm ( 0, w[ 0], selector);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = __byte_perm (w[20], w[21], selector);
|
|
w[62] = __byte_perm (w[19], w[20], selector);
|
|
w[61] = __byte_perm (w[18], w[19], selector);
|
|
w[60] = __byte_perm (w[17], w[18], selector);
|
|
w[59] = __byte_perm (w[16], w[17], selector);
|
|
w[58] = __byte_perm (w[15], w[16], selector);
|
|
w[57] = __byte_perm (w[14], w[15], selector);
|
|
w[56] = __byte_perm (w[13], w[14], selector);
|
|
w[55] = __byte_perm (w[12], w[13], selector);
|
|
w[54] = __byte_perm (w[11], w[12], selector);
|
|
w[53] = __byte_perm (w[10], w[11], selector);
|
|
w[52] = __byte_perm (w[ 9], w[10], selector);
|
|
w[51] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[50] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[49] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[48] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[47] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[46] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[45] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[44] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[43] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[42] = __byte_perm ( 0, w[ 0], selector);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = __byte_perm (w[19], w[20], selector);
|
|
w[62] = __byte_perm (w[18], w[19], selector);
|
|
w[61] = __byte_perm (w[17], w[18], selector);
|
|
w[60] = __byte_perm (w[16], w[17], selector);
|
|
w[59] = __byte_perm (w[15], w[16], selector);
|
|
w[58] = __byte_perm (w[14], w[15], selector);
|
|
w[57] = __byte_perm (w[13], w[14], selector);
|
|
w[56] = __byte_perm (w[12], w[13], selector);
|
|
w[55] = __byte_perm (w[11], w[12], selector);
|
|
w[54] = __byte_perm (w[10], w[11], selector);
|
|
w[53] = __byte_perm (w[ 9], w[10], selector);
|
|
w[52] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[51] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[50] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[49] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[48] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[47] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[46] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[45] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[44] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[43] = __byte_perm ( 0, w[ 0], selector);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = __byte_perm (w[18], w[19], selector);
|
|
w[62] = __byte_perm (w[17], w[18], selector);
|
|
w[61] = __byte_perm (w[16], w[17], selector);
|
|
w[60] = __byte_perm (w[15], w[16], selector);
|
|
w[59] = __byte_perm (w[14], w[15], selector);
|
|
w[58] = __byte_perm (w[13], w[14], selector);
|
|
w[57] = __byte_perm (w[12], w[13], selector);
|
|
w[56] = __byte_perm (w[11], w[12], selector);
|
|
w[55] = __byte_perm (w[10], w[11], selector);
|
|
w[54] = __byte_perm (w[ 9], w[10], selector);
|
|
w[53] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[52] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[51] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[50] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[49] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[48] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[47] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[46] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[45] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[44] = __byte_perm ( 0, w[ 0], selector);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = __byte_perm (w[17], w[18], selector);
|
|
w[62] = __byte_perm (w[16], w[17], selector);
|
|
w[61] = __byte_perm (w[15], w[16], selector);
|
|
w[60] = __byte_perm (w[14], w[15], selector);
|
|
w[59] = __byte_perm (w[13], w[14], selector);
|
|
w[58] = __byte_perm (w[12], w[13], selector);
|
|
w[57] = __byte_perm (w[11], w[12], selector);
|
|
w[56] = __byte_perm (w[10], w[11], selector);
|
|
w[55] = __byte_perm (w[ 9], w[10], selector);
|
|
w[54] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[53] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[52] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[51] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[50] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[49] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[48] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[47] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[46] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[45] = __byte_perm ( 0, w[ 0], selector);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = __byte_perm (w[16], w[17], selector);
|
|
w[62] = __byte_perm (w[15], w[16], selector);
|
|
w[61] = __byte_perm (w[14], w[15], selector);
|
|
w[60] = __byte_perm (w[13], w[14], selector);
|
|
w[59] = __byte_perm (w[12], w[13], selector);
|
|
w[58] = __byte_perm (w[11], w[12], selector);
|
|
w[57] = __byte_perm (w[10], w[11], selector);
|
|
w[56] = __byte_perm (w[ 9], w[10], selector);
|
|
w[55] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[54] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[53] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[52] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[51] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[50] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[49] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[48] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[47] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[46] = __byte_perm ( 0, w[ 0], selector);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = __byte_perm (w[15], w[16], selector);
|
|
w[62] = __byte_perm (w[14], w[15], selector);
|
|
w[61] = __byte_perm (w[13], w[14], selector);
|
|
w[60] = __byte_perm (w[12], w[13], selector);
|
|
w[59] = __byte_perm (w[11], w[12], selector);
|
|
w[58] = __byte_perm (w[10], w[11], selector);
|
|
w[57] = __byte_perm (w[ 9], w[10], selector);
|
|
w[56] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[55] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[54] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[53] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[52] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[51] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[50] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[49] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[48] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[47] = __byte_perm ( 0, w[ 0], selector);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = __byte_perm (w[14], w[15], selector);
|
|
w[62] = __byte_perm (w[13], w[14], selector);
|
|
w[61] = __byte_perm (w[12], w[13], selector);
|
|
w[60] = __byte_perm (w[11], w[12], selector);
|
|
w[59] = __byte_perm (w[10], w[11], selector);
|
|
w[58] = __byte_perm (w[ 9], w[10], selector);
|
|
w[57] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[56] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[55] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[54] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[53] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[52] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[51] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[50] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[49] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[48] = __byte_perm ( 0, w[ 0], selector);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = __byte_perm (w[13], w[14], selector);
|
|
w[62] = __byte_perm (w[12], w[13], selector);
|
|
w[61] = __byte_perm (w[11], w[12], selector);
|
|
w[60] = __byte_perm (w[10], w[11], selector);
|
|
w[59] = __byte_perm (w[ 9], w[10], selector);
|
|
w[58] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[57] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[56] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[55] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[54] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[53] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[52] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[51] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[50] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[49] = __byte_perm ( 0, w[ 0], selector);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = __byte_perm (w[12], w[13], selector);
|
|
w[62] = __byte_perm (w[11], w[12], selector);
|
|
w[61] = __byte_perm (w[10], w[11], selector);
|
|
w[60] = __byte_perm (w[ 9], w[10], selector);
|
|
w[59] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[58] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[57] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[56] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[55] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[54] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[53] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[52] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[51] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[50] = __byte_perm ( 0, w[ 0], selector);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = __byte_perm (w[11], w[12], selector);
|
|
w[62] = __byte_perm (w[10], w[11], selector);
|
|
w[61] = __byte_perm (w[ 9], w[10], selector);
|
|
w[60] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[59] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[58] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[57] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[56] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[55] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[54] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[53] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[52] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[51] = __byte_perm ( 0, w[ 0], selector);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = __byte_perm (w[10], w[11], selector);
|
|
w[62] = __byte_perm (w[ 9], w[10], selector);
|
|
w[61] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[60] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[59] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[58] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[57] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[56] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[55] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[54] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[53] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[52] = __byte_perm ( 0, w[ 0], selector);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = __byte_perm (w[ 9], w[10], selector);
|
|
w[62] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[61] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[60] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[59] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[58] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[57] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[56] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[55] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[54] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[53] = __byte_perm ( 0, w[ 0], selector);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = __byte_perm (w[ 8], w[ 9], selector);
|
|
w[62] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[61] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[60] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[59] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[58] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[57] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[56] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[55] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[54] = __byte_perm ( 0, w[ 0], selector);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = __byte_perm (w[ 7], w[ 8], selector);
|
|
w[62] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[61] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[60] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[59] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[58] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[57] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[56] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[55] = __byte_perm ( 0, w[ 0], selector);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = __byte_perm (w[ 6], w[ 7], selector);
|
|
w[62] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[61] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[60] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[59] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[58] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[57] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[56] = __byte_perm ( 0, w[ 0], selector);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = __byte_perm (w[ 5], w[ 6], selector);
|
|
w[62] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[61] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[60] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[59] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[58] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[57] = __byte_perm ( 0, w[ 0], selector);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = __byte_perm (w[ 4], w[ 5], selector);
|
|
w[62] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[61] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[60] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[59] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[58] = __byte_perm ( 0, w[ 0], selector);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = __byte_perm (w[ 3], w[ 4], selector);
|
|
w[62] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[61] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[60] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[59] = __byte_perm ( 0, w[ 0], selector);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = __byte_perm (w[ 2], w[ 3], selector);
|
|
w[62] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[61] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[60] = __byte_perm ( 0, w[ 0], selector);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = __byte_perm (w[ 1], w[ 2], selector);
|
|
w[62] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[61] = __byte_perm ( 0, w[ 0], selector);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = __byte_perm (w[ 0], w[ 1], selector);
|
|
w[62] = __byte_perm ( 0, w[ 0], selector);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = __byte_perm ( 0, w[ 0], selector);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_1x64_be (u32x w[64], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = amd_bytealign (w[62], w[63], offset);
|
|
w[62] = amd_bytealign (w[61], w[62], offset);
|
|
w[61] = amd_bytealign (w[60], w[61], offset);
|
|
w[60] = amd_bytealign (w[59], w[60], offset);
|
|
w[59] = amd_bytealign (w[58], w[59], offset);
|
|
w[58] = amd_bytealign (w[57], w[58], offset);
|
|
w[57] = amd_bytealign (w[56], w[57], offset);
|
|
w[56] = amd_bytealign (w[55], w[56], offset);
|
|
w[55] = amd_bytealign (w[54], w[55], offset);
|
|
w[54] = amd_bytealign (w[53], w[54], offset);
|
|
w[53] = amd_bytealign (w[52], w[53], offset);
|
|
w[52] = amd_bytealign (w[51], w[52], offset);
|
|
w[51] = amd_bytealign (w[50], w[51], offset);
|
|
w[50] = amd_bytealign (w[49], w[50], offset);
|
|
w[49] = amd_bytealign (w[48], w[49], offset);
|
|
w[48] = amd_bytealign (w[47], w[48], offset);
|
|
w[47] = amd_bytealign (w[46], w[47], offset);
|
|
w[46] = amd_bytealign (w[45], w[46], offset);
|
|
w[45] = amd_bytealign (w[44], w[45], offset);
|
|
w[44] = amd_bytealign (w[43], w[44], offset);
|
|
w[43] = amd_bytealign (w[42], w[43], offset);
|
|
w[42] = amd_bytealign (w[41], w[42], offset);
|
|
w[41] = amd_bytealign (w[40], w[41], offset);
|
|
w[40] = amd_bytealign (w[39], w[40], offset);
|
|
w[39] = amd_bytealign (w[38], w[39], offset);
|
|
w[38] = amd_bytealign (w[37], w[38], offset);
|
|
w[37] = amd_bytealign (w[36], w[37], offset);
|
|
w[36] = amd_bytealign (w[35], w[36], offset);
|
|
w[35] = amd_bytealign (w[34], w[35], offset);
|
|
w[34] = amd_bytealign (w[33], w[34], offset);
|
|
w[33] = amd_bytealign (w[32], w[33], offset);
|
|
w[32] = amd_bytealign (w[31], w[32], offset);
|
|
w[31] = amd_bytealign (w[30], w[31], offset);
|
|
w[30] = amd_bytealign (w[29], w[30], offset);
|
|
w[29] = amd_bytealign (w[28], w[29], offset);
|
|
w[28] = amd_bytealign (w[27], w[28], offset);
|
|
w[27] = amd_bytealign (w[26], w[27], offset);
|
|
w[26] = amd_bytealign (w[25], w[26], offset);
|
|
w[25] = amd_bytealign (w[24], w[25], offset);
|
|
w[24] = amd_bytealign (w[23], w[24], offset);
|
|
w[23] = amd_bytealign (w[22], w[23], offset);
|
|
w[22] = amd_bytealign (w[21], w[22], offset);
|
|
w[21] = amd_bytealign (w[20], w[21], offset);
|
|
w[20] = amd_bytealign (w[19], w[20], offset);
|
|
w[19] = amd_bytealign (w[18], w[19], offset);
|
|
w[18] = amd_bytealign (w[17], w[18], offset);
|
|
w[17] = amd_bytealign (w[16], w[17], offset);
|
|
w[16] = amd_bytealign (w[15], w[16], offset);
|
|
w[15] = amd_bytealign (w[14], w[15], offset);
|
|
w[14] = amd_bytealign (w[13], w[14], offset);
|
|
w[13] = amd_bytealign (w[12], w[13], offset);
|
|
w[12] = amd_bytealign (w[11], w[12], offset);
|
|
w[11] = amd_bytealign (w[10], w[11], offset);
|
|
w[10] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[ 9] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[ 8] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 7] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 6] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 5] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 4] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 3] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 2] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 1] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 0] = amd_bytealign ( 0, w[ 0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = amd_bytealign (w[61], w[62], offset);
|
|
w[62] = amd_bytealign (w[60], w[61], offset);
|
|
w[61] = amd_bytealign (w[59], w[60], offset);
|
|
w[60] = amd_bytealign (w[58], w[59], offset);
|
|
w[59] = amd_bytealign (w[57], w[58], offset);
|
|
w[58] = amd_bytealign (w[56], w[57], offset);
|
|
w[57] = amd_bytealign (w[55], w[56], offset);
|
|
w[56] = amd_bytealign (w[54], w[55], offset);
|
|
w[55] = amd_bytealign (w[53], w[54], offset);
|
|
w[54] = amd_bytealign (w[52], w[53], offset);
|
|
w[53] = amd_bytealign (w[51], w[52], offset);
|
|
w[52] = amd_bytealign (w[50], w[51], offset);
|
|
w[51] = amd_bytealign (w[49], w[50], offset);
|
|
w[50] = amd_bytealign (w[48], w[49], offset);
|
|
w[49] = amd_bytealign (w[47], w[48], offset);
|
|
w[48] = amd_bytealign (w[46], w[47], offset);
|
|
w[47] = amd_bytealign (w[45], w[46], offset);
|
|
w[46] = amd_bytealign (w[44], w[45], offset);
|
|
w[45] = amd_bytealign (w[43], w[44], offset);
|
|
w[44] = amd_bytealign (w[42], w[43], offset);
|
|
w[43] = amd_bytealign (w[41], w[42], offset);
|
|
w[42] = amd_bytealign (w[40], w[41], offset);
|
|
w[41] = amd_bytealign (w[39], w[40], offset);
|
|
w[40] = amd_bytealign (w[38], w[39], offset);
|
|
w[39] = amd_bytealign (w[37], w[38], offset);
|
|
w[38] = amd_bytealign (w[36], w[37], offset);
|
|
w[37] = amd_bytealign (w[35], w[36], offset);
|
|
w[36] = amd_bytealign (w[34], w[35], offset);
|
|
w[35] = amd_bytealign (w[33], w[34], offset);
|
|
w[34] = amd_bytealign (w[32], w[33], offset);
|
|
w[33] = amd_bytealign (w[31], w[32], offset);
|
|
w[32] = amd_bytealign (w[30], w[31], offset);
|
|
w[31] = amd_bytealign (w[29], w[30], offset);
|
|
w[30] = amd_bytealign (w[28], w[29], offset);
|
|
w[29] = amd_bytealign (w[27], w[28], offset);
|
|
w[28] = amd_bytealign (w[26], w[27], offset);
|
|
w[27] = amd_bytealign (w[25], w[26], offset);
|
|
w[26] = amd_bytealign (w[24], w[25], offset);
|
|
w[25] = amd_bytealign (w[23], w[24], offset);
|
|
w[24] = amd_bytealign (w[22], w[23], offset);
|
|
w[23] = amd_bytealign (w[21], w[22], offset);
|
|
w[22] = amd_bytealign (w[20], w[21], offset);
|
|
w[21] = amd_bytealign (w[19], w[20], offset);
|
|
w[20] = amd_bytealign (w[18], w[19], offset);
|
|
w[19] = amd_bytealign (w[17], w[18], offset);
|
|
w[18] = amd_bytealign (w[16], w[17], offset);
|
|
w[17] = amd_bytealign (w[15], w[16], offset);
|
|
w[16] = amd_bytealign (w[14], w[15], offset);
|
|
w[15] = amd_bytealign (w[13], w[14], offset);
|
|
w[14] = amd_bytealign (w[12], w[13], offset);
|
|
w[13] = amd_bytealign (w[11], w[12], offset);
|
|
w[12] = amd_bytealign (w[10], w[11], offset);
|
|
w[11] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[10] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[ 9] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 8] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 7] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 6] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 5] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 4] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 3] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 2] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 1] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = amd_bytealign (w[60], w[61], offset);
|
|
w[62] = amd_bytealign (w[59], w[60], offset);
|
|
w[61] = amd_bytealign (w[58], w[59], offset);
|
|
w[60] = amd_bytealign (w[57], w[58], offset);
|
|
w[59] = amd_bytealign (w[56], w[57], offset);
|
|
w[58] = amd_bytealign (w[55], w[56], offset);
|
|
w[57] = amd_bytealign (w[54], w[55], offset);
|
|
w[56] = amd_bytealign (w[53], w[54], offset);
|
|
w[55] = amd_bytealign (w[52], w[53], offset);
|
|
w[54] = amd_bytealign (w[51], w[52], offset);
|
|
w[53] = amd_bytealign (w[50], w[51], offset);
|
|
w[52] = amd_bytealign (w[49], w[50], offset);
|
|
w[51] = amd_bytealign (w[48], w[49], offset);
|
|
w[50] = amd_bytealign (w[47], w[48], offset);
|
|
w[49] = amd_bytealign (w[46], w[47], offset);
|
|
w[48] = amd_bytealign (w[45], w[46], offset);
|
|
w[47] = amd_bytealign (w[44], w[45], offset);
|
|
w[46] = amd_bytealign (w[43], w[44], offset);
|
|
w[45] = amd_bytealign (w[42], w[43], offset);
|
|
w[44] = amd_bytealign (w[41], w[42], offset);
|
|
w[43] = amd_bytealign (w[40], w[41], offset);
|
|
w[42] = amd_bytealign (w[39], w[40], offset);
|
|
w[41] = amd_bytealign (w[38], w[39], offset);
|
|
w[40] = amd_bytealign (w[37], w[38], offset);
|
|
w[39] = amd_bytealign (w[36], w[37], offset);
|
|
w[38] = amd_bytealign (w[35], w[36], offset);
|
|
w[37] = amd_bytealign (w[34], w[35], offset);
|
|
w[36] = amd_bytealign (w[33], w[34], offset);
|
|
w[35] = amd_bytealign (w[32], w[33], offset);
|
|
w[34] = amd_bytealign (w[31], w[32], offset);
|
|
w[33] = amd_bytealign (w[30], w[31], offset);
|
|
w[32] = amd_bytealign (w[29], w[30], offset);
|
|
w[31] = amd_bytealign (w[28], w[29], offset);
|
|
w[30] = amd_bytealign (w[27], w[28], offset);
|
|
w[29] = amd_bytealign (w[26], w[27], offset);
|
|
w[28] = amd_bytealign (w[25], w[26], offset);
|
|
w[27] = amd_bytealign (w[24], w[25], offset);
|
|
w[26] = amd_bytealign (w[23], w[24], offset);
|
|
w[25] = amd_bytealign (w[22], w[23], offset);
|
|
w[24] = amd_bytealign (w[21], w[22], offset);
|
|
w[23] = amd_bytealign (w[20], w[21], offset);
|
|
w[22] = amd_bytealign (w[19], w[20], offset);
|
|
w[21] = amd_bytealign (w[18], w[19], offset);
|
|
w[20] = amd_bytealign (w[17], w[18], offset);
|
|
w[19] = amd_bytealign (w[16], w[17], offset);
|
|
w[18] = amd_bytealign (w[15], w[16], offset);
|
|
w[17] = amd_bytealign (w[14], w[15], offset);
|
|
w[16] = amd_bytealign (w[13], w[14], offset);
|
|
w[15] = amd_bytealign (w[12], w[13], offset);
|
|
w[14] = amd_bytealign (w[11], w[12], offset);
|
|
w[13] = amd_bytealign (w[10], w[11], offset);
|
|
w[12] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[11] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[10] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[ 9] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 8] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 7] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 6] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 5] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 4] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 3] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 2] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = amd_bytealign (w[59], w[60], offset);
|
|
w[62] = amd_bytealign (w[58], w[59], offset);
|
|
w[61] = amd_bytealign (w[57], w[58], offset);
|
|
w[60] = amd_bytealign (w[56], w[57], offset);
|
|
w[59] = amd_bytealign (w[55], w[56], offset);
|
|
w[58] = amd_bytealign (w[54], w[55], offset);
|
|
w[57] = amd_bytealign (w[53], w[54], offset);
|
|
w[56] = amd_bytealign (w[52], w[53], offset);
|
|
w[55] = amd_bytealign (w[51], w[52], offset);
|
|
w[54] = amd_bytealign (w[50], w[51], offset);
|
|
w[53] = amd_bytealign (w[49], w[50], offset);
|
|
w[52] = amd_bytealign (w[48], w[49], offset);
|
|
w[51] = amd_bytealign (w[47], w[48], offset);
|
|
w[50] = amd_bytealign (w[46], w[47], offset);
|
|
w[49] = amd_bytealign (w[45], w[46], offset);
|
|
w[48] = amd_bytealign (w[44], w[45], offset);
|
|
w[47] = amd_bytealign (w[43], w[44], offset);
|
|
w[46] = amd_bytealign (w[42], w[43], offset);
|
|
w[45] = amd_bytealign (w[41], w[42], offset);
|
|
w[44] = amd_bytealign (w[40], w[41], offset);
|
|
w[43] = amd_bytealign (w[39], w[40], offset);
|
|
w[42] = amd_bytealign (w[38], w[39], offset);
|
|
w[41] = amd_bytealign (w[37], w[38], offset);
|
|
w[40] = amd_bytealign (w[36], w[37], offset);
|
|
w[39] = amd_bytealign (w[35], w[36], offset);
|
|
w[38] = amd_bytealign (w[34], w[35], offset);
|
|
w[37] = amd_bytealign (w[33], w[34], offset);
|
|
w[36] = amd_bytealign (w[32], w[33], offset);
|
|
w[35] = amd_bytealign (w[31], w[32], offset);
|
|
w[34] = amd_bytealign (w[30], w[31], offset);
|
|
w[33] = amd_bytealign (w[29], w[30], offset);
|
|
w[32] = amd_bytealign (w[28], w[29], offset);
|
|
w[31] = amd_bytealign (w[27], w[28], offset);
|
|
w[30] = amd_bytealign (w[26], w[27], offset);
|
|
w[29] = amd_bytealign (w[25], w[26], offset);
|
|
w[28] = amd_bytealign (w[24], w[25], offset);
|
|
w[27] = amd_bytealign (w[23], w[24], offset);
|
|
w[26] = amd_bytealign (w[22], w[23], offset);
|
|
w[25] = amd_bytealign (w[21], w[22], offset);
|
|
w[24] = amd_bytealign (w[20], w[21], offset);
|
|
w[23] = amd_bytealign (w[19], w[20], offset);
|
|
w[22] = amd_bytealign (w[18], w[19], offset);
|
|
w[21] = amd_bytealign (w[17], w[18], offset);
|
|
w[20] = amd_bytealign (w[16], w[17], offset);
|
|
w[19] = amd_bytealign (w[15], w[16], offset);
|
|
w[18] = amd_bytealign (w[14], w[15], offset);
|
|
w[17] = amd_bytealign (w[13], w[14], offset);
|
|
w[16] = amd_bytealign (w[12], w[13], offset);
|
|
w[15] = amd_bytealign (w[11], w[12], offset);
|
|
w[14] = amd_bytealign (w[10], w[11], offset);
|
|
w[13] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[12] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[11] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[10] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[ 9] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 8] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 7] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 6] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 5] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 4] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 3] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = amd_bytealign (w[58], w[59], offset);
|
|
w[62] = amd_bytealign (w[57], w[58], offset);
|
|
w[61] = amd_bytealign (w[56], w[57], offset);
|
|
w[60] = amd_bytealign (w[55], w[56], offset);
|
|
w[59] = amd_bytealign (w[54], w[55], offset);
|
|
w[58] = amd_bytealign (w[53], w[54], offset);
|
|
w[57] = amd_bytealign (w[52], w[53], offset);
|
|
w[56] = amd_bytealign (w[51], w[52], offset);
|
|
w[55] = amd_bytealign (w[50], w[51], offset);
|
|
w[54] = amd_bytealign (w[49], w[50], offset);
|
|
w[53] = amd_bytealign (w[48], w[49], offset);
|
|
w[52] = amd_bytealign (w[47], w[48], offset);
|
|
w[51] = amd_bytealign (w[46], w[47], offset);
|
|
w[50] = amd_bytealign (w[45], w[46], offset);
|
|
w[49] = amd_bytealign (w[44], w[45], offset);
|
|
w[48] = amd_bytealign (w[43], w[44], offset);
|
|
w[47] = amd_bytealign (w[42], w[43], offset);
|
|
w[46] = amd_bytealign (w[41], w[42], offset);
|
|
w[45] = amd_bytealign (w[40], w[41], offset);
|
|
w[44] = amd_bytealign (w[39], w[40], offset);
|
|
w[43] = amd_bytealign (w[38], w[39], offset);
|
|
w[42] = amd_bytealign (w[37], w[38], offset);
|
|
w[41] = amd_bytealign (w[36], w[37], offset);
|
|
w[40] = amd_bytealign (w[35], w[36], offset);
|
|
w[39] = amd_bytealign (w[34], w[35], offset);
|
|
w[38] = amd_bytealign (w[33], w[34], offset);
|
|
w[37] = amd_bytealign (w[32], w[33], offset);
|
|
w[36] = amd_bytealign (w[31], w[32], offset);
|
|
w[35] = amd_bytealign (w[30], w[31], offset);
|
|
w[34] = amd_bytealign (w[29], w[30], offset);
|
|
w[33] = amd_bytealign (w[28], w[29], offset);
|
|
w[32] = amd_bytealign (w[27], w[28], offset);
|
|
w[31] = amd_bytealign (w[26], w[27], offset);
|
|
w[30] = amd_bytealign (w[25], w[26], offset);
|
|
w[29] = amd_bytealign (w[24], w[25], offset);
|
|
w[28] = amd_bytealign (w[23], w[24], offset);
|
|
w[27] = amd_bytealign (w[22], w[23], offset);
|
|
w[26] = amd_bytealign (w[21], w[22], offset);
|
|
w[25] = amd_bytealign (w[20], w[21], offset);
|
|
w[24] = amd_bytealign (w[19], w[20], offset);
|
|
w[23] = amd_bytealign (w[18], w[19], offset);
|
|
w[22] = amd_bytealign (w[17], w[18], offset);
|
|
w[21] = amd_bytealign (w[16], w[17], offset);
|
|
w[20] = amd_bytealign (w[15], w[16], offset);
|
|
w[19] = amd_bytealign (w[14], w[15], offset);
|
|
w[18] = amd_bytealign (w[13], w[14], offset);
|
|
w[17] = amd_bytealign (w[12], w[13], offset);
|
|
w[16] = amd_bytealign (w[11], w[12], offset);
|
|
w[15] = amd_bytealign (w[10], w[11], offset);
|
|
w[14] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[13] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[12] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[11] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[10] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[ 9] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 8] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 7] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 6] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 5] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 4] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = amd_bytealign (w[57], w[58], offset);
|
|
w[62] = amd_bytealign (w[56], w[57], offset);
|
|
w[61] = amd_bytealign (w[55], w[56], offset);
|
|
w[60] = amd_bytealign (w[54], w[55], offset);
|
|
w[59] = amd_bytealign (w[53], w[54], offset);
|
|
w[58] = amd_bytealign (w[52], w[53], offset);
|
|
w[57] = amd_bytealign (w[51], w[52], offset);
|
|
w[56] = amd_bytealign (w[50], w[51], offset);
|
|
w[55] = amd_bytealign (w[49], w[50], offset);
|
|
w[54] = amd_bytealign (w[48], w[49], offset);
|
|
w[53] = amd_bytealign (w[47], w[48], offset);
|
|
w[52] = amd_bytealign (w[46], w[47], offset);
|
|
w[51] = amd_bytealign (w[45], w[46], offset);
|
|
w[50] = amd_bytealign (w[44], w[45], offset);
|
|
w[49] = amd_bytealign (w[43], w[44], offset);
|
|
w[48] = amd_bytealign (w[42], w[43], offset);
|
|
w[47] = amd_bytealign (w[41], w[42], offset);
|
|
w[46] = amd_bytealign (w[40], w[41], offset);
|
|
w[45] = amd_bytealign (w[39], w[40], offset);
|
|
w[44] = amd_bytealign (w[38], w[39], offset);
|
|
w[43] = amd_bytealign (w[37], w[38], offset);
|
|
w[42] = amd_bytealign (w[36], w[37], offset);
|
|
w[41] = amd_bytealign (w[35], w[36], offset);
|
|
w[40] = amd_bytealign (w[34], w[35], offset);
|
|
w[39] = amd_bytealign (w[33], w[34], offset);
|
|
w[38] = amd_bytealign (w[32], w[33], offset);
|
|
w[37] = amd_bytealign (w[31], w[32], offset);
|
|
w[36] = amd_bytealign (w[30], w[31], offset);
|
|
w[35] = amd_bytealign (w[29], w[30], offset);
|
|
w[34] = amd_bytealign (w[28], w[29], offset);
|
|
w[33] = amd_bytealign (w[27], w[28], offset);
|
|
w[32] = amd_bytealign (w[26], w[27], offset);
|
|
w[31] = amd_bytealign (w[25], w[26], offset);
|
|
w[30] = amd_bytealign (w[24], w[25], offset);
|
|
w[29] = amd_bytealign (w[23], w[24], offset);
|
|
w[28] = amd_bytealign (w[22], w[23], offset);
|
|
w[27] = amd_bytealign (w[21], w[22], offset);
|
|
w[26] = amd_bytealign (w[20], w[21], offset);
|
|
w[25] = amd_bytealign (w[19], w[20], offset);
|
|
w[24] = amd_bytealign (w[18], w[19], offset);
|
|
w[23] = amd_bytealign (w[17], w[18], offset);
|
|
w[22] = amd_bytealign (w[16], w[17], offset);
|
|
w[21] = amd_bytealign (w[15], w[16], offset);
|
|
w[20] = amd_bytealign (w[14], w[15], offset);
|
|
w[19] = amd_bytealign (w[13], w[14], offset);
|
|
w[18] = amd_bytealign (w[12], w[13], offset);
|
|
w[17] = amd_bytealign (w[11], w[12], offset);
|
|
w[16] = amd_bytealign (w[10], w[11], offset);
|
|
w[15] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[14] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[13] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[12] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[11] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[10] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[ 9] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 8] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 7] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 6] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 5] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = amd_bytealign (w[56], w[57], offset);
|
|
w[62] = amd_bytealign (w[55], w[56], offset);
|
|
w[61] = amd_bytealign (w[54], w[55], offset);
|
|
w[60] = amd_bytealign (w[53], w[54], offset);
|
|
w[59] = amd_bytealign (w[52], w[53], offset);
|
|
w[58] = amd_bytealign (w[51], w[52], offset);
|
|
w[57] = amd_bytealign (w[50], w[51], offset);
|
|
w[56] = amd_bytealign (w[49], w[50], offset);
|
|
w[55] = amd_bytealign (w[48], w[49], offset);
|
|
w[54] = amd_bytealign (w[47], w[48], offset);
|
|
w[53] = amd_bytealign (w[46], w[47], offset);
|
|
w[52] = amd_bytealign (w[45], w[46], offset);
|
|
w[51] = amd_bytealign (w[44], w[45], offset);
|
|
w[50] = amd_bytealign (w[43], w[44], offset);
|
|
w[49] = amd_bytealign (w[42], w[43], offset);
|
|
w[48] = amd_bytealign (w[41], w[42], offset);
|
|
w[47] = amd_bytealign (w[40], w[41], offset);
|
|
w[46] = amd_bytealign (w[39], w[40], offset);
|
|
w[45] = amd_bytealign (w[38], w[39], offset);
|
|
w[44] = amd_bytealign (w[37], w[38], offset);
|
|
w[43] = amd_bytealign (w[36], w[37], offset);
|
|
w[42] = amd_bytealign (w[35], w[36], offset);
|
|
w[41] = amd_bytealign (w[34], w[35], offset);
|
|
w[40] = amd_bytealign (w[33], w[34], offset);
|
|
w[39] = amd_bytealign (w[32], w[33], offset);
|
|
w[38] = amd_bytealign (w[31], w[32], offset);
|
|
w[37] = amd_bytealign (w[30], w[31], offset);
|
|
w[36] = amd_bytealign (w[29], w[30], offset);
|
|
w[35] = amd_bytealign (w[28], w[29], offset);
|
|
w[34] = amd_bytealign (w[27], w[28], offset);
|
|
w[33] = amd_bytealign (w[26], w[27], offset);
|
|
w[32] = amd_bytealign (w[25], w[26], offset);
|
|
w[31] = amd_bytealign (w[24], w[25], offset);
|
|
w[30] = amd_bytealign (w[23], w[24], offset);
|
|
w[29] = amd_bytealign (w[22], w[23], offset);
|
|
w[28] = amd_bytealign (w[21], w[22], offset);
|
|
w[27] = amd_bytealign (w[20], w[21], offset);
|
|
w[26] = amd_bytealign (w[19], w[20], offset);
|
|
w[25] = amd_bytealign (w[18], w[19], offset);
|
|
w[24] = amd_bytealign (w[17], w[18], offset);
|
|
w[23] = amd_bytealign (w[16], w[17], offset);
|
|
w[22] = amd_bytealign (w[15], w[16], offset);
|
|
w[21] = amd_bytealign (w[14], w[15], offset);
|
|
w[20] = amd_bytealign (w[13], w[14], offset);
|
|
w[19] = amd_bytealign (w[12], w[13], offset);
|
|
w[18] = amd_bytealign (w[11], w[12], offset);
|
|
w[17] = amd_bytealign (w[10], w[11], offset);
|
|
w[16] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[15] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[14] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[13] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[12] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[11] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[10] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[ 9] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 8] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 7] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 6] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = amd_bytealign (w[55], w[56], offset);
|
|
w[62] = amd_bytealign (w[54], w[55], offset);
|
|
w[61] = amd_bytealign (w[53], w[54], offset);
|
|
w[60] = amd_bytealign (w[52], w[53], offset);
|
|
w[59] = amd_bytealign (w[51], w[52], offset);
|
|
w[58] = amd_bytealign (w[50], w[51], offset);
|
|
w[57] = amd_bytealign (w[49], w[50], offset);
|
|
w[56] = amd_bytealign (w[48], w[49], offset);
|
|
w[55] = amd_bytealign (w[47], w[48], offset);
|
|
w[54] = amd_bytealign (w[46], w[47], offset);
|
|
w[53] = amd_bytealign (w[45], w[46], offset);
|
|
w[52] = amd_bytealign (w[44], w[45], offset);
|
|
w[51] = amd_bytealign (w[43], w[44], offset);
|
|
w[50] = amd_bytealign (w[42], w[43], offset);
|
|
w[49] = amd_bytealign (w[41], w[42], offset);
|
|
w[48] = amd_bytealign (w[40], w[41], offset);
|
|
w[47] = amd_bytealign (w[39], w[40], offset);
|
|
w[46] = amd_bytealign (w[38], w[39], offset);
|
|
w[45] = amd_bytealign (w[37], w[38], offset);
|
|
w[44] = amd_bytealign (w[36], w[37], offset);
|
|
w[43] = amd_bytealign (w[35], w[36], offset);
|
|
w[42] = amd_bytealign (w[34], w[35], offset);
|
|
w[41] = amd_bytealign (w[33], w[34], offset);
|
|
w[40] = amd_bytealign (w[32], w[33], offset);
|
|
w[39] = amd_bytealign (w[31], w[32], offset);
|
|
w[38] = amd_bytealign (w[30], w[31], offset);
|
|
w[37] = amd_bytealign (w[29], w[30], offset);
|
|
w[36] = amd_bytealign (w[28], w[29], offset);
|
|
w[35] = amd_bytealign (w[27], w[28], offset);
|
|
w[34] = amd_bytealign (w[26], w[27], offset);
|
|
w[33] = amd_bytealign (w[25], w[26], offset);
|
|
w[32] = amd_bytealign (w[24], w[25], offset);
|
|
w[31] = amd_bytealign (w[23], w[24], offset);
|
|
w[30] = amd_bytealign (w[22], w[23], offset);
|
|
w[29] = amd_bytealign (w[21], w[22], offset);
|
|
w[28] = amd_bytealign (w[20], w[21], offset);
|
|
w[27] = amd_bytealign (w[19], w[20], offset);
|
|
w[26] = amd_bytealign (w[18], w[19], offset);
|
|
w[25] = amd_bytealign (w[17], w[18], offset);
|
|
w[24] = amd_bytealign (w[16], w[17], offset);
|
|
w[23] = amd_bytealign (w[15], w[16], offset);
|
|
w[22] = amd_bytealign (w[14], w[15], offset);
|
|
w[21] = amd_bytealign (w[13], w[14], offset);
|
|
w[20] = amd_bytealign (w[12], w[13], offset);
|
|
w[19] = amd_bytealign (w[11], w[12], offset);
|
|
w[18] = amd_bytealign (w[10], w[11], offset);
|
|
w[17] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[16] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[15] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[14] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[13] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[12] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[11] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[10] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[ 9] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 8] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 7] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = amd_bytealign (w[54], w[55], offset);
|
|
w[62] = amd_bytealign (w[53], w[54], offset);
|
|
w[61] = amd_bytealign (w[52], w[53], offset);
|
|
w[60] = amd_bytealign (w[51], w[52], offset);
|
|
w[59] = amd_bytealign (w[50], w[51], offset);
|
|
w[58] = amd_bytealign (w[49], w[50], offset);
|
|
w[57] = amd_bytealign (w[48], w[49], offset);
|
|
w[56] = amd_bytealign (w[47], w[48], offset);
|
|
w[55] = amd_bytealign (w[46], w[47], offset);
|
|
w[54] = amd_bytealign (w[45], w[46], offset);
|
|
w[53] = amd_bytealign (w[44], w[45], offset);
|
|
w[52] = amd_bytealign (w[43], w[44], offset);
|
|
w[51] = amd_bytealign (w[42], w[43], offset);
|
|
w[50] = amd_bytealign (w[41], w[42], offset);
|
|
w[49] = amd_bytealign (w[40], w[41], offset);
|
|
w[48] = amd_bytealign (w[39], w[40], offset);
|
|
w[47] = amd_bytealign (w[38], w[39], offset);
|
|
w[46] = amd_bytealign (w[37], w[38], offset);
|
|
w[45] = amd_bytealign (w[36], w[37], offset);
|
|
w[44] = amd_bytealign (w[35], w[36], offset);
|
|
w[43] = amd_bytealign (w[34], w[35], offset);
|
|
w[42] = amd_bytealign (w[33], w[34], offset);
|
|
w[41] = amd_bytealign (w[32], w[33], offset);
|
|
w[40] = amd_bytealign (w[31], w[32], offset);
|
|
w[39] = amd_bytealign (w[30], w[31], offset);
|
|
w[38] = amd_bytealign (w[29], w[30], offset);
|
|
w[37] = amd_bytealign (w[28], w[29], offset);
|
|
w[36] = amd_bytealign (w[27], w[28], offset);
|
|
w[35] = amd_bytealign (w[26], w[27], offset);
|
|
w[34] = amd_bytealign (w[25], w[26], offset);
|
|
w[33] = amd_bytealign (w[24], w[25], offset);
|
|
w[32] = amd_bytealign (w[23], w[24], offset);
|
|
w[31] = amd_bytealign (w[22], w[23], offset);
|
|
w[30] = amd_bytealign (w[21], w[22], offset);
|
|
w[29] = amd_bytealign (w[20], w[21], offset);
|
|
w[28] = amd_bytealign (w[19], w[20], offset);
|
|
w[27] = amd_bytealign (w[18], w[19], offset);
|
|
w[26] = amd_bytealign (w[17], w[18], offset);
|
|
w[25] = amd_bytealign (w[16], w[17], offset);
|
|
w[24] = amd_bytealign (w[15], w[16], offset);
|
|
w[23] = amd_bytealign (w[14], w[15], offset);
|
|
w[22] = amd_bytealign (w[13], w[14], offset);
|
|
w[21] = amd_bytealign (w[12], w[13], offset);
|
|
w[20] = amd_bytealign (w[11], w[12], offset);
|
|
w[19] = amd_bytealign (w[10], w[11], offset);
|
|
w[18] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[17] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[16] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[15] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[14] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[13] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[12] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[11] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[10] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[ 9] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 8] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = amd_bytealign (w[53], w[54], offset);
|
|
w[62] = amd_bytealign (w[52], w[53], offset);
|
|
w[61] = amd_bytealign (w[51], w[52], offset);
|
|
w[60] = amd_bytealign (w[50], w[51], offset);
|
|
w[59] = amd_bytealign (w[49], w[50], offset);
|
|
w[58] = amd_bytealign (w[48], w[49], offset);
|
|
w[57] = amd_bytealign (w[47], w[48], offset);
|
|
w[56] = amd_bytealign (w[46], w[47], offset);
|
|
w[55] = amd_bytealign (w[45], w[46], offset);
|
|
w[54] = amd_bytealign (w[44], w[45], offset);
|
|
w[53] = amd_bytealign (w[43], w[44], offset);
|
|
w[52] = amd_bytealign (w[42], w[43], offset);
|
|
w[51] = amd_bytealign (w[41], w[42], offset);
|
|
w[50] = amd_bytealign (w[40], w[41], offset);
|
|
w[49] = amd_bytealign (w[39], w[40], offset);
|
|
w[48] = amd_bytealign (w[38], w[39], offset);
|
|
w[47] = amd_bytealign (w[37], w[38], offset);
|
|
w[46] = amd_bytealign (w[36], w[37], offset);
|
|
w[45] = amd_bytealign (w[35], w[36], offset);
|
|
w[44] = amd_bytealign (w[34], w[35], offset);
|
|
w[43] = amd_bytealign (w[33], w[34], offset);
|
|
w[42] = amd_bytealign (w[32], w[33], offset);
|
|
w[41] = amd_bytealign (w[31], w[32], offset);
|
|
w[40] = amd_bytealign (w[30], w[31], offset);
|
|
w[39] = amd_bytealign (w[29], w[30], offset);
|
|
w[38] = amd_bytealign (w[28], w[29], offset);
|
|
w[37] = amd_bytealign (w[27], w[28], offset);
|
|
w[36] = amd_bytealign (w[26], w[27], offset);
|
|
w[35] = amd_bytealign (w[25], w[26], offset);
|
|
w[34] = amd_bytealign (w[24], w[25], offset);
|
|
w[33] = amd_bytealign (w[23], w[24], offset);
|
|
w[32] = amd_bytealign (w[22], w[23], offset);
|
|
w[31] = amd_bytealign (w[21], w[22], offset);
|
|
w[30] = amd_bytealign (w[20], w[21], offset);
|
|
w[29] = amd_bytealign (w[19], w[20], offset);
|
|
w[28] = amd_bytealign (w[18], w[19], offset);
|
|
w[27] = amd_bytealign (w[17], w[18], offset);
|
|
w[26] = amd_bytealign (w[16], w[17], offset);
|
|
w[25] = amd_bytealign (w[15], w[16], offset);
|
|
w[24] = amd_bytealign (w[14], w[15], offset);
|
|
w[23] = amd_bytealign (w[13], w[14], offset);
|
|
w[22] = amd_bytealign (w[12], w[13], offset);
|
|
w[21] = amd_bytealign (w[11], w[12], offset);
|
|
w[20] = amd_bytealign (w[10], w[11], offset);
|
|
w[19] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[18] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[17] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[16] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[15] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[14] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[13] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[12] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[11] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[10] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[ 9] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = amd_bytealign (w[52], w[53], offset);
|
|
w[62] = amd_bytealign (w[51], w[52], offset);
|
|
w[61] = amd_bytealign (w[50], w[51], offset);
|
|
w[60] = amd_bytealign (w[49], w[50], offset);
|
|
w[59] = amd_bytealign (w[48], w[49], offset);
|
|
w[58] = amd_bytealign (w[47], w[48], offset);
|
|
w[57] = amd_bytealign (w[46], w[47], offset);
|
|
w[56] = amd_bytealign (w[45], w[46], offset);
|
|
w[55] = amd_bytealign (w[44], w[45], offset);
|
|
w[54] = amd_bytealign (w[43], w[44], offset);
|
|
w[53] = amd_bytealign (w[42], w[43], offset);
|
|
w[52] = amd_bytealign (w[41], w[42], offset);
|
|
w[51] = amd_bytealign (w[40], w[41], offset);
|
|
w[50] = amd_bytealign (w[39], w[40], offset);
|
|
w[49] = amd_bytealign (w[38], w[39], offset);
|
|
w[48] = amd_bytealign (w[37], w[38], offset);
|
|
w[47] = amd_bytealign (w[36], w[37], offset);
|
|
w[46] = amd_bytealign (w[35], w[36], offset);
|
|
w[45] = amd_bytealign (w[34], w[35], offset);
|
|
w[44] = amd_bytealign (w[33], w[34], offset);
|
|
w[43] = amd_bytealign (w[32], w[33], offset);
|
|
w[42] = amd_bytealign (w[31], w[32], offset);
|
|
w[41] = amd_bytealign (w[30], w[31], offset);
|
|
w[40] = amd_bytealign (w[29], w[30], offset);
|
|
w[39] = amd_bytealign (w[28], w[29], offset);
|
|
w[38] = amd_bytealign (w[27], w[28], offset);
|
|
w[37] = amd_bytealign (w[26], w[27], offset);
|
|
w[36] = amd_bytealign (w[25], w[26], offset);
|
|
w[35] = amd_bytealign (w[24], w[25], offset);
|
|
w[34] = amd_bytealign (w[23], w[24], offset);
|
|
w[33] = amd_bytealign (w[22], w[23], offset);
|
|
w[32] = amd_bytealign (w[21], w[22], offset);
|
|
w[31] = amd_bytealign (w[20], w[21], offset);
|
|
w[30] = amd_bytealign (w[19], w[20], offset);
|
|
w[29] = amd_bytealign (w[18], w[19], offset);
|
|
w[28] = amd_bytealign (w[17], w[18], offset);
|
|
w[27] = amd_bytealign (w[16], w[17], offset);
|
|
w[26] = amd_bytealign (w[15], w[16], offset);
|
|
w[25] = amd_bytealign (w[14], w[15], offset);
|
|
w[24] = amd_bytealign (w[13], w[14], offset);
|
|
w[23] = amd_bytealign (w[12], w[13], offset);
|
|
w[22] = amd_bytealign (w[11], w[12], offset);
|
|
w[21] = amd_bytealign (w[10], w[11], offset);
|
|
w[20] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[19] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[18] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[17] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[16] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[15] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[14] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[13] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[12] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[11] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[10] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = amd_bytealign (w[51], w[52], offset);
|
|
w[62] = amd_bytealign (w[50], w[51], offset);
|
|
w[61] = amd_bytealign (w[49], w[50], offset);
|
|
w[60] = amd_bytealign (w[48], w[49], offset);
|
|
w[59] = amd_bytealign (w[47], w[48], offset);
|
|
w[58] = amd_bytealign (w[46], w[47], offset);
|
|
w[57] = amd_bytealign (w[45], w[46], offset);
|
|
w[56] = amd_bytealign (w[44], w[45], offset);
|
|
w[55] = amd_bytealign (w[43], w[44], offset);
|
|
w[54] = amd_bytealign (w[42], w[43], offset);
|
|
w[53] = amd_bytealign (w[41], w[42], offset);
|
|
w[52] = amd_bytealign (w[40], w[41], offset);
|
|
w[51] = amd_bytealign (w[39], w[40], offset);
|
|
w[50] = amd_bytealign (w[38], w[39], offset);
|
|
w[49] = amd_bytealign (w[37], w[38], offset);
|
|
w[48] = amd_bytealign (w[36], w[37], offset);
|
|
w[47] = amd_bytealign (w[35], w[36], offset);
|
|
w[46] = amd_bytealign (w[34], w[35], offset);
|
|
w[45] = amd_bytealign (w[33], w[34], offset);
|
|
w[44] = amd_bytealign (w[32], w[33], offset);
|
|
w[43] = amd_bytealign (w[31], w[32], offset);
|
|
w[42] = amd_bytealign (w[30], w[31], offset);
|
|
w[41] = amd_bytealign (w[29], w[30], offset);
|
|
w[40] = amd_bytealign (w[28], w[29], offset);
|
|
w[39] = amd_bytealign (w[27], w[28], offset);
|
|
w[38] = amd_bytealign (w[26], w[27], offset);
|
|
w[37] = amd_bytealign (w[25], w[26], offset);
|
|
w[36] = amd_bytealign (w[24], w[25], offset);
|
|
w[35] = amd_bytealign (w[23], w[24], offset);
|
|
w[34] = amd_bytealign (w[22], w[23], offset);
|
|
w[33] = amd_bytealign (w[21], w[22], offset);
|
|
w[32] = amd_bytealign (w[20], w[21], offset);
|
|
w[31] = amd_bytealign (w[19], w[20], offset);
|
|
w[30] = amd_bytealign (w[18], w[19], offset);
|
|
w[29] = amd_bytealign (w[17], w[18], offset);
|
|
w[28] = amd_bytealign (w[16], w[17], offset);
|
|
w[27] = amd_bytealign (w[15], w[16], offset);
|
|
w[26] = amd_bytealign (w[14], w[15], offset);
|
|
w[25] = amd_bytealign (w[13], w[14], offset);
|
|
w[24] = amd_bytealign (w[12], w[13], offset);
|
|
w[23] = amd_bytealign (w[11], w[12], offset);
|
|
w[22] = amd_bytealign (w[10], w[11], offset);
|
|
w[21] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[20] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[19] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[18] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[17] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[16] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[15] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[14] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[13] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[12] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[11] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = amd_bytealign (w[50], w[51], offset);
|
|
w[62] = amd_bytealign (w[49], w[50], offset);
|
|
w[61] = amd_bytealign (w[48], w[49], offset);
|
|
w[60] = amd_bytealign (w[47], w[48], offset);
|
|
w[59] = amd_bytealign (w[46], w[47], offset);
|
|
w[58] = amd_bytealign (w[45], w[46], offset);
|
|
w[57] = amd_bytealign (w[44], w[45], offset);
|
|
w[56] = amd_bytealign (w[43], w[44], offset);
|
|
w[55] = amd_bytealign (w[42], w[43], offset);
|
|
w[54] = amd_bytealign (w[41], w[42], offset);
|
|
w[53] = amd_bytealign (w[40], w[41], offset);
|
|
w[52] = amd_bytealign (w[39], w[40], offset);
|
|
w[51] = amd_bytealign (w[38], w[39], offset);
|
|
w[50] = amd_bytealign (w[37], w[38], offset);
|
|
w[49] = amd_bytealign (w[36], w[37], offset);
|
|
w[48] = amd_bytealign (w[35], w[36], offset);
|
|
w[47] = amd_bytealign (w[34], w[35], offset);
|
|
w[46] = amd_bytealign (w[33], w[34], offset);
|
|
w[45] = amd_bytealign (w[32], w[33], offset);
|
|
w[44] = amd_bytealign (w[31], w[32], offset);
|
|
w[43] = amd_bytealign (w[30], w[31], offset);
|
|
w[42] = amd_bytealign (w[29], w[30], offset);
|
|
w[41] = amd_bytealign (w[28], w[29], offset);
|
|
w[40] = amd_bytealign (w[27], w[28], offset);
|
|
w[39] = amd_bytealign (w[26], w[27], offset);
|
|
w[38] = amd_bytealign (w[25], w[26], offset);
|
|
w[37] = amd_bytealign (w[24], w[25], offset);
|
|
w[36] = amd_bytealign (w[23], w[24], offset);
|
|
w[35] = amd_bytealign (w[22], w[23], offset);
|
|
w[34] = amd_bytealign (w[21], w[22], offset);
|
|
w[33] = amd_bytealign (w[20], w[21], offset);
|
|
w[32] = amd_bytealign (w[19], w[20], offset);
|
|
w[31] = amd_bytealign (w[18], w[19], offset);
|
|
w[30] = amd_bytealign (w[17], w[18], offset);
|
|
w[29] = amd_bytealign (w[16], w[17], offset);
|
|
w[28] = amd_bytealign (w[15], w[16], offset);
|
|
w[27] = amd_bytealign (w[14], w[15], offset);
|
|
w[26] = amd_bytealign (w[13], w[14], offset);
|
|
w[25] = amd_bytealign (w[12], w[13], offset);
|
|
w[24] = amd_bytealign (w[11], w[12], offset);
|
|
w[23] = amd_bytealign (w[10], w[11], offset);
|
|
w[22] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[21] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[20] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[19] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[18] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[17] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[16] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[15] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[14] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[13] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[12] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = amd_bytealign (w[49], w[50], offset);
|
|
w[62] = amd_bytealign (w[48], w[49], offset);
|
|
w[61] = amd_bytealign (w[47], w[48], offset);
|
|
w[60] = amd_bytealign (w[46], w[47], offset);
|
|
w[59] = amd_bytealign (w[45], w[46], offset);
|
|
w[58] = amd_bytealign (w[44], w[45], offset);
|
|
w[57] = amd_bytealign (w[43], w[44], offset);
|
|
w[56] = amd_bytealign (w[42], w[43], offset);
|
|
w[55] = amd_bytealign (w[41], w[42], offset);
|
|
w[54] = amd_bytealign (w[40], w[41], offset);
|
|
w[53] = amd_bytealign (w[39], w[40], offset);
|
|
w[52] = amd_bytealign (w[38], w[39], offset);
|
|
w[51] = amd_bytealign (w[37], w[38], offset);
|
|
w[50] = amd_bytealign (w[36], w[37], offset);
|
|
w[49] = amd_bytealign (w[35], w[36], offset);
|
|
w[48] = amd_bytealign (w[34], w[35], offset);
|
|
w[47] = amd_bytealign (w[33], w[34], offset);
|
|
w[46] = amd_bytealign (w[32], w[33], offset);
|
|
w[45] = amd_bytealign (w[31], w[32], offset);
|
|
w[44] = amd_bytealign (w[30], w[31], offset);
|
|
w[43] = amd_bytealign (w[29], w[30], offset);
|
|
w[42] = amd_bytealign (w[28], w[29], offset);
|
|
w[41] = amd_bytealign (w[27], w[28], offset);
|
|
w[40] = amd_bytealign (w[26], w[27], offset);
|
|
w[39] = amd_bytealign (w[25], w[26], offset);
|
|
w[38] = amd_bytealign (w[24], w[25], offset);
|
|
w[37] = amd_bytealign (w[23], w[24], offset);
|
|
w[36] = amd_bytealign (w[22], w[23], offset);
|
|
w[35] = amd_bytealign (w[21], w[22], offset);
|
|
w[34] = amd_bytealign (w[20], w[21], offset);
|
|
w[33] = amd_bytealign (w[19], w[20], offset);
|
|
w[32] = amd_bytealign (w[18], w[19], offset);
|
|
w[31] = amd_bytealign (w[17], w[18], offset);
|
|
w[30] = amd_bytealign (w[16], w[17], offset);
|
|
w[29] = amd_bytealign (w[15], w[16], offset);
|
|
w[28] = amd_bytealign (w[14], w[15], offset);
|
|
w[27] = amd_bytealign (w[13], w[14], offset);
|
|
w[26] = amd_bytealign (w[12], w[13], offset);
|
|
w[25] = amd_bytealign (w[11], w[12], offset);
|
|
w[24] = amd_bytealign (w[10], w[11], offset);
|
|
w[23] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[22] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[21] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[20] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[19] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[18] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[17] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[16] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[15] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[14] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[13] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = amd_bytealign (w[48], w[49], offset);
|
|
w[62] = amd_bytealign (w[47], w[48], offset);
|
|
w[61] = amd_bytealign (w[46], w[47], offset);
|
|
w[60] = amd_bytealign (w[45], w[46], offset);
|
|
w[59] = amd_bytealign (w[44], w[45], offset);
|
|
w[58] = amd_bytealign (w[43], w[44], offset);
|
|
w[57] = amd_bytealign (w[42], w[43], offset);
|
|
w[56] = amd_bytealign (w[41], w[42], offset);
|
|
w[55] = amd_bytealign (w[40], w[41], offset);
|
|
w[54] = amd_bytealign (w[39], w[40], offset);
|
|
w[53] = amd_bytealign (w[38], w[39], offset);
|
|
w[52] = amd_bytealign (w[37], w[38], offset);
|
|
w[51] = amd_bytealign (w[36], w[37], offset);
|
|
w[50] = amd_bytealign (w[35], w[36], offset);
|
|
w[49] = amd_bytealign (w[34], w[35], offset);
|
|
w[48] = amd_bytealign (w[33], w[34], offset);
|
|
w[47] = amd_bytealign (w[32], w[33], offset);
|
|
w[46] = amd_bytealign (w[31], w[32], offset);
|
|
w[45] = amd_bytealign (w[30], w[31], offset);
|
|
w[44] = amd_bytealign (w[29], w[30], offset);
|
|
w[43] = amd_bytealign (w[28], w[29], offset);
|
|
w[42] = amd_bytealign (w[27], w[28], offset);
|
|
w[41] = amd_bytealign (w[26], w[27], offset);
|
|
w[40] = amd_bytealign (w[25], w[26], offset);
|
|
w[39] = amd_bytealign (w[24], w[25], offset);
|
|
w[38] = amd_bytealign (w[23], w[24], offset);
|
|
w[37] = amd_bytealign (w[22], w[23], offset);
|
|
w[36] = amd_bytealign (w[21], w[22], offset);
|
|
w[35] = amd_bytealign (w[20], w[21], offset);
|
|
w[34] = amd_bytealign (w[19], w[20], offset);
|
|
w[33] = amd_bytealign (w[18], w[19], offset);
|
|
w[32] = amd_bytealign (w[17], w[18], offset);
|
|
w[31] = amd_bytealign (w[16], w[17], offset);
|
|
w[30] = amd_bytealign (w[15], w[16], offset);
|
|
w[29] = amd_bytealign (w[14], w[15], offset);
|
|
w[28] = amd_bytealign (w[13], w[14], offset);
|
|
w[27] = amd_bytealign (w[12], w[13], offset);
|
|
w[26] = amd_bytealign (w[11], w[12], offset);
|
|
w[25] = amd_bytealign (w[10], w[11], offset);
|
|
w[24] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[23] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[22] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[21] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[20] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[19] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[18] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[17] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[16] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[15] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[14] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = amd_bytealign (w[47], w[48], offset);
|
|
w[62] = amd_bytealign (w[46], w[47], offset);
|
|
w[61] = amd_bytealign (w[45], w[46], offset);
|
|
w[60] = amd_bytealign (w[44], w[45], offset);
|
|
w[59] = amd_bytealign (w[43], w[44], offset);
|
|
w[58] = amd_bytealign (w[42], w[43], offset);
|
|
w[57] = amd_bytealign (w[41], w[42], offset);
|
|
w[56] = amd_bytealign (w[40], w[41], offset);
|
|
w[55] = amd_bytealign (w[39], w[40], offset);
|
|
w[54] = amd_bytealign (w[38], w[39], offset);
|
|
w[53] = amd_bytealign (w[37], w[38], offset);
|
|
w[52] = amd_bytealign (w[36], w[37], offset);
|
|
w[51] = amd_bytealign (w[35], w[36], offset);
|
|
w[50] = amd_bytealign (w[34], w[35], offset);
|
|
w[49] = amd_bytealign (w[33], w[34], offset);
|
|
w[48] = amd_bytealign (w[32], w[33], offset);
|
|
w[47] = amd_bytealign (w[31], w[32], offset);
|
|
w[46] = amd_bytealign (w[30], w[31], offset);
|
|
w[45] = amd_bytealign (w[29], w[30], offset);
|
|
w[44] = amd_bytealign (w[28], w[29], offset);
|
|
w[43] = amd_bytealign (w[27], w[28], offset);
|
|
w[42] = amd_bytealign (w[26], w[27], offset);
|
|
w[41] = amd_bytealign (w[25], w[26], offset);
|
|
w[40] = amd_bytealign (w[24], w[25], offset);
|
|
w[39] = amd_bytealign (w[23], w[24], offset);
|
|
w[38] = amd_bytealign (w[22], w[23], offset);
|
|
w[37] = amd_bytealign (w[21], w[22], offset);
|
|
w[36] = amd_bytealign (w[20], w[21], offset);
|
|
w[35] = amd_bytealign (w[19], w[20], offset);
|
|
w[34] = amd_bytealign (w[18], w[19], offset);
|
|
w[33] = amd_bytealign (w[17], w[18], offset);
|
|
w[32] = amd_bytealign (w[16], w[17], offset);
|
|
w[31] = amd_bytealign (w[15], w[16], offset);
|
|
w[30] = amd_bytealign (w[14], w[15], offset);
|
|
w[29] = amd_bytealign (w[13], w[14], offset);
|
|
w[28] = amd_bytealign (w[12], w[13], offset);
|
|
w[27] = amd_bytealign (w[11], w[12], offset);
|
|
w[26] = amd_bytealign (w[10], w[11], offset);
|
|
w[25] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[24] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[23] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[22] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[21] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[20] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[19] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[18] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[17] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[16] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[15] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = amd_bytealign (w[46], w[47], offset);
|
|
w[62] = amd_bytealign (w[45], w[46], offset);
|
|
w[61] = amd_bytealign (w[44], w[45], offset);
|
|
w[60] = amd_bytealign (w[43], w[44], offset);
|
|
w[59] = amd_bytealign (w[42], w[43], offset);
|
|
w[58] = amd_bytealign (w[41], w[42], offset);
|
|
w[57] = amd_bytealign (w[40], w[41], offset);
|
|
w[56] = amd_bytealign (w[39], w[40], offset);
|
|
w[55] = amd_bytealign (w[38], w[39], offset);
|
|
w[54] = amd_bytealign (w[37], w[38], offset);
|
|
w[53] = amd_bytealign (w[36], w[37], offset);
|
|
w[52] = amd_bytealign (w[35], w[36], offset);
|
|
w[51] = amd_bytealign (w[34], w[35], offset);
|
|
w[50] = amd_bytealign (w[33], w[34], offset);
|
|
w[49] = amd_bytealign (w[32], w[33], offset);
|
|
w[48] = amd_bytealign (w[31], w[32], offset);
|
|
w[47] = amd_bytealign (w[30], w[31], offset);
|
|
w[46] = amd_bytealign (w[29], w[30], offset);
|
|
w[45] = amd_bytealign (w[28], w[29], offset);
|
|
w[44] = amd_bytealign (w[27], w[28], offset);
|
|
w[43] = amd_bytealign (w[26], w[27], offset);
|
|
w[42] = amd_bytealign (w[25], w[26], offset);
|
|
w[41] = amd_bytealign (w[24], w[25], offset);
|
|
w[40] = amd_bytealign (w[23], w[24], offset);
|
|
w[39] = amd_bytealign (w[22], w[23], offset);
|
|
w[38] = amd_bytealign (w[21], w[22], offset);
|
|
w[37] = amd_bytealign (w[20], w[21], offset);
|
|
w[36] = amd_bytealign (w[19], w[20], offset);
|
|
w[35] = amd_bytealign (w[18], w[19], offset);
|
|
w[34] = amd_bytealign (w[17], w[18], offset);
|
|
w[33] = amd_bytealign (w[16], w[17], offset);
|
|
w[32] = amd_bytealign (w[15], w[16], offset);
|
|
w[31] = amd_bytealign (w[14], w[15], offset);
|
|
w[30] = amd_bytealign (w[13], w[14], offset);
|
|
w[29] = amd_bytealign (w[12], w[13], offset);
|
|
w[28] = amd_bytealign (w[11], w[12], offset);
|
|
w[27] = amd_bytealign (w[10], w[11], offset);
|
|
w[26] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[25] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[24] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[23] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[22] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[21] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[20] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[19] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[18] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[17] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[16] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = amd_bytealign (w[45], w[46], offset);
|
|
w[62] = amd_bytealign (w[44], w[45], offset);
|
|
w[61] = amd_bytealign (w[43], w[44], offset);
|
|
w[60] = amd_bytealign (w[42], w[43], offset);
|
|
w[59] = amd_bytealign (w[41], w[42], offset);
|
|
w[58] = amd_bytealign (w[40], w[41], offset);
|
|
w[57] = amd_bytealign (w[39], w[40], offset);
|
|
w[56] = amd_bytealign (w[38], w[39], offset);
|
|
w[55] = amd_bytealign (w[37], w[38], offset);
|
|
w[54] = amd_bytealign (w[36], w[37], offset);
|
|
w[53] = amd_bytealign (w[35], w[36], offset);
|
|
w[52] = amd_bytealign (w[34], w[35], offset);
|
|
w[51] = amd_bytealign (w[33], w[34], offset);
|
|
w[50] = amd_bytealign (w[32], w[33], offset);
|
|
w[49] = amd_bytealign (w[31], w[32], offset);
|
|
w[48] = amd_bytealign (w[30], w[31], offset);
|
|
w[47] = amd_bytealign (w[29], w[30], offset);
|
|
w[46] = amd_bytealign (w[28], w[29], offset);
|
|
w[45] = amd_bytealign (w[27], w[28], offset);
|
|
w[44] = amd_bytealign (w[26], w[27], offset);
|
|
w[43] = amd_bytealign (w[25], w[26], offset);
|
|
w[42] = amd_bytealign (w[24], w[25], offset);
|
|
w[41] = amd_bytealign (w[23], w[24], offset);
|
|
w[40] = amd_bytealign (w[22], w[23], offset);
|
|
w[39] = amd_bytealign (w[21], w[22], offset);
|
|
w[38] = amd_bytealign (w[20], w[21], offset);
|
|
w[37] = amd_bytealign (w[19], w[20], offset);
|
|
w[36] = amd_bytealign (w[18], w[19], offset);
|
|
w[35] = amd_bytealign (w[17], w[18], offset);
|
|
w[34] = amd_bytealign (w[16], w[17], offset);
|
|
w[33] = amd_bytealign (w[15], w[16], offset);
|
|
w[32] = amd_bytealign (w[14], w[15], offset);
|
|
w[31] = amd_bytealign (w[13], w[14], offset);
|
|
w[30] = amd_bytealign (w[12], w[13], offset);
|
|
w[29] = amd_bytealign (w[11], w[12], offset);
|
|
w[28] = amd_bytealign (w[10], w[11], offset);
|
|
w[27] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[26] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[25] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[24] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[23] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[22] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[21] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[20] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[19] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[18] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[17] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = amd_bytealign (w[44], w[45], offset);
|
|
w[62] = amd_bytealign (w[43], w[44], offset);
|
|
w[61] = amd_bytealign (w[42], w[43], offset);
|
|
w[60] = amd_bytealign (w[41], w[42], offset);
|
|
w[59] = amd_bytealign (w[40], w[41], offset);
|
|
w[58] = amd_bytealign (w[39], w[40], offset);
|
|
w[57] = amd_bytealign (w[38], w[39], offset);
|
|
w[56] = amd_bytealign (w[37], w[38], offset);
|
|
w[55] = amd_bytealign (w[36], w[37], offset);
|
|
w[54] = amd_bytealign (w[35], w[36], offset);
|
|
w[53] = amd_bytealign (w[34], w[35], offset);
|
|
w[52] = amd_bytealign (w[33], w[34], offset);
|
|
w[51] = amd_bytealign (w[32], w[33], offset);
|
|
w[50] = amd_bytealign (w[31], w[32], offset);
|
|
w[49] = amd_bytealign (w[30], w[31], offset);
|
|
w[48] = amd_bytealign (w[29], w[30], offset);
|
|
w[47] = amd_bytealign (w[28], w[29], offset);
|
|
w[46] = amd_bytealign (w[27], w[28], offset);
|
|
w[45] = amd_bytealign (w[26], w[27], offset);
|
|
w[44] = amd_bytealign (w[25], w[26], offset);
|
|
w[43] = amd_bytealign (w[24], w[25], offset);
|
|
w[42] = amd_bytealign (w[23], w[24], offset);
|
|
w[41] = amd_bytealign (w[22], w[23], offset);
|
|
w[40] = amd_bytealign (w[21], w[22], offset);
|
|
w[39] = amd_bytealign (w[20], w[21], offset);
|
|
w[38] = amd_bytealign (w[19], w[20], offset);
|
|
w[37] = amd_bytealign (w[18], w[19], offset);
|
|
w[36] = amd_bytealign (w[17], w[18], offset);
|
|
w[35] = amd_bytealign (w[16], w[17], offset);
|
|
w[34] = amd_bytealign (w[15], w[16], offset);
|
|
w[33] = amd_bytealign (w[14], w[15], offset);
|
|
w[32] = amd_bytealign (w[13], w[14], offset);
|
|
w[31] = amd_bytealign (w[12], w[13], offset);
|
|
w[30] = amd_bytealign (w[11], w[12], offset);
|
|
w[29] = amd_bytealign (w[10], w[11], offset);
|
|
w[28] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[27] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[26] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[25] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[24] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[23] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[22] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[21] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[20] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[19] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[18] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = amd_bytealign (w[43], w[44], offset);
|
|
w[62] = amd_bytealign (w[42], w[43], offset);
|
|
w[61] = amd_bytealign (w[41], w[42], offset);
|
|
w[60] = amd_bytealign (w[40], w[41], offset);
|
|
w[59] = amd_bytealign (w[39], w[40], offset);
|
|
w[58] = amd_bytealign (w[38], w[39], offset);
|
|
w[57] = amd_bytealign (w[37], w[38], offset);
|
|
w[56] = amd_bytealign (w[36], w[37], offset);
|
|
w[55] = amd_bytealign (w[35], w[36], offset);
|
|
w[54] = amd_bytealign (w[34], w[35], offset);
|
|
w[53] = amd_bytealign (w[33], w[34], offset);
|
|
w[52] = amd_bytealign (w[32], w[33], offset);
|
|
w[51] = amd_bytealign (w[31], w[32], offset);
|
|
w[50] = amd_bytealign (w[30], w[31], offset);
|
|
w[49] = amd_bytealign (w[29], w[30], offset);
|
|
w[48] = amd_bytealign (w[28], w[29], offset);
|
|
w[47] = amd_bytealign (w[27], w[28], offset);
|
|
w[46] = amd_bytealign (w[26], w[27], offset);
|
|
w[45] = amd_bytealign (w[25], w[26], offset);
|
|
w[44] = amd_bytealign (w[24], w[25], offset);
|
|
w[43] = amd_bytealign (w[23], w[24], offset);
|
|
w[42] = amd_bytealign (w[22], w[23], offset);
|
|
w[41] = amd_bytealign (w[21], w[22], offset);
|
|
w[40] = amd_bytealign (w[20], w[21], offset);
|
|
w[39] = amd_bytealign (w[19], w[20], offset);
|
|
w[38] = amd_bytealign (w[18], w[19], offset);
|
|
w[37] = amd_bytealign (w[17], w[18], offset);
|
|
w[36] = amd_bytealign (w[16], w[17], offset);
|
|
w[35] = amd_bytealign (w[15], w[16], offset);
|
|
w[34] = amd_bytealign (w[14], w[15], offset);
|
|
w[33] = amd_bytealign (w[13], w[14], offset);
|
|
w[32] = amd_bytealign (w[12], w[13], offset);
|
|
w[31] = amd_bytealign (w[11], w[12], offset);
|
|
w[30] = amd_bytealign (w[10], w[11], offset);
|
|
w[29] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[28] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[27] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[26] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[25] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[24] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[23] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[22] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[21] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[20] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[19] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = amd_bytealign (w[42], w[43], offset);
|
|
w[62] = amd_bytealign (w[41], w[42], offset);
|
|
w[61] = amd_bytealign (w[40], w[41], offset);
|
|
w[60] = amd_bytealign (w[39], w[40], offset);
|
|
w[59] = amd_bytealign (w[38], w[39], offset);
|
|
w[58] = amd_bytealign (w[37], w[38], offset);
|
|
w[57] = amd_bytealign (w[36], w[37], offset);
|
|
w[56] = amd_bytealign (w[35], w[36], offset);
|
|
w[55] = amd_bytealign (w[34], w[35], offset);
|
|
w[54] = amd_bytealign (w[33], w[34], offset);
|
|
w[53] = amd_bytealign (w[32], w[33], offset);
|
|
w[52] = amd_bytealign (w[31], w[32], offset);
|
|
w[51] = amd_bytealign (w[30], w[31], offset);
|
|
w[50] = amd_bytealign (w[29], w[30], offset);
|
|
w[49] = amd_bytealign (w[28], w[29], offset);
|
|
w[48] = amd_bytealign (w[27], w[28], offset);
|
|
w[47] = amd_bytealign (w[26], w[27], offset);
|
|
w[46] = amd_bytealign (w[25], w[26], offset);
|
|
w[45] = amd_bytealign (w[24], w[25], offset);
|
|
w[44] = amd_bytealign (w[23], w[24], offset);
|
|
w[43] = amd_bytealign (w[22], w[23], offset);
|
|
w[42] = amd_bytealign (w[21], w[22], offset);
|
|
w[41] = amd_bytealign (w[20], w[21], offset);
|
|
w[40] = amd_bytealign (w[19], w[20], offset);
|
|
w[39] = amd_bytealign (w[18], w[19], offset);
|
|
w[38] = amd_bytealign (w[17], w[18], offset);
|
|
w[37] = amd_bytealign (w[16], w[17], offset);
|
|
w[36] = amd_bytealign (w[15], w[16], offset);
|
|
w[35] = amd_bytealign (w[14], w[15], offset);
|
|
w[34] = amd_bytealign (w[13], w[14], offset);
|
|
w[33] = amd_bytealign (w[12], w[13], offset);
|
|
w[32] = amd_bytealign (w[11], w[12], offset);
|
|
w[31] = amd_bytealign (w[10], w[11], offset);
|
|
w[30] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[29] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[28] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[27] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[26] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[25] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[24] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[23] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[22] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[21] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[20] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = amd_bytealign (w[41], w[42], offset);
|
|
w[62] = amd_bytealign (w[40], w[41], offset);
|
|
w[61] = amd_bytealign (w[39], w[40], offset);
|
|
w[60] = amd_bytealign (w[38], w[39], offset);
|
|
w[59] = amd_bytealign (w[37], w[38], offset);
|
|
w[58] = amd_bytealign (w[36], w[37], offset);
|
|
w[57] = amd_bytealign (w[35], w[36], offset);
|
|
w[56] = amd_bytealign (w[34], w[35], offset);
|
|
w[55] = amd_bytealign (w[33], w[34], offset);
|
|
w[54] = amd_bytealign (w[32], w[33], offset);
|
|
w[53] = amd_bytealign (w[31], w[32], offset);
|
|
w[52] = amd_bytealign (w[30], w[31], offset);
|
|
w[51] = amd_bytealign (w[29], w[30], offset);
|
|
w[50] = amd_bytealign (w[28], w[29], offset);
|
|
w[49] = amd_bytealign (w[27], w[28], offset);
|
|
w[48] = amd_bytealign (w[26], w[27], offset);
|
|
w[47] = amd_bytealign (w[25], w[26], offset);
|
|
w[46] = amd_bytealign (w[24], w[25], offset);
|
|
w[45] = amd_bytealign (w[23], w[24], offset);
|
|
w[44] = amd_bytealign (w[22], w[23], offset);
|
|
w[43] = amd_bytealign (w[21], w[22], offset);
|
|
w[42] = amd_bytealign (w[20], w[21], offset);
|
|
w[41] = amd_bytealign (w[19], w[20], offset);
|
|
w[40] = amd_bytealign (w[18], w[19], offset);
|
|
w[39] = amd_bytealign (w[17], w[18], offset);
|
|
w[38] = amd_bytealign (w[16], w[17], offset);
|
|
w[37] = amd_bytealign (w[15], w[16], offset);
|
|
w[36] = amd_bytealign (w[14], w[15], offset);
|
|
w[35] = amd_bytealign (w[13], w[14], offset);
|
|
w[34] = amd_bytealign (w[12], w[13], offset);
|
|
w[33] = amd_bytealign (w[11], w[12], offset);
|
|
w[32] = amd_bytealign (w[10], w[11], offset);
|
|
w[31] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[30] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[29] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[28] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[27] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[26] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[25] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[24] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[23] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[22] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[21] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = amd_bytealign (w[40], w[41], offset);
|
|
w[62] = amd_bytealign (w[39], w[40], offset);
|
|
w[61] = amd_bytealign (w[38], w[39], offset);
|
|
w[60] = amd_bytealign (w[37], w[38], offset);
|
|
w[59] = amd_bytealign (w[36], w[37], offset);
|
|
w[58] = amd_bytealign (w[35], w[36], offset);
|
|
w[57] = amd_bytealign (w[34], w[35], offset);
|
|
w[56] = amd_bytealign (w[33], w[34], offset);
|
|
w[55] = amd_bytealign (w[32], w[33], offset);
|
|
w[54] = amd_bytealign (w[31], w[32], offset);
|
|
w[53] = amd_bytealign (w[30], w[31], offset);
|
|
w[52] = amd_bytealign (w[29], w[30], offset);
|
|
w[51] = amd_bytealign (w[28], w[29], offset);
|
|
w[50] = amd_bytealign (w[27], w[28], offset);
|
|
w[49] = amd_bytealign (w[26], w[27], offset);
|
|
w[48] = amd_bytealign (w[25], w[26], offset);
|
|
w[47] = amd_bytealign (w[24], w[25], offset);
|
|
w[46] = amd_bytealign (w[23], w[24], offset);
|
|
w[45] = amd_bytealign (w[22], w[23], offset);
|
|
w[44] = amd_bytealign (w[21], w[22], offset);
|
|
w[43] = amd_bytealign (w[20], w[21], offset);
|
|
w[42] = amd_bytealign (w[19], w[20], offset);
|
|
w[41] = amd_bytealign (w[18], w[19], offset);
|
|
w[40] = amd_bytealign (w[17], w[18], offset);
|
|
w[39] = amd_bytealign (w[16], w[17], offset);
|
|
w[38] = amd_bytealign (w[15], w[16], offset);
|
|
w[37] = amd_bytealign (w[14], w[15], offset);
|
|
w[36] = amd_bytealign (w[13], w[14], offset);
|
|
w[35] = amd_bytealign (w[12], w[13], offset);
|
|
w[34] = amd_bytealign (w[11], w[12], offset);
|
|
w[33] = amd_bytealign (w[10], w[11], offset);
|
|
w[32] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[31] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[30] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[29] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[28] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[27] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[26] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[25] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[24] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[23] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[22] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = amd_bytealign (w[39], w[40], offset);
|
|
w[62] = amd_bytealign (w[38], w[39], offset);
|
|
w[61] = amd_bytealign (w[37], w[38], offset);
|
|
w[60] = amd_bytealign (w[36], w[37], offset);
|
|
w[59] = amd_bytealign (w[35], w[36], offset);
|
|
w[58] = amd_bytealign (w[34], w[35], offset);
|
|
w[57] = amd_bytealign (w[33], w[34], offset);
|
|
w[56] = amd_bytealign (w[32], w[33], offset);
|
|
w[55] = amd_bytealign (w[31], w[32], offset);
|
|
w[54] = amd_bytealign (w[30], w[31], offset);
|
|
w[53] = amd_bytealign (w[29], w[30], offset);
|
|
w[52] = amd_bytealign (w[28], w[29], offset);
|
|
w[51] = amd_bytealign (w[27], w[28], offset);
|
|
w[50] = amd_bytealign (w[26], w[27], offset);
|
|
w[49] = amd_bytealign (w[25], w[26], offset);
|
|
w[48] = amd_bytealign (w[24], w[25], offset);
|
|
w[47] = amd_bytealign (w[23], w[24], offset);
|
|
w[46] = amd_bytealign (w[22], w[23], offset);
|
|
w[45] = amd_bytealign (w[21], w[22], offset);
|
|
w[44] = amd_bytealign (w[20], w[21], offset);
|
|
w[43] = amd_bytealign (w[19], w[20], offset);
|
|
w[42] = amd_bytealign (w[18], w[19], offset);
|
|
w[41] = amd_bytealign (w[17], w[18], offset);
|
|
w[40] = amd_bytealign (w[16], w[17], offset);
|
|
w[39] = amd_bytealign (w[15], w[16], offset);
|
|
w[38] = amd_bytealign (w[14], w[15], offset);
|
|
w[37] = amd_bytealign (w[13], w[14], offset);
|
|
w[36] = amd_bytealign (w[12], w[13], offset);
|
|
w[35] = amd_bytealign (w[11], w[12], offset);
|
|
w[34] = amd_bytealign (w[10], w[11], offset);
|
|
w[33] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[32] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[31] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[30] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[29] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[28] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[27] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[26] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[25] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[24] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[23] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = amd_bytealign (w[38], w[39], offset);
|
|
w[62] = amd_bytealign (w[37], w[38], offset);
|
|
w[61] = amd_bytealign (w[36], w[37], offset);
|
|
w[60] = amd_bytealign (w[35], w[36], offset);
|
|
w[59] = amd_bytealign (w[34], w[35], offset);
|
|
w[58] = amd_bytealign (w[33], w[34], offset);
|
|
w[57] = amd_bytealign (w[32], w[33], offset);
|
|
w[56] = amd_bytealign (w[31], w[32], offset);
|
|
w[55] = amd_bytealign (w[30], w[31], offset);
|
|
w[54] = amd_bytealign (w[29], w[30], offset);
|
|
w[53] = amd_bytealign (w[28], w[29], offset);
|
|
w[52] = amd_bytealign (w[27], w[28], offset);
|
|
w[51] = amd_bytealign (w[26], w[27], offset);
|
|
w[50] = amd_bytealign (w[25], w[26], offset);
|
|
w[49] = amd_bytealign (w[24], w[25], offset);
|
|
w[48] = amd_bytealign (w[23], w[24], offset);
|
|
w[47] = amd_bytealign (w[22], w[23], offset);
|
|
w[46] = amd_bytealign (w[21], w[22], offset);
|
|
w[45] = amd_bytealign (w[20], w[21], offset);
|
|
w[44] = amd_bytealign (w[19], w[20], offset);
|
|
w[43] = amd_bytealign (w[18], w[19], offset);
|
|
w[42] = amd_bytealign (w[17], w[18], offset);
|
|
w[41] = amd_bytealign (w[16], w[17], offset);
|
|
w[40] = amd_bytealign (w[15], w[16], offset);
|
|
w[39] = amd_bytealign (w[14], w[15], offset);
|
|
w[38] = amd_bytealign (w[13], w[14], offset);
|
|
w[37] = amd_bytealign (w[12], w[13], offset);
|
|
w[36] = amd_bytealign (w[11], w[12], offset);
|
|
w[35] = amd_bytealign (w[10], w[11], offset);
|
|
w[34] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[33] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[32] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[31] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[30] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[29] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[28] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[27] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[26] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[25] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[24] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = amd_bytealign (w[37], w[38], offset);
|
|
w[62] = amd_bytealign (w[36], w[37], offset);
|
|
w[61] = amd_bytealign (w[35], w[36], offset);
|
|
w[60] = amd_bytealign (w[34], w[35], offset);
|
|
w[59] = amd_bytealign (w[33], w[34], offset);
|
|
w[58] = amd_bytealign (w[32], w[33], offset);
|
|
w[57] = amd_bytealign (w[31], w[32], offset);
|
|
w[56] = amd_bytealign (w[30], w[31], offset);
|
|
w[55] = amd_bytealign (w[29], w[30], offset);
|
|
w[54] = amd_bytealign (w[28], w[29], offset);
|
|
w[53] = amd_bytealign (w[27], w[28], offset);
|
|
w[52] = amd_bytealign (w[26], w[27], offset);
|
|
w[51] = amd_bytealign (w[25], w[26], offset);
|
|
w[50] = amd_bytealign (w[24], w[25], offset);
|
|
w[49] = amd_bytealign (w[23], w[24], offset);
|
|
w[48] = amd_bytealign (w[22], w[23], offset);
|
|
w[47] = amd_bytealign (w[21], w[22], offset);
|
|
w[46] = amd_bytealign (w[20], w[21], offset);
|
|
w[45] = amd_bytealign (w[19], w[20], offset);
|
|
w[44] = amd_bytealign (w[18], w[19], offset);
|
|
w[43] = amd_bytealign (w[17], w[18], offset);
|
|
w[42] = amd_bytealign (w[16], w[17], offset);
|
|
w[41] = amd_bytealign (w[15], w[16], offset);
|
|
w[40] = amd_bytealign (w[14], w[15], offset);
|
|
w[39] = amd_bytealign (w[13], w[14], offset);
|
|
w[38] = amd_bytealign (w[12], w[13], offset);
|
|
w[37] = amd_bytealign (w[11], w[12], offset);
|
|
w[36] = amd_bytealign (w[10], w[11], offset);
|
|
w[35] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[34] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[33] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[32] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[31] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[30] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[29] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[28] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[27] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[26] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[25] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = amd_bytealign (w[36], w[37], offset);
|
|
w[62] = amd_bytealign (w[35], w[36], offset);
|
|
w[61] = amd_bytealign (w[34], w[35], offset);
|
|
w[60] = amd_bytealign (w[33], w[34], offset);
|
|
w[59] = amd_bytealign (w[32], w[33], offset);
|
|
w[58] = amd_bytealign (w[31], w[32], offset);
|
|
w[57] = amd_bytealign (w[30], w[31], offset);
|
|
w[56] = amd_bytealign (w[29], w[30], offset);
|
|
w[55] = amd_bytealign (w[28], w[29], offset);
|
|
w[54] = amd_bytealign (w[27], w[28], offset);
|
|
w[53] = amd_bytealign (w[26], w[27], offset);
|
|
w[52] = amd_bytealign (w[25], w[26], offset);
|
|
w[51] = amd_bytealign (w[24], w[25], offset);
|
|
w[50] = amd_bytealign (w[23], w[24], offset);
|
|
w[49] = amd_bytealign (w[22], w[23], offset);
|
|
w[48] = amd_bytealign (w[21], w[22], offset);
|
|
w[47] = amd_bytealign (w[20], w[21], offset);
|
|
w[46] = amd_bytealign (w[19], w[20], offset);
|
|
w[45] = amd_bytealign (w[18], w[19], offset);
|
|
w[44] = amd_bytealign (w[17], w[18], offset);
|
|
w[43] = amd_bytealign (w[16], w[17], offset);
|
|
w[42] = amd_bytealign (w[15], w[16], offset);
|
|
w[41] = amd_bytealign (w[14], w[15], offset);
|
|
w[40] = amd_bytealign (w[13], w[14], offset);
|
|
w[39] = amd_bytealign (w[12], w[13], offset);
|
|
w[38] = amd_bytealign (w[11], w[12], offset);
|
|
w[37] = amd_bytealign (w[10], w[11], offset);
|
|
w[36] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[35] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[34] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[33] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[32] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[31] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[30] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[29] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[28] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[27] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[26] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = amd_bytealign (w[35], w[36], offset);
|
|
w[62] = amd_bytealign (w[34], w[35], offset);
|
|
w[61] = amd_bytealign (w[33], w[34], offset);
|
|
w[60] = amd_bytealign (w[32], w[33], offset);
|
|
w[59] = amd_bytealign (w[31], w[32], offset);
|
|
w[58] = amd_bytealign (w[30], w[31], offset);
|
|
w[57] = amd_bytealign (w[29], w[30], offset);
|
|
w[56] = amd_bytealign (w[28], w[29], offset);
|
|
w[55] = amd_bytealign (w[27], w[28], offset);
|
|
w[54] = amd_bytealign (w[26], w[27], offset);
|
|
w[53] = amd_bytealign (w[25], w[26], offset);
|
|
w[52] = amd_bytealign (w[24], w[25], offset);
|
|
w[51] = amd_bytealign (w[23], w[24], offset);
|
|
w[50] = amd_bytealign (w[22], w[23], offset);
|
|
w[49] = amd_bytealign (w[21], w[22], offset);
|
|
w[48] = amd_bytealign (w[20], w[21], offset);
|
|
w[47] = amd_bytealign (w[19], w[20], offset);
|
|
w[46] = amd_bytealign (w[18], w[19], offset);
|
|
w[45] = amd_bytealign (w[17], w[18], offset);
|
|
w[44] = amd_bytealign (w[16], w[17], offset);
|
|
w[43] = amd_bytealign (w[15], w[16], offset);
|
|
w[42] = amd_bytealign (w[14], w[15], offset);
|
|
w[41] = amd_bytealign (w[13], w[14], offset);
|
|
w[40] = amd_bytealign (w[12], w[13], offset);
|
|
w[39] = amd_bytealign (w[11], w[12], offset);
|
|
w[38] = amd_bytealign (w[10], w[11], offset);
|
|
w[37] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[36] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[35] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[34] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[33] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[32] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[31] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[30] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[29] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[28] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[27] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = amd_bytealign (w[34], w[35], offset);
|
|
w[62] = amd_bytealign (w[33], w[34], offset);
|
|
w[61] = amd_bytealign (w[32], w[33], offset);
|
|
w[60] = amd_bytealign (w[31], w[32], offset);
|
|
w[59] = amd_bytealign (w[30], w[31], offset);
|
|
w[58] = amd_bytealign (w[29], w[30], offset);
|
|
w[57] = amd_bytealign (w[28], w[29], offset);
|
|
w[56] = amd_bytealign (w[27], w[28], offset);
|
|
w[55] = amd_bytealign (w[26], w[27], offset);
|
|
w[54] = amd_bytealign (w[25], w[26], offset);
|
|
w[53] = amd_bytealign (w[24], w[25], offset);
|
|
w[52] = amd_bytealign (w[23], w[24], offset);
|
|
w[51] = amd_bytealign (w[22], w[23], offset);
|
|
w[50] = amd_bytealign (w[21], w[22], offset);
|
|
w[49] = amd_bytealign (w[20], w[21], offset);
|
|
w[48] = amd_bytealign (w[19], w[20], offset);
|
|
w[47] = amd_bytealign (w[18], w[19], offset);
|
|
w[46] = amd_bytealign (w[17], w[18], offset);
|
|
w[45] = amd_bytealign (w[16], w[17], offset);
|
|
w[44] = amd_bytealign (w[15], w[16], offset);
|
|
w[43] = amd_bytealign (w[14], w[15], offset);
|
|
w[42] = amd_bytealign (w[13], w[14], offset);
|
|
w[41] = amd_bytealign (w[12], w[13], offset);
|
|
w[40] = amd_bytealign (w[11], w[12], offset);
|
|
w[39] = amd_bytealign (w[10], w[11], offset);
|
|
w[38] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[37] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[36] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[35] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[34] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[33] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[32] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[31] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[30] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[29] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[28] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = amd_bytealign (w[33], w[34], offset);
|
|
w[62] = amd_bytealign (w[32], w[33], offset);
|
|
w[61] = amd_bytealign (w[31], w[32], offset);
|
|
w[60] = amd_bytealign (w[30], w[31], offset);
|
|
w[59] = amd_bytealign (w[29], w[30], offset);
|
|
w[58] = amd_bytealign (w[28], w[29], offset);
|
|
w[57] = amd_bytealign (w[27], w[28], offset);
|
|
w[56] = amd_bytealign (w[26], w[27], offset);
|
|
w[55] = amd_bytealign (w[25], w[26], offset);
|
|
w[54] = amd_bytealign (w[24], w[25], offset);
|
|
w[53] = amd_bytealign (w[23], w[24], offset);
|
|
w[52] = amd_bytealign (w[22], w[23], offset);
|
|
w[51] = amd_bytealign (w[21], w[22], offset);
|
|
w[50] = amd_bytealign (w[20], w[21], offset);
|
|
w[49] = amd_bytealign (w[19], w[20], offset);
|
|
w[48] = amd_bytealign (w[18], w[19], offset);
|
|
w[47] = amd_bytealign (w[17], w[18], offset);
|
|
w[46] = amd_bytealign (w[16], w[17], offset);
|
|
w[45] = amd_bytealign (w[15], w[16], offset);
|
|
w[44] = amd_bytealign (w[14], w[15], offset);
|
|
w[43] = amd_bytealign (w[13], w[14], offset);
|
|
w[42] = amd_bytealign (w[12], w[13], offset);
|
|
w[41] = amd_bytealign (w[11], w[12], offset);
|
|
w[40] = amd_bytealign (w[10], w[11], offset);
|
|
w[39] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[38] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[37] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[36] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[35] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[34] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[33] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[32] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[31] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[30] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[29] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = amd_bytealign (w[32], w[33], offset);
|
|
w[62] = amd_bytealign (w[31], w[32], offset);
|
|
w[61] = amd_bytealign (w[30], w[31], offset);
|
|
w[60] = amd_bytealign (w[29], w[30], offset);
|
|
w[59] = amd_bytealign (w[28], w[29], offset);
|
|
w[58] = amd_bytealign (w[27], w[28], offset);
|
|
w[57] = amd_bytealign (w[26], w[27], offset);
|
|
w[56] = amd_bytealign (w[25], w[26], offset);
|
|
w[55] = amd_bytealign (w[24], w[25], offset);
|
|
w[54] = amd_bytealign (w[23], w[24], offset);
|
|
w[53] = amd_bytealign (w[22], w[23], offset);
|
|
w[52] = amd_bytealign (w[21], w[22], offset);
|
|
w[51] = amd_bytealign (w[20], w[21], offset);
|
|
w[50] = amd_bytealign (w[19], w[20], offset);
|
|
w[49] = amd_bytealign (w[18], w[19], offset);
|
|
w[48] = amd_bytealign (w[17], w[18], offset);
|
|
w[47] = amd_bytealign (w[16], w[17], offset);
|
|
w[46] = amd_bytealign (w[15], w[16], offset);
|
|
w[45] = amd_bytealign (w[14], w[15], offset);
|
|
w[44] = amd_bytealign (w[13], w[14], offset);
|
|
w[43] = amd_bytealign (w[12], w[13], offset);
|
|
w[42] = amd_bytealign (w[11], w[12], offset);
|
|
w[41] = amd_bytealign (w[10], w[11], offset);
|
|
w[40] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[39] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[38] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[37] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[36] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[35] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[34] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[33] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[32] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[31] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[30] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = amd_bytealign (w[31], w[32], offset);
|
|
w[62] = amd_bytealign (w[30], w[31], offset);
|
|
w[61] = amd_bytealign (w[29], w[30], offset);
|
|
w[60] = amd_bytealign (w[28], w[29], offset);
|
|
w[59] = amd_bytealign (w[27], w[28], offset);
|
|
w[58] = amd_bytealign (w[26], w[27], offset);
|
|
w[57] = amd_bytealign (w[25], w[26], offset);
|
|
w[56] = amd_bytealign (w[24], w[25], offset);
|
|
w[55] = amd_bytealign (w[23], w[24], offset);
|
|
w[54] = amd_bytealign (w[22], w[23], offset);
|
|
w[53] = amd_bytealign (w[21], w[22], offset);
|
|
w[52] = amd_bytealign (w[20], w[21], offset);
|
|
w[51] = amd_bytealign (w[19], w[20], offset);
|
|
w[50] = amd_bytealign (w[18], w[19], offset);
|
|
w[49] = amd_bytealign (w[17], w[18], offset);
|
|
w[48] = amd_bytealign (w[16], w[17], offset);
|
|
w[47] = amd_bytealign (w[15], w[16], offset);
|
|
w[46] = amd_bytealign (w[14], w[15], offset);
|
|
w[45] = amd_bytealign (w[13], w[14], offset);
|
|
w[44] = amd_bytealign (w[12], w[13], offset);
|
|
w[43] = amd_bytealign (w[11], w[12], offset);
|
|
w[42] = amd_bytealign (w[10], w[11], offset);
|
|
w[41] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[40] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[39] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[38] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[37] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[36] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[35] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[34] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[33] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[32] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[31] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = amd_bytealign (w[30], w[31], offset);
|
|
w[62] = amd_bytealign (w[29], w[30], offset);
|
|
w[61] = amd_bytealign (w[28], w[29], offset);
|
|
w[60] = amd_bytealign (w[27], w[28], offset);
|
|
w[59] = amd_bytealign (w[26], w[27], offset);
|
|
w[58] = amd_bytealign (w[25], w[26], offset);
|
|
w[57] = amd_bytealign (w[24], w[25], offset);
|
|
w[56] = amd_bytealign (w[23], w[24], offset);
|
|
w[55] = amd_bytealign (w[22], w[23], offset);
|
|
w[54] = amd_bytealign (w[21], w[22], offset);
|
|
w[53] = amd_bytealign (w[20], w[21], offset);
|
|
w[52] = amd_bytealign (w[19], w[20], offset);
|
|
w[51] = amd_bytealign (w[18], w[19], offset);
|
|
w[50] = amd_bytealign (w[17], w[18], offset);
|
|
w[49] = amd_bytealign (w[16], w[17], offset);
|
|
w[48] = amd_bytealign (w[15], w[16], offset);
|
|
w[47] = amd_bytealign (w[14], w[15], offset);
|
|
w[46] = amd_bytealign (w[13], w[14], offset);
|
|
w[45] = amd_bytealign (w[12], w[13], offset);
|
|
w[44] = amd_bytealign (w[11], w[12], offset);
|
|
w[43] = amd_bytealign (w[10], w[11], offset);
|
|
w[42] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[41] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[40] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[39] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[38] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[37] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[36] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[35] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[34] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[33] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[32] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = amd_bytealign (w[29], w[30], offset);
|
|
w[62] = amd_bytealign (w[28], w[29], offset);
|
|
w[61] = amd_bytealign (w[27], w[28], offset);
|
|
w[60] = amd_bytealign (w[26], w[27], offset);
|
|
w[59] = amd_bytealign (w[25], w[26], offset);
|
|
w[58] = amd_bytealign (w[24], w[25], offset);
|
|
w[57] = amd_bytealign (w[23], w[24], offset);
|
|
w[56] = amd_bytealign (w[22], w[23], offset);
|
|
w[55] = amd_bytealign (w[21], w[22], offset);
|
|
w[54] = amd_bytealign (w[20], w[21], offset);
|
|
w[53] = amd_bytealign (w[19], w[20], offset);
|
|
w[52] = amd_bytealign (w[18], w[19], offset);
|
|
w[51] = amd_bytealign (w[17], w[18], offset);
|
|
w[50] = amd_bytealign (w[16], w[17], offset);
|
|
w[49] = amd_bytealign (w[15], w[16], offset);
|
|
w[48] = amd_bytealign (w[14], w[15], offset);
|
|
w[47] = amd_bytealign (w[13], w[14], offset);
|
|
w[46] = amd_bytealign (w[12], w[13], offset);
|
|
w[45] = amd_bytealign (w[11], w[12], offset);
|
|
w[44] = amd_bytealign (w[10], w[11], offset);
|
|
w[43] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[42] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[41] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[40] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[39] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[38] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[37] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[36] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[35] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[34] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[33] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = amd_bytealign (w[28], w[29], offset);
|
|
w[62] = amd_bytealign (w[27], w[28], offset);
|
|
w[61] = amd_bytealign (w[26], w[27], offset);
|
|
w[60] = amd_bytealign (w[25], w[26], offset);
|
|
w[59] = amd_bytealign (w[24], w[25], offset);
|
|
w[58] = amd_bytealign (w[23], w[24], offset);
|
|
w[57] = amd_bytealign (w[22], w[23], offset);
|
|
w[56] = amd_bytealign (w[21], w[22], offset);
|
|
w[55] = amd_bytealign (w[20], w[21], offset);
|
|
w[54] = amd_bytealign (w[19], w[20], offset);
|
|
w[53] = amd_bytealign (w[18], w[19], offset);
|
|
w[52] = amd_bytealign (w[17], w[18], offset);
|
|
w[51] = amd_bytealign (w[16], w[17], offset);
|
|
w[50] = amd_bytealign (w[15], w[16], offset);
|
|
w[49] = amd_bytealign (w[14], w[15], offset);
|
|
w[48] = amd_bytealign (w[13], w[14], offset);
|
|
w[47] = amd_bytealign (w[12], w[13], offset);
|
|
w[46] = amd_bytealign (w[11], w[12], offset);
|
|
w[45] = amd_bytealign (w[10], w[11], offset);
|
|
w[44] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[43] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[42] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[41] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[40] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[39] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[38] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[37] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[36] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[35] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[34] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = amd_bytealign (w[27], w[28], offset);
|
|
w[62] = amd_bytealign (w[26], w[27], offset);
|
|
w[61] = amd_bytealign (w[25], w[26], offset);
|
|
w[60] = amd_bytealign (w[24], w[25], offset);
|
|
w[59] = amd_bytealign (w[23], w[24], offset);
|
|
w[58] = amd_bytealign (w[22], w[23], offset);
|
|
w[57] = amd_bytealign (w[21], w[22], offset);
|
|
w[56] = amd_bytealign (w[20], w[21], offset);
|
|
w[55] = amd_bytealign (w[19], w[20], offset);
|
|
w[54] = amd_bytealign (w[18], w[19], offset);
|
|
w[53] = amd_bytealign (w[17], w[18], offset);
|
|
w[52] = amd_bytealign (w[16], w[17], offset);
|
|
w[51] = amd_bytealign (w[15], w[16], offset);
|
|
w[50] = amd_bytealign (w[14], w[15], offset);
|
|
w[49] = amd_bytealign (w[13], w[14], offset);
|
|
w[48] = amd_bytealign (w[12], w[13], offset);
|
|
w[47] = amd_bytealign (w[11], w[12], offset);
|
|
w[46] = amd_bytealign (w[10], w[11], offset);
|
|
w[45] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[44] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[43] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[42] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[41] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[40] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[39] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[38] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[37] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[36] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[35] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = amd_bytealign (w[26], w[27], offset);
|
|
w[62] = amd_bytealign (w[25], w[26], offset);
|
|
w[61] = amd_bytealign (w[24], w[25], offset);
|
|
w[60] = amd_bytealign (w[23], w[24], offset);
|
|
w[59] = amd_bytealign (w[22], w[23], offset);
|
|
w[58] = amd_bytealign (w[21], w[22], offset);
|
|
w[57] = amd_bytealign (w[20], w[21], offset);
|
|
w[56] = amd_bytealign (w[19], w[20], offset);
|
|
w[55] = amd_bytealign (w[18], w[19], offset);
|
|
w[54] = amd_bytealign (w[17], w[18], offset);
|
|
w[53] = amd_bytealign (w[16], w[17], offset);
|
|
w[52] = amd_bytealign (w[15], w[16], offset);
|
|
w[51] = amd_bytealign (w[14], w[15], offset);
|
|
w[50] = amd_bytealign (w[13], w[14], offset);
|
|
w[49] = amd_bytealign (w[12], w[13], offset);
|
|
w[48] = amd_bytealign (w[11], w[12], offset);
|
|
w[47] = amd_bytealign (w[10], w[11], offset);
|
|
w[46] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[45] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[44] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[43] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[42] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[41] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[40] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[39] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[38] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[37] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[36] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = amd_bytealign (w[25], w[26], offset);
|
|
w[62] = amd_bytealign (w[24], w[25], offset);
|
|
w[61] = amd_bytealign (w[23], w[24], offset);
|
|
w[60] = amd_bytealign (w[22], w[23], offset);
|
|
w[59] = amd_bytealign (w[21], w[22], offset);
|
|
w[58] = amd_bytealign (w[20], w[21], offset);
|
|
w[57] = amd_bytealign (w[19], w[20], offset);
|
|
w[56] = amd_bytealign (w[18], w[19], offset);
|
|
w[55] = amd_bytealign (w[17], w[18], offset);
|
|
w[54] = amd_bytealign (w[16], w[17], offset);
|
|
w[53] = amd_bytealign (w[15], w[16], offset);
|
|
w[52] = amd_bytealign (w[14], w[15], offset);
|
|
w[51] = amd_bytealign (w[13], w[14], offset);
|
|
w[50] = amd_bytealign (w[12], w[13], offset);
|
|
w[49] = amd_bytealign (w[11], w[12], offset);
|
|
w[48] = amd_bytealign (w[10], w[11], offset);
|
|
w[47] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[46] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[45] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[44] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[43] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[42] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[41] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[40] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[39] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[38] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[37] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = amd_bytealign (w[24], w[25], offset);
|
|
w[62] = amd_bytealign (w[23], w[24], offset);
|
|
w[61] = amd_bytealign (w[22], w[23], offset);
|
|
w[60] = amd_bytealign (w[21], w[22], offset);
|
|
w[59] = amd_bytealign (w[20], w[21], offset);
|
|
w[58] = amd_bytealign (w[19], w[20], offset);
|
|
w[57] = amd_bytealign (w[18], w[19], offset);
|
|
w[56] = amd_bytealign (w[17], w[18], offset);
|
|
w[55] = amd_bytealign (w[16], w[17], offset);
|
|
w[54] = amd_bytealign (w[15], w[16], offset);
|
|
w[53] = amd_bytealign (w[14], w[15], offset);
|
|
w[52] = amd_bytealign (w[13], w[14], offset);
|
|
w[51] = amd_bytealign (w[12], w[13], offset);
|
|
w[50] = amd_bytealign (w[11], w[12], offset);
|
|
w[49] = amd_bytealign (w[10], w[11], offset);
|
|
w[48] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[47] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[46] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[45] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[44] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[43] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[42] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[41] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[40] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[39] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[38] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = amd_bytealign (w[23], w[24], offset);
|
|
w[62] = amd_bytealign (w[22], w[23], offset);
|
|
w[61] = amd_bytealign (w[21], w[22], offset);
|
|
w[60] = amd_bytealign (w[20], w[21], offset);
|
|
w[59] = amd_bytealign (w[19], w[20], offset);
|
|
w[58] = amd_bytealign (w[18], w[19], offset);
|
|
w[57] = amd_bytealign (w[17], w[18], offset);
|
|
w[56] = amd_bytealign (w[16], w[17], offset);
|
|
w[55] = amd_bytealign (w[15], w[16], offset);
|
|
w[54] = amd_bytealign (w[14], w[15], offset);
|
|
w[53] = amd_bytealign (w[13], w[14], offset);
|
|
w[52] = amd_bytealign (w[12], w[13], offset);
|
|
w[51] = amd_bytealign (w[11], w[12], offset);
|
|
w[50] = amd_bytealign (w[10], w[11], offset);
|
|
w[49] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[48] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[47] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[46] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[45] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[44] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[43] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[42] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[41] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[40] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[39] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = amd_bytealign (w[22], w[23], offset);
|
|
w[62] = amd_bytealign (w[21], w[22], offset);
|
|
w[61] = amd_bytealign (w[20], w[21], offset);
|
|
w[60] = amd_bytealign (w[19], w[20], offset);
|
|
w[59] = amd_bytealign (w[18], w[19], offset);
|
|
w[58] = amd_bytealign (w[17], w[18], offset);
|
|
w[57] = amd_bytealign (w[16], w[17], offset);
|
|
w[56] = amd_bytealign (w[15], w[16], offset);
|
|
w[55] = amd_bytealign (w[14], w[15], offset);
|
|
w[54] = amd_bytealign (w[13], w[14], offset);
|
|
w[53] = amd_bytealign (w[12], w[13], offset);
|
|
w[52] = amd_bytealign (w[11], w[12], offset);
|
|
w[51] = amd_bytealign (w[10], w[11], offset);
|
|
w[50] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[49] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[48] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[47] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[46] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[45] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[44] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[43] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[42] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[41] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[40] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = amd_bytealign (w[21], w[22], offset);
|
|
w[62] = amd_bytealign (w[20], w[21], offset);
|
|
w[61] = amd_bytealign (w[19], w[20], offset);
|
|
w[60] = amd_bytealign (w[18], w[19], offset);
|
|
w[59] = amd_bytealign (w[17], w[18], offset);
|
|
w[58] = amd_bytealign (w[16], w[17], offset);
|
|
w[57] = amd_bytealign (w[15], w[16], offset);
|
|
w[56] = amd_bytealign (w[14], w[15], offset);
|
|
w[55] = amd_bytealign (w[13], w[14], offset);
|
|
w[54] = amd_bytealign (w[12], w[13], offset);
|
|
w[53] = amd_bytealign (w[11], w[12], offset);
|
|
w[52] = amd_bytealign (w[10], w[11], offset);
|
|
w[51] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[50] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[49] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[48] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[47] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[46] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[45] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[44] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[43] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[42] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[41] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = amd_bytealign (w[20], w[21], offset);
|
|
w[62] = amd_bytealign (w[19], w[20], offset);
|
|
w[61] = amd_bytealign (w[18], w[19], offset);
|
|
w[60] = amd_bytealign (w[17], w[18], offset);
|
|
w[59] = amd_bytealign (w[16], w[17], offset);
|
|
w[58] = amd_bytealign (w[15], w[16], offset);
|
|
w[57] = amd_bytealign (w[14], w[15], offset);
|
|
w[56] = amd_bytealign (w[13], w[14], offset);
|
|
w[55] = amd_bytealign (w[12], w[13], offset);
|
|
w[54] = amd_bytealign (w[11], w[12], offset);
|
|
w[53] = amd_bytealign (w[10], w[11], offset);
|
|
w[52] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[51] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[50] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[49] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[48] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[47] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[46] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[45] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[44] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[43] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[42] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = amd_bytealign (w[19], w[20], offset);
|
|
w[62] = amd_bytealign (w[18], w[19], offset);
|
|
w[61] = amd_bytealign (w[17], w[18], offset);
|
|
w[60] = amd_bytealign (w[16], w[17], offset);
|
|
w[59] = amd_bytealign (w[15], w[16], offset);
|
|
w[58] = amd_bytealign (w[14], w[15], offset);
|
|
w[57] = amd_bytealign (w[13], w[14], offset);
|
|
w[56] = amd_bytealign (w[12], w[13], offset);
|
|
w[55] = amd_bytealign (w[11], w[12], offset);
|
|
w[54] = amd_bytealign (w[10], w[11], offset);
|
|
w[53] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[52] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[51] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[50] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[49] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[48] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[47] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[46] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[45] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[44] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[43] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = amd_bytealign (w[18], w[19], offset);
|
|
w[62] = amd_bytealign (w[17], w[18], offset);
|
|
w[61] = amd_bytealign (w[16], w[17], offset);
|
|
w[60] = amd_bytealign (w[15], w[16], offset);
|
|
w[59] = amd_bytealign (w[14], w[15], offset);
|
|
w[58] = amd_bytealign (w[13], w[14], offset);
|
|
w[57] = amd_bytealign (w[12], w[13], offset);
|
|
w[56] = amd_bytealign (w[11], w[12], offset);
|
|
w[55] = amd_bytealign (w[10], w[11], offset);
|
|
w[54] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[53] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[52] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[51] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[50] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[49] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[48] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[47] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[46] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[45] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[44] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = amd_bytealign (w[17], w[18], offset);
|
|
w[62] = amd_bytealign (w[16], w[17], offset);
|
|
w[61] = amd_bytealign (w[15], w[16], offset);
|
|
w[60] = amd_bytealign (w[14], w[15], offset);
|
|
w[59] = amd_bytealign (w[13], w[14], offset);
|
|
w[58] = amd_bytealign (w[12], w[13], offset);
|
|
w[57] = amd_bytealign (w[11], w[12], offset);
|
|
w[56] = amd_bytealign (w[10], w[11], offset);
|
|
w[55] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[54] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[53] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[52] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[51] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[50] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[49] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[48] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[47] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[46] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[45] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = amd_bytealign (w[16], w[17], offset);
|
|
w[62] = amd_bytealign (w[15], w[16], offset);
|
|
w[61] = amd_bytealign (w[14], w[15], offset);
|
|
w[60] = amd_bytealign (w[13], w[14], offset);
|
|
w[59] = amd_bytealign (w[12], w[13], offset);
|
|
w[58] = amd_bytealign (w[11], w[12], offset);
|
|
w[57] = amd_bytealign (w[10], w[11], offset);
|
|
w[56] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[55] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[54] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[53] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[52] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[51] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[50] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[49] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[48] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[47] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[46] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = amd_bytealign (w[15], w[16], offset);
|
|
w[62] = amd_bytealign (w[14], w[15], offset);
|
|
w[61] = amd_bytealign (w[13], w[14], offset);
|
|
w[60] = amd_bytealign (w[12], w[13], offset);
|
|
w[59] = amd_bytealign (w[11], w[12], offset);
|
|
w[58] = amd_bytealign (w[10], w[11], offset);
|
|
w[57] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[56] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[55] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[54] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[53] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[52] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[51] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[50] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[49] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[48] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[47] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = amd_bytealign (w[14], w[15], offset);
|
|
w[62] = amd_bytealign (w[13], w[14], offset);
|
|
w[61] = amd_bytealign (w[12], w[13], offset);
|
|
w[60] = amd_bytealign (w[11], w[12], offset);
|
|
w[59] = amd_bytealign (w[10], w[11], offset);
|
|
w[58] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[57] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[56] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[55] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[54] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[53] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[52] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[51] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[50] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[49] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[48] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = amd_bytealign (w[13], w[14], offset);
|
|
w[62] = amd_bytealign (w[12], w[13], offset);
|
|
w[61] = amd_bytealign (w[11], w[12], offset);
|
|
w[60] = amd_bytealign (w[10], w[11], offset);
|
|
w[59] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[58] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[57] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[56] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[55] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[54] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[53] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[52] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[51] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[50] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[49] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = amd_bytealign (w[12], w[13], offset);
|
|
w[62] = amd_bytealign (w[11], w[12], offset);
|
|
w[61] = amd_bytealign (w[10], w[11], offset);
|
|
w[60] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[59] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[58] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[57] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[56] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[55] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[54] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[53] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[52] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[51] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[50] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = amd_bytealign (w[11], w[12], offset);
|
|
w[62] = amd_bytealign (w[10], w[11], offset);
|
|
w[61] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[60] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[59] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[58] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[57] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[56] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[55] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[54] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[53] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[52] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[51] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = amd_bytealign (w[10], w[11], offset);
|
|
w[62] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[61] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[60] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[59] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[58] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[57] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[56] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[55] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[54] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[53] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[52] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = amd_bytealign (w[ 9], w[10], offset);
|
|
w[62] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[61] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[60] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[59] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[58] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[57] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[56] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[55] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[54] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[53] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = amd_bytealign (w[ 8], w[ 9], offset);
|
|
w[62] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[61] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[60] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[59] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[58] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[57] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[56] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[55] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[54] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = amd_bytealign (w[ 7], w[ 8], offset);
|
|
w[62] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[61] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[60] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[59] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[58] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[57] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[56] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[55] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = amd_bytealign (w[ 6], w[ 7], offset);
|
|
w[62] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[61] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[60] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[59] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[58] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[57] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[56] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = amd_bytealign (w[ 5], w[ 6], offset);
|
|
w[62] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[61] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[60] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[59] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[58] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[57] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = amd_bytealign (w[ 4], w[ 5], offset);
|
|
w[62] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[61] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[60] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[59] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[58] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = amd_bytealign (w[ 3], w[ 4], offset);
|
|
w[62] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[61] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[60] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[59] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = amd_bytealign (w[ 2], w[ 3], offset);
|
|
w[62] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[61] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[60] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = amd_bytealign (w[ 1], w[ 2], offset);
|
|
w[62] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[61] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = amd_bytealign (w[ 0], w[ 1], offset);
|
|
w[62] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = amd_bytealign ( 0, w[ 0], offset);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = __byte_perm (w[63], w[62], selector);
|
|
w[62] = __byte_perm (w[62], w[61], selector);
|
|
w[61] = __byte_perm (w[61], w[60], selector);
|
|
w[60] = __byte_perm (w[60], w[59], selector);
|
|
w[59] = __byte_perm (w[59], w[58], selector);
|
|
w[58] = __byte_perm (w[58], w[57], selector);
|
|
w[57] = __byte_perm (w[57], w[56], selector);
|
|
w[56] = __byte_perm (w[56], w[55], selector);
|
|
w[55] = __byte_perm (w[55], w[54], selector);
|
|
w[54] = __byte_perm (w[54], w[53], selector);
|
|
w[53] = __byte_perm (w[53], w[52], selector);
|
|
w[52] = __byte_perm (w[52], w[51], selector);
|
|
w[51] = __byte_perm (w[51], w[50], selector);
|
|
w[50] = __byte_perm (w[50], w[49], selector);
|
|
w[49] = __byte_perm (w[49], w[48], selector);
|
|
w[48] = __byte_perm (w[48], w[47], selector);
|
|
w[47] = __byte_perm (w[47], w[46], selector);
|
|
w[46] = __byte_perm (w[46], w[45], selector);
|
|
w[45] = __byte_perm (w[45], w[44], selector);
|
|
w[44] = __byte_perm (w[44], w[43], selector);
|
|
w[43] = __byte_perm (w[43], w[42], selector);
|
|
w[42] = __byte_perm (w[42], w[41], selector);
|
|
w[41] = __byte_perm (w[41], w[40], selector);
|
|
w[40] = __byte_perm (w[40], w[39], selector);
|
|
w[39] = __byte_perm (w[39], w[38], selector);
|
|
w[38] = __byte_perm (w[38], w[37], selector);
|
|
w[37] = __byte_perm (w[37], w[36], selector);
|
|
w[36] = __byte_perm (w[36], w[35], selector);
|
|
w[35] = __byte_perm (w[35], w[34], selector);
|
|
w[34] = __byte_perm (w[34], w[33], selector);
|
|
w[33] = __byte_perm (w[33], w[32], selector);
|
|
w[32] = __byte_perm (w[32], w[31], selector);
|
|
w[31] = __byte_perm (w[31], w[30], selector);
|
|
w[30] = __byte_perm (w[30], w[29], selector);
|
|
w[29] = __byte_perm (w[29], w[28], selector);
|
|
w[28] = __byte_perm (w[28], w[27], selector);
|
|
w[27] = __byte_perm (w[27], w[26], selector);
|
|
w[26] = __byte_perm (w[26], w[25], selector);
|
|
w[25] = __byte_perm (w[25], w[24], selector);
|
|
w[24] = __byte_perm (w[24], w[23], selector);
|
|
w[23] = __byte_perm (w[23], w[22], selector);
|
|
w[22] = __byte_perm (w[22], w[21], selector);
|
|
w[21] = __byte_perm (w[21], w[20], selector);
|
|
w[20] = __byte_perm (w[20], w[19], selector);
|
|
w[19] = __byte_perm (w[19], w[18], selector);
|
|
w[18] = __byte_perm (w[18], w[17], selector);
|
|
w[17] = __byte_perm (w[17], w[16], selector);
|
|
w[16] = __byte_perm (w[16], w[15], selector);
|
|
w[15] = __byte_perm (w[15], w[14], selector);
|
|
w[14] = __byte_perm (w[14], w[13], selector);
|
|
w[13] = __byte_perm (w[13], w[12], selector);
|
|
w[12] = __byte_perm (w[12], w[11], selector);
|
|
w[11] = __byte_perm (w[11], w[10], selector);
|
|
w[10] = __byte_perm (w[10], w[ 9], selector);
|
|
w[ 9] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[ 8] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[ 7] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[ 6] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[ 5] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 4] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 3] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 2] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 1] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 0] = __byte_perm (w[ 0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = __byte_perm (w[62], w[61], selector);
|
|
w[62] = __byte_perm (w[61], w[60], selector);
|
|
w[61] = __byte_perm (w[60], w[59], selector);
|
|
w[60] = __byte_perm (w[59], w[58], selector);
|
|
w[59] = __byte_perm (w[58], w[57], selector);
|
|
w[58] = __byte_perm (w[57], w[56], selector);
|
|
w[57] = __byte_perm (w[56], w[55], selector);
|
|
w[56] = __byte_perm (w[55], w[54], selector);
|
|
w[55] = __byte_perm (w[54], w[53], selector);
|
|
w[54] = __byte_perm (w[53], w[52], selector);
|
|
w[53] = __byte_perm (w[52], w[51], selector);
|
|
w[52] = __byte_perm (w[51], w[50], selector);
|
|
w[51] = __byte_perm (w[50], w[49], selector);
|
|
w[50] = __byte_perm (w[49], w[48], selector);
|
|
w[49] = __byte_perm (w[48], w[47], selector);
|
|
w[48] = __byte_perm (w[47], w[46], selector);
|
|
w[47] = __byte_perm (w[46], w[45], selector);
|
|
w[46] = __byte_perm (w[45], w[44], selector);
|
|
w[45] = __byte_perm (w[44], w[43], selector);
|
|
w[44] = __byte_perm (w[43], w[42], selector);
|
|
w[43] = __byte_perm (w[42], w[41], selector);
|
|
w[42] = __byte_perm (w[41], w[40], selector);
|
|
w[41] = __byte_perm (w[40], w[39], selector);
|
|
w[40] = __byte_perm (w[39], w[38], selector);
|
|
w[39] = __byte_perm (w[38], w[37], selector);
|
|
w[38] = __byte_perm (w[37], w[36], selector);
|
|
w[37] = __byte_perm (w[36], w[35], selector);
|
|
w[36] = __byte_perm (w[35], w[34], selector);
|
|
w[35] = __byte_perm (w[34], w[33], selector);
|
|
w[34] = __byte_perm (w[33], w[32], selector);
|
|
w[33] = __byte_perm (w[32], w[31], selector);
|
|
w[32] = __byte_perm (w[31], w[30], selector);
|
|
w[31] = __byte_perm (w[30], w[29], selector);
|
|
w[30] = __byte_perm (w[29], w[28], selector);
|
|
w[29] = __byte_perm (w[28], w[27], selector);
|
|
w[28] = __byte_perm (w[27], w[26], selector);
|
|
w[27] = __byte_perm (w[26], w[25], selector);
|
|
w[26] = __byte_perm (w[25], w[24], selector);
|
|
w[25] = __byte_perm (w[24], w[23], selector);
|
|
w[24] = __byte_perm (w[23], w[22], selector);
|
|
w[23] = __byte_perm (w[22], w[21], selector);
|
|
w[22] = __byte_perm (w[21], w[20], selector);
|
|
w[21] = __byte_perm (w[20], w[19], selector);
|
|
w[20] = __byte_perm (w[19], w[18], selector);
|
|
w[19] = __byte_perm (w[18], w[17], selector);
|
|
w[18] = __byte_perm (w[17], w[16], selector);
|
|
w[17] = __byte_perm (w[16], w[15], selector);
|
|
w[16] = __byte_perm (w[15], w[14], selector);
|
|
w[15] = __byte_perm (w[14], w[13], selector);
|
|
w[14] = __byte_perm (w[13], w[12], selector);
|
|
w[13] = __byte_perm (w[12], w[11], selector);
|
|
w[12] = __byte_perm (w[11], w[10], selector);
|
|
w[11] = __byte_perm (w[10], w[ 9], selector);
|
|
w[10] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[ 9] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[ 8] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[ 7] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[ 6] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 5] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 4] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 3] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 2] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 1] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = __byte_perm (w[61], w[60], selector);
|
|
w[62] = __byte_perm (w[60], w[59], selector);
|
|
w[61] = __byte_perm (w[59], w[58], selector);
|
|
w[60] = __byte_perm (w[58], w[57], selector);
|
|
w[59] = __byte_perm (w[57], w[56], selector);
|
|
w[58] = __byte_perm (w[56], w[55], selector);
|
|
w[57] = __byte_perm (w[55], w[54], selector);
|
|
w[56] = __byte_perm (w[54], w[53], selector);
|
|
w[55] = __byte_perm (w[53], w[52], selector);
|
|
w[54] = __byte_perm (w[52], w[51], selector);
|
|
w[53] = __byte_perm (w[51], w[50], selector);
|
|
w[52] = __byte_perm (w[50], w[49], selector);
|
|
w[51] = __byte_perm (w[49], w[48], selector);
|
|
w[50] = __byte_perm (w[48], w[47], selector);
|
|
w[49] = __byte_perm (w[47], w[46], selector);
|
|
w[48] = __byte_perm (w[46], w[45], selector);
|
|
w[47] = __byte_perm (w[45], w[44], selector);
|
|
w[46] = __byte_perm (w[44], w[43], selector);
|
|
w[45] = __byte_perm (w[43], w[42], selector);
|
|
w[44] = __byte_perm (w[42], w[41], selector);
|
|
w[43] = __byte_perm (w[41], w[40], selector);
|
|
w[42] = __byte_perm (w[40], w[39], selector);
|
|
w[41] = __byte_perm (w[39], w[38], selector);
|
|
w[40] = __byte_perm (w[38], w[37], selector);
|
|
w[39] = __byte_perm (w[37], w[36], selector);
|
|
w[38] = __byte_perm (w[36], w[35], selector);
|
|
w[37] = __byte_perm (w[35], w[34], selector);
|
|
w[36] = __byte_perm (w[34], w[33], selector);
|
|
w[35] = __byte_perm (w[33], w[32], selector);
|
|
w[34] = __byte_perm (w[32], w[31], selector);
|
|
w[33] = __byte_perm (w[31], w[30], selector);
|
|
w[32] = __byte_perm (w[30], w[29], selector);
|
|
w[31] = __byte_perm (w[29], w[28], selector);
|
|
w[30] = __byte_perm (w[28], w[27], selector);
|
|
w[29] = __byte_perm (w[27], w[26], selector);
|
|
w[28] = __byte_perm (w[26], w[25], selector);
|
|
w[27] = __byte_perm (w[25], w[24], selector);
|
|
w[26] = __byte_perm (w[24], w[23], selector);
|
|
w[25] = __byte_perm (w[23], w[22], selector);
|
|
w[24] = __byte_perm (w[22], w[21], selector);
|
|
w[23] = __byte_perm (w[21], w[20], selector);
|
|
w[22] = __byte_perm (w[20], w[19], selector);
|
|
w[21] = __byte_perm (w[19], w[18], selector);
|
|
w[20] = __byte_perm (w[18], w[17], selector);
|
|
w[19] = __byte_perm (w[17], w[16], selector);
|
|
w[18] = __byte_perm (w[16], w[15], selector);
|
|
w[17] = __byte_perm (w[15], w[14], selector);
|
|
w[16] = __byte_perm (w[14], w[13], selector);
|
|
w[15] = __byte_perm (w[13], w[12], selector);
|
|
w[14] = __byte_perm (w[12], w[11], selector);
|
|
w[13] = __byte_perm (w[11], w[10], selector);
|
|
w[12] = __byte_perm (w[10], w[ 9], selector);
|
|
w[11] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[10] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[ 9] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[ 8] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[ 7] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 6] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 5] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 4] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 3] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 2] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = __byte_perm (w[60], w[59], selector);
|
|
w[62] = __byte_perm (w[59], w[58], selector);
|
|
w[61] = __byte_perm (w[58], w[57], selector);
|
|
w[60] = __byte_perm (w[57], w[56], selector);
|
|
w[59] = __byte_perm (w[56], w[55], selector);
|
|
w[58] = __byte_perm (w[55], w[54], selector);
|
|
w[57] = __byte_perm (w[54], w[53], selector);
|
|
w[56] = __byte_perm (w[53], w[52], selector);
|
|
w[55] = __byte_perm (w[52], w[51], selector);
|
|
w[54] = __byte_perm (w[51], w[50], selector);
|
|
w[53] = __byte_perm (w[50], w[49], selector);
|
|
w[52] = __byte_perm (w[49], w[48], selector);
|
|
w[51] = __byte_perm (w[48], w[47], selector);
|
|
w[50] = __byte_perm (w[47], w[46], selector);
|
|
w[49] = __byte_perm (w[46], w[45], selector);
|
|
w[48] = __byte_perm (w[45], w[44], selector);
|
|
w[47] = __byte_perm (w[44], w[43], selector);
|
|
w[46] = __byte_perm (w[43], w[42], selector);
|
|
w[45] = __byte_perm (w[42], w[41], selector);
|
|
w[44] = __byte_perm (w[41], w[40], selector);
|
|
w[43] = __byte_perm (w[40], w[39], selector);
|
|
w[42] = __byte_perm (w[39], w[38], selector);
|
|
w[41] = __byte_perm (w[38], w[37], selector);
|
|
w[40] = __byte_perm (w[37], w[36], selector);
|
|
w[39] = __byte_perm (w[36], w[35], selector);
|
|
w[38] = __byte_perm (w[35], w[34], selector);
|
|
w[37] = __byte_perm (w[34], w[33], selector);
|
|
w[36] = __byte_perm (w[33], w[32], selector);
|
|
w[35] = __byte_perm (w[32], w[31], selector);
|
|
w[34] = __byte_perm (w[31], w[30], selector);
|
|
w[33] = __byte_perm (w[30], w[29], selector);
|
|
w[32] = __byte_perm (w[29], w[28], selector);
|
|
w[31] = __byte_perm (w[28], w[27], selector);
|
|
w[30] = __byte_perm (w[27], w[26], selector);
|
|
w[29] = __byte_perm (w[26], w[25], selector);
|
|
w[28] = __byte_perm (w[25], w[24], selector);
|
|
w[27] = __byte_perm (w[24], w[23], selector);
|
|
w[26] = __byte_perm (w[23], w[22], selector);
|
|
w[25] = __byte_perm (w[22], w[21], selector);
|
|
w[24] = __byte_perm (w[21], w[20], selector);
|
|
w[23] = __byte_perm (w[20], w[19], selector);
|
|
w[22] = __byte_perm (w[19], w[18], selector);
|
|
w[21] = __byte_perm (w[18], w[17], selector);
|
|
w[20] = __byte_perm (w[17], w[16], selector);
|
|
w[19] = __byte_perm (w[16], w[15], selector);
|
|
w[18] = __byte_perm (w[15], w[14], selector);
|
|
w[17] = __byte_perm (w[14], w[13], selector);
|
|
w[16] = __byte_perm (w[13], w[12], selector);
|
|
w[15] = __byte_perm (w[12], w[11], selector);
|
|
w[14] = __byte_perm (w[11], w[10], selector);
|
|
w[13] = __byte_perm (w[10], w[ 9], selector);
|
|
w[12] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[11] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[10] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[ 9] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[ 8] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 7] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 6] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 5] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 4] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 3] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = __byte_perm (w[59], w[58], selector);
|
|
w[62] = __byte_perm (w[58], w[57], selector);
|
|
w[61] = __byte_perm (w[57], w[56], selector);
|
|
w[60] = __byte_perm (w[56], w[55], selector);
|
|
w[59] = __byte_perm (w[55], w[54], selector);
|
|
w[58] = __byte_perm (w[54], w[53], selector);
|
|
w[57] = __byte_perm (w[53], w[52], selector);
|
|
w[56] = __byte_perm (w[52], w[51], selector);
|
|
w[55] = __byte_perm (w[51], w[50], selector);
|
|
w[54] = __byte_perm (w[50], w[49], selector);
|
|
w[53] = __byte_perm (w[49], w[48], selector);
|
|
w[52] = __byte_perm (w[48], w[47], selector);
|
|
w[51] = __byte_perm (w[47], w[46], selector);
|
|
w[50] = __byte_perm (w[46], w[45], selector);
|
|
w[49] = __byte_perm (w[45], w[44], selector);
|
|
w[48] = __byte_perm (w[44], w[43], selector);
|
|
w[47] = __byte_perm (w[43], w[42], selector);
|
|
w[46] = __byte_perm (w[42], w[41], selector);
|
|
w[45] = __byte_perm (w[41], w[40], selector);
|
|
w[44] = __byte_perm (w[40], w[39], selector);
|
|
w[43] = __byte_perm (w[39], w[38], selector);
|
|
w[42] = __byte_perm (w[38], w[37], selector);
|
|
w[41] = __byte_perm (w[37], w[36], selector);
|
|
w[40] = __byte_perm (w[36], w[35], selector);
|
|
w[39] = __byte_perm (w[35], w[34], selector);
|
|
w[38] = __byte_perm (w[34], w[33], selector);
|
|
w[37] = __byte_perm (w[33], w[32], selector);
|
|
w[36] = __byte_perm (w[32], w[31], selector);
|
|
w[35] = __byte_perm (w[31], w[30], selector);
|
|
w[34] = __byte_perm (w[30], w[29], selector);
|
|
w[33] = __byte_perm (w[29], w[28], selector);
|
|
w[32] = __byte_perm (w[28], w[27], selector);
|
|
w[31] = __byte_perm (w[27], w[26], selector);
|
|
w[30] = __byte_perm (w[26], w[25], selector);
|
|
w[29] = __byte_perm (w[25], w[24], selector);
|
|
w[28] = __byte_perm (w[24], w[23], selector);
|
|
w[27] = __byte_perm (w[23], w[22], selector);
|
|
w[26] = __byte_perm (w[22], w[21], selector);
|
|
w[25] = __byte_perm (w[21], w[20], selector);
|
|
w[24] = __byte_perm (w[20], w[19], selector);
|
|
w[23] = __byte_perm (w[19], w[18], selector);
|
|
w[22] = __byte_perm (w[18], w[17], selector);
|
|
w[21] = __byte_perm (w[17], w[16], selector);
|
|
w[20] = __byte_perm (w[16], w[15], selector);
|
|
w[19] = __byte_perm (w[15], w[14], selector);
|
|
w[18] = __byte_perm (w[14], w[13], selector);
|
|
w[17] = __byte_perm (w[13], w[12], selector);
|
|
w[16] = __byte_perm (w[12], w[11], selector);
|
|
w[15] = __byte_perm (w[11], w[10], selector);
|
|
w[14] = __byte_perm (w[10], w[ 9], selector);
|
|
w[13] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[12] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[11] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[10] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[ 9] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 8] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 7] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 6] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 5] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 4] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = __byte_perm (w[58], w[57], selector);
|
|
w[62] = __byte_perm (w[57], w[56], selector);
|
|
w[61] = __byte_perm (w[56], w[55], selector);
|
|
w[60] = __byte_perm (w[55], w[54], selector);
|
|
w[59] = __byte_perm (w[54], w[53], selector);
|
|
w[58] = __byte_perm (w[53], w[52], selector);
|
|
w[57] = __byte_perm (w[52], w[51], selector);
|
|
w[56] = __byte_perm (w[51], w[50], selector);
|
|
w[55] = __byte_perm (w[50], w[49], selector);
|
|
w[54] = __byte_perm (w[49], w[48], selector);
|
|
w[53] = __byte_perm (w[48], w[47], selector);
|
|
w[52] = __byte_perm (w[47], w[46], selector);
|
|
w[51] = __byte_perm (w[46], w[45], selector);
|
|
w[50] = __byte_perm (w[45], w[44], selector);
|
|
w[49] = __byte_perm (w[44], w[43], selector);
|
|
w[48] = __byte_perm (w[43], w[42], selector);
|
|
w[47] = __byte_perm (w[42], w[41], selector);
|
|
w[46] = __byte_perm (w[41], w[40], selector);
|
|
w[45] = __byte_perm (w[40], w[39], selector);
|
|
w[44] = __byte_perm (w[39], w[38], selector);
|
|
w[43] = __byte_perm (w[38], w[37], selector);
|
|
w[42] = __byte_perm (w[37], w[36], selector);
|
|
w[41] = __byte_perm (w[36], w[35], selector);
|
|
w[40] = __byte_perm (w[35], w[34], selector);
|
|
w[39] = __byte_perm (w[34], w[33], selector);
|
|
w[38] = __byte_perm (w[33], w[32], selector);
|
|
w[37] = __byte_perm (w[32], w[31], selector);
|
|
w[36] = __byte_perm (w[31], w[30], selector);
|
|
w[35] = __byte_perm (w[30], w[29], selector);
|
|
w[34] = __byte_perm (w[29], w[28], selector);
|
|
w[33] = __byte_perm (w[28], w[27], selector);
|
|
w[32] = __byte_perm (w[27], w[26], selector);
|
|
w[31] = __byte_perm (w[26], w[25], selector);
|
|
w[30] = __byte_perm (w[25], w[24], selector);
|
|
w[29] = __byte_perm (w[24], w[23], selector);
|
|
w[28] = __byte_perm (w[23], w[22], selector);
|
|
w[27] = __byte_perm (w[22], w[21], selector);
|
|
w[26] = __byte_perm (w[21], w[20], selector);
|
|
w[25] = __byte_perm (w[20], w[19], selector);
|
|
w[24] = __byte_perm (w[19], w[18], selector);
|
|
w[23] = __byte_perm (w[18], w[17], selector);
|
|
w[22] = __byte_perm (w[17], w[16], selector);
|
|
w[21] = __byte_perm (w[16], w[15], selector);
|
|
w[20] = __byte_perm (w[15], w[14], selector);
|
|
w[19] = __byte_perm (w[14], w[13], selector);
|
|
w[18] = __byte_perm (w[13], w[12], selector);
|
|
w[17] = __byte_perm (w[12], w[11], selector);
|
|
w[16] = __byte_perm (w[11], w[10], selector);
|
|
w[15] = __byte_perm (w[10], w[ 9], selector);
|
|
w[14] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[13] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[12] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[11] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[10] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[ 9] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 8] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 7] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 6] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 5] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = __byte_perm (w[57], w[56], selector);
|
|
w[62] = __byte_perm (w[56], w[55], selector);
|
|
w[61] = __byte_perm (w[55], w[54], selector);
|
|
w[60] = __byte_perm (w[54], w[53], selector);
|
|
w[59] = __byte_perm (w[53], w[52], selector);
|
|
w[58] = __byte_perm (w[52], w[51], selector);
|
|
w[57] = __byte_perm (w[51], w[50], selector);
|
|
w[56] = __byte_perm (w[50], w[49], selector);
|
|
w[55] = __byte_perm (w[49], w[48], selector);
|
|
w[54] = __byte_perm (w[48], w[47], selector);
|
|
w[53] = __byte_perm (w[47], w[46], selector);
|
|
w[52] = __byte_perm (w[46], w[45], selector);
|
|
w[51] = __byte_perm (w[45], w[44], selector);
|
|
w[50] = __byte_perm (w[44], w[43], selector);
|
|
w[49] = __byte_perm (w[43], w[42], selector);
|
|
w[48] = __byte_perm (w[42], w[41], selector);
|
|
w[47] = __byte_perm (w[41], w[40], selector);
|
|
w[46] = __byte_perm (w[40], w[39], selector);
|
|
w[45] = __byte_perm (w[39], w[38], selector);
|
|
w[44] = __byte_perm (w[38], w[37], selector);
|
|
w[43] = __byte_perm (w[37], w[36], selector);
|
|
w[42] = __byte_perm (w[36], w[35], selector);
|
|
w[41] = __byte_perm (w[35], w[34], selector);
|
|
w[40] = __byte_perm (w[34], w[33], selector);
|
|
w[39] = __byte_perm (w[33], w[32], selector);
|
|
w[38] = __byte_perm (w[32], w[31], selector);
|
|
w[37] = __byte_perm (w[31], w[30], selector);
|
|
w[36] = __byte_perm (w[30], w[29], selector);
|
|
w[35] = __byte_perm (w[29], w[28], selector);
|
|
w[34] = __byte_perm (w[28], w[27], selector);
|
|
w[33] = __byte_perm (w[27], w[26], selector);
|
|
w[32] = __byte_perm (w[26], w[25], selector);
|
|
w[31] = __byte_perm (w[25], w[24], selector);
|
|
w[30] = __byte_perm (w[24], w[23], selector);
|
|
w[29] = __byte_perm (w[23], w[22], selector);
|
|
w[28] = __byte_perm (w[22], w[21], selector);
|
|
w[27] = __byte_perm (w[21], w[20], selector);
|
|
w[26] = __byte_perm (w[20], w[19], selector);
|
|
w[25] = __byte_perm (w[19], w[18], selector);
|
|
w[24] = __byte_perm (w[18], w[17], selector);
|
|
w[23] = __byte_perm (w[17], w[16], selector);
|
|
w[22] = __byte_perm (w[16], w[15], selector);
|
|
w[21] = __byte_perm (w[15], w[14], selector);
|
|
w[20] = __byte_perm (w[14], w[13], selector);
|
|
w[19] = __byte_perm (w[13], w[12], selector);
|
|
w[18] = __byte_perm (w[12], w[11], selector);
|
|
w[17] = __byte_perm (w[11], w[10], selector);
|
|
w[16] = __byte_perm (w[10], w[ 9], selector);
|
|
w[15] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[14] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[13] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[12] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[11] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[10] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[ 9] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 8] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 7] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 6] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = __byte_perm (w[56], w[55], selector);
|
|
w[62] = __byte_perm (w[55], w[54], selector);
|
|
w[61] = __byte_perm (w[54], w[53], selector);
|
|
w[60] = __byte_perm (w[53], w[52], selector);
|
|
w[59] = __byte_perm (w[52], w[51], selector);
|
|
w[58] = __byte_perm (w[51], w[50], selector);
|
|
w[57] = __byte_perm (w[50], w[49], selector);
|
|
w[56] = __byte_perm (w[49], w[48], selector);
|
|
w[55] = __byte_perm (w[48], w[47], selector);
|
|
w[54] = __byte_perm (w[47], w[46], selector);
|
|
w[53] = __byte_perm (w[46], w[45], selector);
|
|
w[52] = __byte_perm (w[45], w[44], selector);
|
|
w[51] = __byte_perm (w[44], w[43], selector);
|
|
w[50] = __byte_perm (w[43], w[42], selector);
|
|
w[49] = __byte_perm (w[42], w[41], selector);
|
|
w[48] = __byte_perm (w[41], w[40], selector);
|
|
w[47] = __byte_perm (w[40], w[39], selector);
|
|
w[46] = __byte_perm (w[39], w[38], selector);
|
|
w[45] = __byte_perm (w[38], w[37], selector);
|
|
w[44] = __byte_perm (w[37], w[36], selector);
|
|
w[43] = __byte_perm (w[36], w[35], selector);
|
|
w[42] = __byte_perm (w[35], w[34], selector);
|
|
w[41] = __byte_perm (w[34], w[33], selector);
|
|
w[40] = __byte_perm (w[33], w[32], selector);
|
|
w[39] = __byte_perm (w[32], w[31], selector);
|
|
w[38] = __byte_perm (w[31], w[30], selector);
|
|
w[37] = __byte_perm (w[30], w[29], selector);
|
|
w[36] = __byte_perm (w[29], w[28], selector);
|
|
w[35] = __byte_perm (w[28], w[27], selector);
|
|
w[34] = __byte_perm (w[27], w[26], selector);
|
|
w[33] = __byte_perm (w[26], w[25], selector);
|
|
w[32] = __byte_perm (w[25], w[24], selector);
|
|
w[31] = __byte_perm (w[24], w[23], selector);
|
|
w[30] = __byte_perm (w[23], w[22], selector);
|
|
w[29] = __byte_perm (w[22], w[21], selector);
|
|
w[28] = __byte_perm (w[21], w[20], selector);
|
|
w[27] = __byte_perm (w[20], w[19], selector);
|
|
w[26] = __byte_perm (w[19], w[18], selector);
|
|
w[25] = __byte_perm (w[18], w[17], selector);
|
|
w[24] = __byte_perm (w[17], w[16], selector);
|
|
w[23] = __byte_perm (w[16], w[15], selector);
|
|
w[22] = __byte_perm (w[15], w[14], selector);
|
|
w[21] = __byte_perm (w[14], w[13], selector);
|
|
w[20] = __byte_perm (w[13], w[12], selector);
|
|
w[19] = __byte_perm (w[12], w[11], selector);
|
|
w[18] = __byte_perm (w[11], w[10], selector);
|
|
w[17] = __byte_perm (w[10], w[ 9], selector);
|
|
w[16] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[15] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[14] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[13] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[12] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[11] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[10] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[ 9] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 8] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 7] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = __byte_perm (w[55], w[54], selector);
|
|
w[62] = __byte_perm (w[54], w[53], selector);
|
|
w[61] = __byte_perm (w[53], w[52], selector);
|
|
w[60] = __byte_perm (w[52], w[51], selector);
|
|
w[59] = __byte_perm (w[51], w[50], selector);
|
|
w[58] = __byte_perm (w[50], w[49], selector);
|
|
w[57] = __byte_perm (w[49], w[48], selector);
|
|
w[56] = __byte_perm (w[48], w[47], selector);
|
|
w[55] = __byte_perm (w[47], w[46], selector);
|
|
w[54] = __byte_perm (w[46], w[45], selector);
|
|
w[53] = __byte_perm (w[45], w[44], selector);
|
|
w[52] = __byte_perm (w[44], w[43], selector);
|
|
w[51] = __byte_perm (w[43], w[42], selector);
|
|
w[50] = __byte_perm (w[42], w[41], selector);
|
|
w[49] = __byte_perm (w[41], w[40], selector);
|
|
w[48] = __byte_perm (w[40], w[39], selector);
|
|
w[47] = __byte_perm (w[39], w[38], selector);
|
|
w[46] = __byte_perm (w[38], w[37], selector);
|
|
w[45] = __byte_perm (w[37], w[36], selector);
|
|
w[44] = __byte_perm (w[36], w[35], selector);
|
|
w[43] = __byte_perm (w[35], w[34], selector);
|
|
w[42] = __byte_perm (w[34], w[33], selector);
|
|
w[41] = __byte_perm (w[33], w[32], selector);
|
|
w[40] = __byte_perm (w[32], w[31], selector);
|
|
w[39] = __byte_perm (w[31], w[30], selector);
|
|
w[38] = __byte_perm (w[30], w[29], selector);
|
|
w[37] = __byte_perm (w[29], w[28], selector);
|
|
w[36] = __byte_perm (w[28], w[27], selector);
|
|
w[35] = __byte_perm (w[27], w[26], selector);
|
|
w[34] = __byte_perm (w[26], w[25], selector);
|
|
w[33] = __byte_perm (w[25], w[24], selector);
|
|
w[32] = __byte_perm (w[24], w[23], selector);
|
|
w[31] = __byte_perm (w[23], w[22], selector);
|
|
w[30] = __byte_perm (w[22], w[21], selector);
|
|
w[29] = __byte_perm (w[21], w[20], selector);
|
|
w[28] = __byte_perm (w[20], w[19], selector);
|
|
w[27] = __byte_perm (w[19], w[18], selector);
|
|
w[26] = __byte_perm (w[18], w[17], selector);
|
|
w[25] = __byte_perm (w[17], w[16], selector);
|
|
w[24] = __byte_perm (w[16], w[15], selector);
|
|
w[23] = __byte_perm (w[15], w[14], selector);
|
|
w[22] = __byte_perm (w[14], w[13], selector);
|
|
w[21] = __byte_perm (w[13], w[12], selector);
|
|
w[20] = __byte_perm (w[12], w[11], selector);
|
|
w[19] = __byte_perm (w[11], w[10], selector);
|
|
w[18] = __byte_perm (w[10], w[ 9], selector);
|
|
w[17] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[16] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[15] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[14] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[13] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[12] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[11] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[10] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[ 9] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 8] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = __byte_perm (w[54], w[53], selector);
|
|
w[62] = __byte_perm (w[53], w[52], selector);
|
|
w[61] = __byte_perm (w[52], w[51], selector);
|
|
w[60] = __byte_perm (w[51], w[50], selector);
|
|
w[59] = __byte_perm (w[50], w[49], selector);
|
|
w[58] = __byte_perm (w[49], w[48], selector);
|
|
w[57] = __byte_perm (w[48], w[47], selector);
|
|
w[56] = __byte_perm (w[47], w[46], selector);
|
|
w[55] = __byte_perm (w[46], w[45], selector);
|
|
w[54] = __byte_perm (w[45], w[44], selector);
|
|
w[53] = __byte_perm (w[44], w[43], selector);
|
|
w[52] = __byte_perm (w[43], w[42], selector);
|
|
w[51] = __byte_perm (w[42], w[41], selector);
|
|
w[50] = __byte_perm (w[41], w[40], selector);
|
|
w[49] = __byte_perm (w[40], w[39], selector);
|
|
w[48] = __byte_perm (w[39], w[38], selector);
|
|
w[47] = __byte_perm (w[38], w[37], selector);
|
|
w[46] = __byte_perm (w[37], w[36], selector);
|
|
w[45] = __byte_perm (w[36], w[35], selector);
|
|
w[44] = __byte_perm (w[35], w[34], selector);
|
|
w[43] = __byte_perm (w[34], w[33], selector);
|
|
w[42] = __byte_perm (w[33], w[32], selector);
|
|
w[41] = __byte_perm (w[32], w[31], selector);
|
|
w[40] = __byte_perm (w[31], w[30], selector);
|
|
w[39] = __byte_perm (w[30], w[29], selector);
|
|
w[38] = __byte_perm (w[29], w[28], selector);
|
|
w[37] = __byte_perm (w[28], w[27], selector);
|
|
w[36] = __byte_perm (w[27], w[26], selector);
|
|
w[35] = __byte_perm (w[26], w[25], selector);
|
|
w[34] = __byte_perm (w[25], w[24], selector);
|
|
w[33] = __byte_perm (w[24], w[23], selector);
|
|
w[32] = __byte_perm (w[23], w[22], selector);
|
|
w[31] = __byte_perm (w[22], w[21], selector);
|
|
w[30] = __byte_perm (w[21], w[20], selector);
|
|
w[29] = __byte_perm (w[20], w[19], selector);
|
|
w[28] = __byte_perm (w[19], w[18], selector);
|
|
w[27] = __byte_perm (w[18], w[17], selector);
|
|
w[26] = __byte_perm (w[17], w[16], selector);
|
|
w[25] = __byte_perm (w[16], w[15], selector);
|
|
w[24] = __byte_perm (w[15], w[14], selector);
|
|
w[23] = __byte_perm (w[14], w[13], selector);
|
|
w[22] = __byte_perm (w[13], w[12], selector);
|
|
w[21] = __byte_perm (w[12], w[11], selector);
|
|
w[20] = __byte_perm (w[11], w[10], selector);
|
|
w[19] = __byte_perm (w[10], w[ 9], selector);
|
|
w[18] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[17] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[16] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[15] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[14] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[13] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[12] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[11] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[10] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[ 9] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = __byte_perm (w[53], w[52], selector);
|
|
w[62] = __byte_perm (w[52], w[51], selector);
|
|
w[61] = __byte_perm (w[51], w[50], selector);
|
|
w[60] = __byte_perm (w[50], w[49], selector);
|
|
w[59] = __byte_perm (w[49], w[48], selector);
|
|
w[58] = __byte_perm (w[48], w[47], selector);
|
|
w[57] = __byte_perm (w[47], w[46], selector);
|
|
w[56] = __byte_perm (w[46], w[45], selector);
|
|
w[55] = __byte_perm (w[45], w[44], selector);
|
|
w[54] = __byte_perm (w[44], w[43], selector);
|
|
w[53] = __byte_perm (w[43], w[42], selector);
|
|
w[52] = __byte_perm (w[42], w[41], selector);
|
|
w[51] = __byte_perm (w[41], w[40], selector);
|
|
w[50] = __byte_perm (w[40], w[39], selector);
|
|
w[49] = __byte_perm (w[39], w[38], selector);
|
|
w[48] = __byte_perm (w[38], w[37], selector);
|
|
w[47] = __byte_perm (w[37], w[36], selector);
|
|
w[46] = __byte_perm (w[36], w[35], selector);
|
|
w[45] = __byte_perm (w[35], w[34], selector);
|
|
w[44] = __byte_perm (w[34], w[33], selector);
|
|
w[43] = __byte_perm (w[33], w[32], selector);
|
|
w[42] = __byte_perm (w[32], w[31], selector);
|
|
w[41] = __byte_perm (w[31], w[30], selector);
|
|
w[40] = __byte_perm (w[30], w[29], selector);
|
|
w[39] = __byte_perm (w[29], w[28], selector);
|
|
w[38] = __byte_perm (w[28], w[27], selector);
|
|
w[37] = __byte_perm (w[27], w[26], selector);
|
|
w[36] = __byte_perm (w[26], w[25], selector);
|
|
w[35] = __byte_perm (w[25], w[24], selector);
|
|
w[34] = __byte_perm (w[24], w[23], selector);
|
|
w[33] = __byte_perm (w[23], w[22], selector);
|
|
w[32] = __byte_perm (w[22], w[21], selector);
|
|
w[31] = __byte_perm (w[21], w[20], selector);
|
|
w[30] = __byte_perm (w[20], w[19], selector);
|
|
w[29] = __byte_perm (w[19], w[18], selector);
|
|
w[28] = __byte_perm (w[18], w[17], selector);
|
|
w[27] = __byte_perm (w[17], w[16], selector);
|
|
w[26] = __byte_perm (w[16], w[15], selector);
|
|
w[25] = __byte_perm (w[15], w[14], selector);
|
|
w[24] = __byte_perm (w[14], w[13], selector);
|
|
w[23] = __byte_perm (w[13], w[12], selector);
|
|
w[22] = __byte_perm (w[12], w[11], selector);
|
|
w[21] = __byte_perm (w[11], w[10], selector);
|
|
w[20] = __byte_perm (w[10], w[ 9], selector);
|
|
w[19] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[18] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[17] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[16] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[15] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[14] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[13] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[12] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[11] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[10] = __byte_perm (w[ 0], 0, selector);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = __byte_perm (w[52], w[51], selector);
|
|
w[62] = __byte_perm (w[51], w[50], selector);
|
|
w[61] = __byte_perm (w[50], w[49], selector);
|
|
w[60] = __byte_perm (w[49], w[48], selector);
|
|
w[59] = __byte_perm (w[48], w[47], selector);
|
|
w[58] = __byte_perm (w[47], w[46], selector);
|
|
w[57] = __byte_perm (w[46], w[45], selector);
|
|
w[56] = __byte_perm (w[45], w[44], selector);
|
|
w[55] = __byte_perm (w[44], w[43], selector);
|
|
w[54] = __byte_perm (w[43], w[42], selector);
|
|
w[53] = __byte_perm (w[42], w[41], selector);
|
|
w[52] = __byte_perm (w[41], w[40], selector);
|
|
w[51] = __byte_perm (w[40], w[39], selector);
|
|
w[50] = __byte_perm (w[39], w[38], selector);
|
|
w[49] = __byte_perm (w[38], w[37], selector);
|
|
w[48] = __byte_perm (w[37], w[36], selector);
|
|
w[47] = __byte_perm (w[36], w[35], selector);
|
|
w[46] = __byte_perm (w[35], w[34], selector);
|
|
w[45] = __byte_perm (w[34], w[33], selector);
|
|
w[44] = __byte_perm (w[33], w[32], selector);
|
|
w[43] = __byte_perm (w[32], w[31], selector);
|
|
w[42] = __byte_perm (w[31], w[30], selector);
|
|
w[41] = __byte_perm (w[30], w[29], selector);
|
|
w[40] = __byte_perm (w[29], w[28], selector);
|
|
w[39] = __byte_perm (w[28], w[27], selector);
|
|
w[38] = __byte_perm (w[27], w[26], selector);
|
|
w[37] = __byte_perm (w[26], w[25], selector);
|
|
w[36] = __byte_perm (w[25], w[24], selector);
|
|
w[35] = __byte_perm (w[24], w[23], selector);
|
|
w[34] = __byte_perm (w[23], w[22], selector);
|
|
w[33] = __byte_perm (w[22], w[21], selector);
|
|
w[32] = __byte_perm (w[21], w[20], selector);
|
|
w[31] = __byte_perm (w[20], w[19], selector);
|
|
w[30] = __byte_perm (w[19], w[18], selector);
|
|
w[29] = __byte_perm (w[18], w[17], selector);
|
|
w[28] = __byte_perm (w[17], w[16], selector);
|
|
w[27] = __byte_perm (w[16], w[15], selector);
|
|
w[26] = __byte_perm (w[15], w[14], selector);
|
|
w[25] = __byte_perm (w[14], w[13], selector);
|
|
w[24] = __byte_perm (w[13], w[12], selector);
|
|
w[23] = __byte_perm (w[12], w[11], selector);
|
|
w[22] = __byte_perm (w[11], w[10], selector);
|
|
w[21] = __byte_perm (w[10], w[ 9], selector);
|
|
w[20] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[19] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[18] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[17] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[16] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[15] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[14] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[13] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[12] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[11] = __byte_perm (w[ 0], 0, selector);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = __byte_perm (w[51], w[50], selector);
|
|
w[62] = __byte_perm (w[50], w[49], selector);
|
|
w[61] = __byte_perm (w[49], w[48], selector);
|
|
w[60] = __byte_perm (w[48], w[47], selector);
|
|
w[59] = __byte_perm (w[47], w[46], selector);
|
|
w[58] = __byte_perm (w[46], w[45], selector);
|
|
w[57] = __byte_perm (w[45], w[44], selector);
|
|
w[56] = __byte_perm (w[44], w[43], selector);
|
|
w[55] = __byte_perm (w[43], w[42], selector);
|
|
w[54] = __byte_perm (w[42], w[41], selector);
|
|
w[53] = __byte_perm (w[41], w[40], selector);
|
|
w[52] = __byte_perm (w[40], w[39], selector);
|
|
w[51] = __byte_perm (w[39], w[38], selector);
|
|
w[50] = __byte_perm (w[38], w[37], selector);
|
|
w[49] = __byte_perm (w[37], w[36], selector);
|
|
w[48] = __byte_perm (w[36], w[35], selector);
|
|
w[47] = __byte_perm (w[35], w[34], selector);
|
|
w[46] = __byte_perm (w[34], w[33], selector);
|
|
w[45] = __byte_perm (w[33], w[32], selector);
|
|
w[44] = __byte_perm (w[32], w[31], selector);
|
|
w[43] = __byte_perm (w[31], w[30], selector);
|
|
w[42] = __byte_perm (w[30], w[29], selector);
|
|
w[41] = __byte_perm (w[29], w[28], selector);
|
|
w[40] = __byte_perm (w[28], w[27], selector);
|
|
w[39] = __byte_perm (w[27], w[26], selector);
|
|
w[38] = __byte_perm (w[26], w[25], selector);
|
|
w[37] = __byte_perm (w[25], w[24], selector);
|
|
w[36] = __byte_perm (w[24], w[23], selector);
|
|
w[35] = __byte_perm (w[23], w[22], selector);
|
|
w[34] = __byte_perm (w[22], w[21], selector);
|
|
w[33] = __byte_perm (w[21], w[20], selector);
|
|
w[32] = __byte_perm (w[20], w[19], selector);
|
|
w[31] = __byte_perm (w[19], w[18], selector);
|
|
w[30] = __byte_perm (w[18], w[17], selector);
|
|
w[29] = __byte_perm (w[17], w[16], selector);
|
|
w[28] = __byte_perm (w[16], w[15], selector);
|
|
w[27] = __byte_perm (w[15], w[14], selector);
|
|
w[26] = __byte_perm (w[14], w[13], selector);
|
|
w[25] = __byte_perm (w[13], w[12], selector);
|
|
w[24] = __byte_perm (w[12], w[11], selector);
|
|
w[23] = __byte_perm (w[11], w[10], selector);
|
|
w[22] = __byte_perm (w[10], w[ 9], selector);
|
|
w[21] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[20] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[19] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[18] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[17] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[16] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[15] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[14] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[13] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[12] = __byte_perm (w[ 0], 0, selector);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = __byte_perm (w[50], w[49], selector);
|
|
w[62] = __byte_perm (w[49], w[48], selector);
|
|
w[61] = __byte_perm (w[48], w[47], selector);
|
|
w[60] = __byte_perm (w[47], w[46], selector);
|
|
w[59] = __byte_perm (w[46], w[45], selector);
|
|
w[58] = __byte_perm (w[45], w[44], selector);
|
|
w[57] = __byte_perm (w[44], w[43], selector);
|
|
w[56] = __byte_perm (w[43], w[42], selector);
|
|
w[55] = __byte_perm (w[42], w[41], selector);
|
|
w[54] = __byte_perm (w[41], w[40], selector);
|
|
w[53] = __byte_perm (w[40], w[39], selector);
|
|
w[52] = __byte_perm (w[39], w[38], selector);
|
|
w[51] = __byte_perm (w[38], w[37], selector);
|
|
w[50] = __byte_perm (w[37], w[36], selector);
|
|
w[49] = __byte_perm (w[36], w[35], selector);
|
|
w[48] = __byte_perm (w[35], w[34], selector);
|
|
w[47] = __byte_perm (w[34], w[33], selector);
|
|
w[46] = __byte_perm (w[33], w[32], selector);
|
|
w[45] = __byte_perm (w[32], w[31], selector);
|
|
w[44] = __byte_perm (w[31], w[30], selector);
|
|
w[43] = __byte_perm (w[30], w[29], selector);
|
|
w[42] = __byte_perm (w[29], w[28], selector);
|
|
w[41] = __byte_perm (w[28], w[27], selector);
|
|
w[40] = __byte_perm (w[27], w[26], selector);
|
|
w[39] = __byte_perm (w[26], w[25], selector);
|
|
w[38] = __byte_perm (w[25], w[24], selector);
|
|
w[37] = __byte_perm (w[24], w[23], selector);
|
|
w[36] = __byte_perm (w[23], w[22], selector);
|
|
w[35] = __byte_perm (w[22], w[21], selector);
|
|
w[34] = __byte_perm (w[21], w[20], selector);
|
|
w[33] = __byte_perm (w[20], w[19], selector);
|
|
w[32] = __byte_perm (w[19], w[18], selector);
|
|
w[31] = __byte_perm (w[18], w[17], selector);
|
|
w[30] = __byte_perm (w[17], w[16], selector);
|
|
w[29] = __byte_perm (w[16], w[15], selector);
|
|
w[28] = __byte_perm (w[15], w[14], selector);
|
|
w[27] = __byte_perm (w[14], w[13], selector);
|
|
w[26] = __byte_perm (w[13], w[12], selector);
|
|
w[25] = __byte_perm (w[12], w[11], selector);
|
|
w[24] = __byte_perm (w[11], w[10], selector);
|
|
w[23] = __byte_perm (w[10], w[ 9], selector);
|
|
w[22] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[21] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[20] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[19] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[18] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[17] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[16] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[15] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[14] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[13] = __byte_perm (w[ 0], 0, selector);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = __byte_perm (w[49], w[48], selector);
|
|
w[62] = __byte_perm (w[48], w[47], selector);
|
|
w[61] = __byte_perm (w[47], w[46], selector);
|
|
w[60] = __byte_perm (w[46], w[45], selector);
|
|
w[59] = __byte_perm (w[45], w[44], selector);
|
|
w[58] = __byte_perm (w[44], w[43], selector);
|
|
w[57] = __byte_perm (w[43], w[42], selector);
|
|
w[56] = __byte_perm (w[42], w[41], selector);
|
|
w[55] = __byte_perm (w[41], w[40], selector);
|
|
w[54] = __byte_perm (w[40], w[39], selector);
|
|
w[53] = __byte_perm (w[39], w[38], selector);
|
|
w[52] = __byte_perm (w[38], w[37], selector);
|
|
w[51] = __byte_perm (w[37], w[36], selector);
|
|
w[50] = __byte_perm (w[36], w[35], selector);
|
|
w[49] = __byte_perm (w[35], w[34], selector);
|
|
w[48] = __byte_perm (w[34], w[33], selector);
|
|
w[47] = __byte_perm (w[33], w[32], selector);
|
|
w[46] = __byte_perm (w[32], w[31], selector);
|
|
w[45] = __byte_perm (w[31], w[30], selector);
|
|
w[44] = __byte_perm (w[30], w[29], selector);
|
|
w[43] = __byte_perm (w[29], w[28], selector);
|
|
w[42] = __byte_perm (w[28], w[27], selector);
|
|
w[41] = __byte_perm (w[27], w[26], selector);
|
|
w[40] = __byte_perm (w[26], w[25], selector);
|
|
w[39] = __byte_perm (w[25], w[24], selector);
|
|
w[38] = __byte_perm (w[24], w[23], selector);
|
|
w[37] = __byte_perm (w[23], w[22], selector);
|
|
w[36] = __byte_perm (w[22], w[21], selector);
|
|
w[35] = __byte_perm (w[21], w[20], selector);
|
|
w[34] = __byte_perm (w[20], w[19], selector);
|
|
w[33] = __byte_perm (w[19], w[18], selector);
|
|
w[32] = __byte_perm (w[18], w[17], selector);
|
|
w[31] = __byte_perm (w[17], w[16], selector);
|
|
w[30] = __byte_perm (w[16], w[15], selector);
|
|
w[29] = __byte_perm (w[15], w[14], selector);
|
|
w[28] = __byte_perm (w[14], w[13], selector);
|
|
w[27] = __byte_perm (w[13], w[12], selector);
|
|
w[26] = __byte_perm (w[12], w[11], selector);
|
|
w[25] = __byte_perm (w[11], w[10], selector);
|
|
w[24] = __byte_perm (w[10], w[ 9], selector);
|
|
w[23] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[22] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[21] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[20] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[19] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[18] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[17] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[16] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[15] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[14] = __byte_perm (w[ 0], 0, selector);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = __byte_perm (w[48], w[47], selector);
|
|
w[62] = __byte_perm (w[47], w[46], selector);
|
|
w[61] = __byte_perm (w[46], w[45], selector);
|
|
w[60] = __byte_perm (w[45], w[44], selector);
|
|
w[59] = __byte_perm (w[44], w[43], selector);
|
|
w[58] = __byte_perm (w[43], w[42], selector);
|
|
w[57] = __byte_perm (w[42], w[41], selector);
|
|
w[56] = __byte_perm (w[41], w[40], selector);
|
|
w[55] = __byte_perm (w[40], w[39], selector);
|
|
w[54] = __byte_perm (w[39], w[38], selector);
|
|
w[53] = __byte_perm (w[38], w[37], selector);
|
|
w[52] = __byte_perm (w[37], w[36], selector);
|
|
w[51] = __byte_perm (w[36], w[35], selector);
|
|
w[50] = __byte_perm (w[35], w[34], selector);
|
|
w[49] = __byte_perm (w[34], w[33], selector);
|
|
w[48] = __byte_perm (w[33], w[32], selector);
|
|
w[47] = __byte_perm (w[32], w[31], selector);
|
|
w[46] = __byte_perm (w[31], w[30], selector);
|
|
w[45] = __byte_perm (w[30], w[29], selector);
|
|
w[44] = __byte_perm (w[29], w[28], selector);
|
|
w[43] = __byte_perm (w[28], w[27], selector);
|
|
w[42] = __byte_perm (w[27], w[26], selector);
|
|
w[41] = __byte_perm (w[26], w[25], selector);
|
|
w[40] = __byte_perm (w[25], w[24], selector);
|
|
w[39] = __byte_perm (w[24], w[23], selector);
|
|
w[38] = __byte_perm (w[23], w[22], selector);
|
|
w[37] = __byte_perm (w[22], w[21], selector);
|
|
w[36] = __byte_perm (w[21], w[20], selector);
|
|
w[35] = __byte_perm (w[20], w[19], selector);
|
|
w[34] = __byte_perm (w[19], w[18], selector);
|
|
w[33] = __byte_perm (w[18], w[17], selector);
|
|
w[32] = __byte_perm (w[17], w[16], selector);
|
|
w[31] = __byte_perm (w[16], w[15], selector);
|
|
w[30] = __byte_perm (w[15], w[14], selector);
|
|
w[29] = __byte_perm (w[14], w[13], selector);
|
|
w[28] = __byte_perm (w[13], w[12], selector);
|
|
w[27] = __byte_perm (w[12], w[11], selector);
|
|
w[26] = __byte_perm (w[11], w[10], selector);
|
|
w[25] = __byte_perm (w[10], w[ 9], selector);
|
|
w[24] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[23] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[22] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[21] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[20] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[19] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[18] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[17] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[16] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[15] = __byte_perm (w[ 0], 0, selector);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = __byte_perm (w[47], w[46], selector);
|
|
w[62] = __byte_perm (w[46], w[45], selector);
|
|
w[61] = __byte_perm (w[45], w[44], selector);
|
|
w[60] = __byte_perm (w[44], w[43], selector);
|
|
w[59] = __byte_perm (w[43], w[42], selector);
|
|
w[58] = __byte_perm (w[42], w[41], selector);
|
|
w[57] = __byte_perm (w[41], w[40], selector);
|
|
w[56] = __byte_perm (w[40], w[39], selector);
|
|
w[55] = __byte_perm (w[39], w[38], selector);
|
|
w[54] = __byte_perm (w[38], w[37], selector);
|
|
w[53] = __byte_perm (w[37], w[36], selector);
|
|
w[52] = __byte_perm (w[36], w[35], selector);
|
|
w[51] = __byte_perm (w[35], w[34], selector);
|
|
w[50] = __byte_perm (w[34], w[33], selector);
|
|
w[49] = __byte_perm (w[33], w[32], selector);
|
|
w[48] = __byte_perm (w[32], w[31], selector);
|
|
w[47] = __byte_perm (w[31], w[30], selector);
|
|
w[46] = __byte_perm (w[30], w[29], selector);
|
|
w[45] = __byte_perm (w[29], w[28], selector);
|
|
w[44] = __byte_perm (w[28], w[27], selector);
|
|
w[43] = __byte_perm (w[27], w[26], selector);
|
|
w[42] = __byte_perm (w[26], w[25], selector);
|
|
w[41] = __byte_perm (w[25], w[24], selector);
|
|
w[40] = __byte_perm (w[24], w[23], selector);
|
|
w[39] = __byte_perm (w[23], w[22], selector);
|
|
w[38] = __byte_perm (w[22], w[21], selector);
|
|
w[37] = __byte_perm (w[21], w[20], selector);
|
|
w[36] = __byte_perm (w[20], w[19], selector);
|
|
w[35] = __byte_perm (w[19], w[18], selector);
|
|
w[34] = __byte_perm (w[18], w[17], selector);
|
|
w[33] = __byte_perm (w[17], w[16], selector);
|
|
w[32] = __byte_perm (w[16], w[15], selector);
|
|
w[31] = __byte_perm (w[15], w[14], selector);
|
|
w[30] = __byte_perm (w[14], w[13], selector);
|
|
w[29] = __byte_perm (w[13], w[12], selector);
|
|
w[28] = __byte_perm (w[12], w[11], selector);
|
|
w[27] = __byte_perm (w[11], w[10], selector);
|
|
w[26] = __byte_perm (w[10], w[ 9], selector);
|
|
w[25] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[24] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[23] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[22] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[21] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[20] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[19] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[18] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[17] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[16] = __byte_perm (w[ 0], 0, selector);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = __byte_perm (w[46], w[45], selector);
|
|
w[62] = __byte_perm (w[45], w[44], selector);
|
|
w[61] = __byte_perm (w[44], w[43], selector);
|
|
w[60] = __byte_perm (w[43], w[42], selector);
|
|
w[59] = __byte_perm (w[42], w[41], selector);
|
|
w[58] = __byte_perm (w[41], w[40], selector);
|
|
w[57] = __byte_perm (w[40], w[39], selector);
|
|
w[56] = __byte_perm (w[39], w[38], selector);
|
|
w[55] = __byte_perm (w[38], w[37], selector);
|
|
w[54] = __byte_perm (w[37], w[36], selector);
|
|
w[53] = __byte_perm (w[36], w[35], selector);
|
|
w[52] = __byte_perm (w[35], w[34], selector);
|
|
w[51] = __byte_perm (w[34], w[33], selector);
|
|
w[50] = __byte_perm (w[33], w[32], selector);
|
|
w[49] = __byte_perm (w[32], w[31], selector);
|
|
w[48] = __byte_perm (w[31], w[30], selector);
|
|
w[47] = __byte_perm (w[30], w[29], selector);
|
|
w[46] = __byte_perm (w[29], w[28], selector);
|
|
w[45] = __byte_perm (w[28], w[27], selector);
|
|
w[44] = __byte_perm (w[27], w[26], selector);
|
|
w[43] = __byte_perm (w[26], w[25], selector);
|
|
w[42] = __byte_perm (w[25], w[24], selector);
|
|
w[41] = __byte_perm (w[24], w[23], selector);
|
|
w[40] = __byte_perm (w[23], w[22], selector);
|
|
w[39] = __byte_perm (w[22], w[21], selector);
|
|
w[38] = __byte_perm (w[21], w[20], selector);
|
|
w[37] = __byte_perm (w[20], w[19], selector);
|
|
w[36] = __byte_perm (w[19], w[18], selector);
|
|
w[35] = __byte_perm (w[18], w[17], selector);
|
|
w[34] = __byte_perm (w[17], w[16], selector);
|
|
w[33] = __byte_perm (w[16], w[15], selector);
|
|
w[32] = __byte_perm (w[15], w[14], selector);
|
|
w[31] = __byte_perm (w[14], w[13], selector);
|
|
w[30] = __byte_perm (w[13], w[12], selector);
|
|
w[29] = __byte_perm (w[12], w[11], selector);
|
|
w[28] = __byte_perm (w[11], w[10], selector);
|
|
w[27] = __byte_perm (w[10], w[ 9], selector);
|
|
w[26] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[25] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[24] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[23] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[22] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[21] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[20] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[19] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[18] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[17] = __byte_perm (w[ 0], 0, selector);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = __byte_perm (w[45], w[44], selector);
|
|
w[62] = __byte_perm (w[44], w[43], selector);
|
|
w[61] = __byte_perm (w[43], w[42], selector);
|
|
w[60] = __byte_perm (w[42], w[41], selector);
|
|
w[59] = __byte_perm (w[41], w[40], selector);
|
|
w[58] = __byte_perm (w[40], w[39], selector);
|
|
w[57] = __byte_perm (w[39], w[38], selector);
|
|
w[56] = __byte_perm (w[38], w[37], selector);
|
|
w[55] = __byte_perm (w[37], w[36], selector);
|
|
w[54] = __byte_perm (w[36], w[35], selector);
|
|
w[53] = __byte_perm (w[35], w[34], selector);
|
|
w[52] = __byte_perm (w[34], w[33], selector);
|
|
w[51] = __byte_perm (w[33], w[32], selector);
|
|
w[50] = __byte_perm (w[32], w[31], selector);
|
|
w[49] = __byte_perm (w[31], w[30], selector);
|
|
w[48] = __byte_perm (w[30], w[29], selector);
|
|
w[47] = __byte_perm (w[29], w[28], selector);
|
|
w[46] = __byte_perm (w[28], w[27], selector);
|
|
w[45] = __byte_perm (w[27], w[26], selector);
|
|
w[44] = __byte_perm (w[26], w[25], selector);
|
|
w[43] = __byte_perm (w[25], w[24], selector);
|
|
w[42] = __byte_perm (w[24], w[23], selector);
|
|
w[41] = __byte_perm (w[23], w[22], selector);
|
|
w[40] = __byte_perm (w[22], w[21], selector);
|
|
w[39] = __byte_perm (w[21], w[20], selector);
|
|
w[38] = __byte_perm (w[20], w[19], selector);
|
|
w[37] = __byte_perm (w[19], w[18], selector);
|
|
w[36] = __byte_perm (w[18], w[17], selector);
|
|
w[35] = __byte_perm (w[17], w[16], selector);
|
|
w[34] = __byte_perm (w[16], w[15], selector);
|
|
w[33] = __byte_perm (w[15], w[14], selector);
|
|
w[32] = __byte_perm (w[14], w[13], selector);
|
|
w[31] = __byte_perm (w[13], w[12], selector);
|
|
w[30] = __byte_perm (w[12], w[11], selector);
|
|
w[29] = __byte_perm (w[11], w[10], selector);
|
|
w[28] = __byte_perm (w[10], w[ 9], selector);
|
|
w[27] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[26] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[25] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[24] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[23] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[22] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[21] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[20] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[19] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[18] = __byte_perm (w[ 0], 0, selector);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = __byte_perm (w[44], w[43], selector);
|
|
w[62] = __byte_perm (w[43], w[42], selector);
|
|
w[61] = __byte_perm (w[42], w[41], selector);
|
|
w[60] = __byte_perm (w[41], w[40], selector);
|
|
w[59] = __byte_perm (w[40], w[39], selector);
|
|
w[58] = __byte_perm (w[39], w[38], selector);
|
|
w[57] = __byte_perm (w[38], w[37], selector);
|
|
w[56] = __byte_perm (w[37], w[36], selector);
|
|
w[55] = __byte_perm (w[36], w[35], selector);
|
|
w[54] = __byte_perm (w[35], w[34], selector);
|
|
w[53] = __byte_perm (w[34], w[33], selector);
|
|
w[52] = __byte_perm (w[33], w[32], selector);
|
|
w[51] = __byte_perm (w[32], w[31], selector);
|
|
w[50] = __byte_perm (w[31], w[30], selector);
|
|
w[49] = __byte_perm (w[30], w[29], selector);
|
|
w[48] = __byte_perm (w[29], w[28], selector);
|
|
w[47] = __byte_perm (w[28], w[27], selector);
|
|
w[46] = __byte_perm (w[27], w[26], selector);
|
|
w[45] = __byte_perm (w[26], w[25], selector);
|
|
w[44] = __byte_perm (w[25], w[24], selector);
|
|
w[43] = __byte_perm (w[24], w[23], selector);
|
|
w[42] = __byte_perm (w[23], w[22], selector);
|
|
w[41] = __byte_perm (w[22], w[21], selector);
|
|
w[40] = __byte_perm (w[21], w[20], selector);
|
|
w[39] = __byte_perm (w[20], w[19], selector);
|
|
w[38] = __byte_perm (w[19], w[18], selector);
|
|
w[37] = __byte_perm (w[18], w[17], selector);
|
|
w[36] = __byte_perm (w[17], w[16], selector);
|
|
w[35] = __byte_perm (w[16], w[15], selector);
|
|
w[34] = __byte_perm (w[15], w[14], selector);
|
|
w[33] = __byte_perm (w[14], w[13], selector);
|
|
w[32] = __byte_perm (w[13], w[12], selector);
|
|
w[31] = __byte_perm (w[12], w[11], selector);
|
|
w[30] = __byte_perm (w[11], w[10], selector);
|
|
w[29] = __byte_perm (w[10], w[ 9], selector);
|
|
w[28] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[27] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[26] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[25] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[24] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[23] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[22] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[21] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[20] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[19] = __byte_perm (w[ 0], 0, selector);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = __byte_perm (w[43], w[42], selector);
|
|
w[62] = __byte_perm (w[42], w[41], selector);
|
|
w[61] = __byte_perm (w[41], w[40], selector);
|
|
w[60] = __byte_perm (w[40], w[39], selector);
|
|
w[59] = __byte_perm (w[39], w[38], selector);
|
|
w[58] = __byte_perm (w[38], w[37], selector);
|
|
w[57] = __byte_perm (w[37], w[36], selector);
|
|
w[56] = __byte_perm (w[36], w[35], selector);
|
|
w[55] = __byte_perm (w[35], w[34], selector);
|
|
w[54] = __byte_perm (w[34], w[33], selector);
|
|
w[53] = __byte_perm (w[33], w[32], selector);
|
|
w[52] = __byte_perm (w[32], w[31], selector);
|
|
w[51] = __byte_perm (w[31], w[30], selector);
|
|
w[50] = __byte_perm (w[30], w[29], selector);
|
|
w[49] = __byte_perm (w[29], w[28], selector);
|
|
w[48] = __byte_perm (w[28], w[27], selector);
|
|
w[47] = __byte_perm (w[27], w[26], selector);
|
|
w[46] = __byte_perm (w[26], w[25], selector);
|
|
w[45] = __byte_perm (w[25], w[24], selector);
|
|
w[44] = __byte_perm (w[24], w[23], selector);
|
|
w[43] = __byte_perm (w[23], w[22], selector);
|
|
w[42] = __byte_perm (w[22], w[21], selector);
|
|
w[41] = __byte_perm (w[21], w[20], selector);
|
|
w[40] = __byte_perm (w[20], w[19], selector);
|
|
w[39] = __byte_perm (w[19], w[18], selector);
|
|
w[38] = __byte_perm (w[18], w[17], selector);
|
|
w[37] = __byte_perm (w[17], w[16], selector);
|
|
w[36] = __byte_perm (w[16], w[15], selector);
|
|
w[35] = __byte_perm (w[15], w[14], selector);
|
|
w[34] = __byte_perm (w[14], w[13], selector);
|
|
w[33] = __byte_perm (w[13], w[12], selector);
|
|
w[32] = __byte_perm (w[12], w[11], selector);
|
|
w[31] = __byte_perm (w[11], w[10], selector);
|
|
w[30] = __byte_perm (w[10], w[ 9], selector);
|
|
w[29] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[28] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[27] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[26] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[25] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[24] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[23] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[22] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[21] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[20] = __byte_perm (w[ 0], 0, selector);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = __byte_perm (w[42], w[41], selector);
|
|
w[62] = __byte_perm (w[41], w[40], selector);
|
|
w[61] = __byte_perm (w[40], w[39], selector);
|
|
w[60] = __byte_perm (w[39], w[38], selector);
|
|
w[59] = __byte_perm (w[38], w[37], selector);
|
|
w[58] = __byte_perm (w[37], w[36], selector);
|
|
w[57] = __byte_perm (w[36], w[35], selector);
|
|
w[56] = __byte_perm (w[35], w[34], selector);
|
|
w[55] = __byte_perm (w[34], w[33], selector);
|
|
w[54] = __byte_perm (w[33], w[32], selector);
|
|
w[53] = __byte_perm (w[32], w[31], selector);
|
|
w[52] = __byte_perm (w[31], w[30], selector);
|
|
w[51] = __byte_perm (w[30], w[29], selector);
|
|
w[50] = __byte_perm (w[29], w[28], selector);
|
|
w[49] = __byte_perm (w[28], w[27], selector);
|
|
w[48] = __byte_perm (w[27], w[26], selector);
|
|
w[47] = __byte_perm (w[26], w[25], selector);
|
|
w[46] = __byte_perm (w[25], w[24], selector);
|
|
w[45] = __byte_perm (w[24], w[23], selector);
|
|
w[44] = __byte_perm (w[23], w[22], selector);
|
|
w[43] = __byte_perm (w[22], w[21], selector);
|
|
w[42] = __byte_perm (w[21], w[20], selector);
|
|
w[41] = __byte_perm (w[20], w[19], selector);
|
|
w[40] = __byte_perm (w[19], w[18], selector);
|
|
w[39] = __byte_perm (w[18], w[17], selector);
|
|
w[38] = __byte_perm (w[17], w[16], selector);
|
|
w[37] = __byte_perm (w[16], w[15], selector);
|
|
w[36] = __byte_perm (w[15], w[14], selector);
|
|
w[35] = __byte_perm (w[14], w[13], selector);
|
|
w[34] = __byte_perm (w[13], w[12], selector);
|
|
w[33] = __byte_perm (w[12], w[11], selector);
|
|
w[32] = __byte_perm (w[11], w[10], selector);
|
|
w[31] = __byte_perm (w[10], w[ 9], selector);
|
|
w[30] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[29] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[28] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[27] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[26] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[25] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[24] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[23] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[22] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[21] = __byte_perm (w[ 0], 0, selector);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = __byte_perm (w[41], w[40], selector);
|
|
w[62] = __byte_perm (w[40], w[39], selector);
|
|
w[61] = __byte_perm (w[39], w[38], selector);
|
|
w[60] = __byte_perm (w[38], w[37], selector);
|
|
w[59] = __byte_perm (w[37], w[36], selector);
|
|
w[58] = __byte_perm (w[36], w[35], selector);
|
|
w[57] = __byte_perm (w[35], w[34], selector);
|
|
w[56] = __byte_perm (w[34], w[33], selector);
|
|
w[55] = __byte_perm (w[33], w[32], selector);
|
|
w[54] = __byte_perm (w[32], w[31], selector);
|
|
w[53] = __byte_perm (w[31], w[30], selector);
|
|
w[52] = __byte_perm (w[30], w[29], selector);
|
|
w[51] = __byte_perm (w[29], w[28], selector);
|
|
w[50] = __byte_perm (w[28], w[27], selector);
|
|
w[49] = __byte_perm (w[27], w[26], selector);
|
|
w[48] = __byte_perm (w[26], w[25], selector);
|
|
w[47] = __byte_perm (w[25], w[24], selector);
|
|
w[46] = __byte_perm (w[24], w[23], selector);
|
|
w[45] = __byte_perm (w[23], w[22], selector);
|
|
w[44] = __byte_perm (w[22], w[21], selector);
|
|
w[43] = __byte_perm (w[21], w[20], selector);
|
|
w[42] = __byte_perm (w[20], w[19], selector);
|
|
w[41] = __byte_perm (w[19], w[18], selector);
|
|
w[40] = __byte_perm (w[18], w[17], selector);
|
|
w[39] = __byte_perm (w[17], w[16], selector);
|
|
w[38] = __byte_perm (w[16], w[15], selector);
|
|
w[37] = __byte_perm (w[15], w[14], selector);
|
|
w[36] = __byte_perm (w[14], w[13], selector);
|
|
w[35] = __byte_perm (w[13], w[12], selector);
|
|
w[34] = __byte_perm (w[12], w[11], selector);
|
|
w[33] = __byte_perm (w[11], w[10], selector);
|
|
w[32] = __byte_perm (w[10], w[ 9], selector);
|
|
w[31] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[30] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[29] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[28] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[27] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[26] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[25] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[24] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[23] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[22] = __byte_perm (w[ 0], 0, selector);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = __byte_perm (w[40], w[39], selector);
|
|
w[62] = __byte_perm (w[39], w[38], selector);
|
|
w[61] = __byte_perm (w[38], w[37], selector);
|
|
w[60] = __byte_perm (w[37], w[36], selector);
|
|
w[59] = __byte_perm (w[36], w[35], selector);
|
|
w[58] = __byte_perm (w[35], w[34], selector);
|
|
w[57] = __byte_perm (w[34], w[33], selector);
|
|
w[56] = __byte_perm (w[33], w[32], selector);
|
|
w[55] = __byte_perm (w[32], w[31], selector);
|
|
w[54] = __byte_perm (w[31], w[30], selector);
|
|
w[53] = __byte_perm (w[30], w[29], selector);
|
|
w[52] = __byte_perm (w[29], w[28], selector);
|
|
w[51] = __byte_perm (w[28], w[27], selector);
|
|
w[50] = __byte_perm (w[27], w[26], selector);
|
|
w[49] = __byte_perm (w[26], w[25], selector);
|
|
w[48] = __byte_perm (w[25], w[24], selector);
|
|
w[47] = __byte_perm (w[24], w[23], selector);
|
|
w[46] = __byte_perm (w[23], w[22], selector);
|
|
w[45] = __byte_perm (w[22], w[21], selector);
|
|
w[44] = __byte_perm (w[21], w[20], selector);
|
|
w[43] = __byte_perm (w[20], w[19], selector);
|
|
w[42] = __byte_perm (w[19], w[18], selector);
|
|
w[41] = __byte_perm (w[18], w[17], selector);
|
|
w[40] = __byte_perm (w[17], w[16], selector);
|
|
w[39] = __byte_perm (w[16], w[15], selector);
|
|
w[38] = __byte_perm (w[15], w[14], selector);
|
|
w[37] = __byte_perm (w[14], w[13], selector);
|
|
w[36] = __byte_perm (w[13], w[12], selector);
|
|
w[35] = __byte_perm (w[12], w[11], selector);
|
|
w[34] = __byte_perm (w[11], w[10], selector);
|
|
w[33] = __byte_perm (w[10], w[ 9], selector);
|
|
w[32] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[31] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[30] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[29] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[28] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[27] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[26] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[25] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[24] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[23] = __byte_perm (w[ 0], 0, selector);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = __byte_perm (w[39], w[38], selector);
|
|
w[62] = __byte_perm (w[38], w[37], selector);
|
|
w[61] = __byte_perm (w[37], w[36], selector);
|
|
w[60] = __byte_perm (w[36], w[35], selector);
|
|
w[59] = __byte_perm (w[35], w[34], selector);
|
|
w[58] = __byte_perm (w[34], w[33], selector);
|
|
w[57] = __byte_perm (w[33], w[32], selector);
|
|
w[56] = __byte_perm (w[32], w[31], selector);
|
|
w[55] = __byte_perm (w[31], w[30], selector);
|
|
w[54] = __byte_perm (w[30], w[29], selector);
|
|
w[53] = __byte_perm (w[29], w[28], selector);
|
|
w[52] = __byte_perm (w[28], w[27], selector);
|
|
w[51] = __byte_perm (w[27], w[26], selector);
|
|
w[50] = __byte_perm (w[26], w[25], selector);
|
|
w[49] = __byte_perm (w[25], w[24], selector);
|
|
w[48] = __byte_perm (w[24], w[23], selector);
|
|
w[47] = __byte_perm (w[23], w[22], selector);
|
|
w[46] = __byte_perm (w[22], w[21], selector);
|
|
w[45] = __byte_perm (w[21], w[20], selector);
|
|
w[44] = __byte_perm (w[20], w[19], selector);
|
|
w[43] = __byte_perm (w[19], w[18], selector);
|
|
w[42] = __byte_perm (w[18], w[17], selector);
|
|
w[41] = __byte_perm (w[17], w[16], selector);
|
|
w[40] = __byte_perm (w[16], w[15], selector);
|
|
w[39] = __byte_perm (w[15], w[14], selector);
|
|
w[38] = __byte_perm (w[14], w[13], selector);
|
|
w[37] = __byte_perm (w[13], w[12], selector);
|
|
w[36] = __byte_perm (w[12], w[11], selector);
|
|
w[35] = __byte_perm (w[11], w[10], selector);
|
|
w[34] = __byte_perm (w[10], w[ 9], selector);
|
|
w[33] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[32] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[31] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[30] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[29] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[28] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[27] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[26] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[25] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[24] = __byte_perm (w[ 0], 0, selector);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = __byte_perm (w[38], w[37], selector);
|
|
w[62] = __byte_perm (w[37], w[36], selector);
|
|
w[61] = __byte_perm (w[36], w[35], selector);
|
|
w[60] = __byte_perm (w[35], w[34], selector);
|
|
w[59] = __byte_perm (w[34], w[33], selector);
|
|
w[58] = __byte_perm (w[33], w[32], selector);
|
|
w[57] = __byte_perm (w[32], w[31], selector);
|
|
w[56] = __byte_perm (w[31], w[30], selector);
|
|
w[55] = __byte_perm (w[30], w[29], selector);
|
|
w[54] = __byte_perm (w[29], w[28], selector);
|
|
w[53] = __byte_perm (w[28], w[27], selector);
|
|
w[52] = __byte_perm (w[27], w[26], selector);
|
|
w[51] = __byte_perm (w[26], w[25], selector);
|
|
w[50] = __byte_perm (w[25], w[24], selector);
|
|
w[49] = __byte_perm (w[24], w[23], selector);
|
|
w[48] = __byte_perm (w[23], w[22], selector);
|
|
w[47] = __byte_perm (w[22], w[21], selector);
|
|
w[46] = __byte_perm (w[21], w[20], selector);
|
|
w[45] = __byte_perm (w[20], w[19], selector);
|
|
w[44] = __byte_perm (w[19], w[18], selector);
|
|
w[43] = __byte_perm (w[18], w[17], selector);
|
|
w[42] = __byte_perm (w[17], w[16], selector);
|
|
w[41] = __byte_perm (w[16], w[15], selector);
|
|
w[40] = __byte_perm (w[15], w[14], selector);
|
|
w[39] = __byte_perm (w[14], w[13], selector);
|
|
w[38] = __byte_perm (w[13], w[12], selector);
|
|
w[37] = __byte_perm (w[12], w[11], selector);
|
|
w[36] = __byte_perm (w[11], w[10], selector);
|
|
w[35] = __byte_perm (w[10], w[ 9], selector);
|
|
w[34] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[33] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[32] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[31] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[30] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[29] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[28] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[27] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[26] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[25] = __byte_perm (w[ 0], 0, selector);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = __byte_perm (w[37], w[36], selector);
|
|
w[62] = __byte_perm (w[36], w[35], selector);
|
|
w[61] = __byte_perm (w[35], w[34], selector);
|
|
w[60] = __byte_perm (w[34], w[33], selector);
|
|
w[59] = __byte_perm (w[33], w[32], selector);
|
|
w[58] = __byte_perm (w[32], w[31], selector);
|
|
w[57] = __byte_perm (w[31], w[30], selector);
|
|
w[56] = __byte_perm (w[30], w[29], selector);
|
|
w[55] = __byte_perm (w[29], w[28], selector);
|
|
w[54] = __byte_perm (w[28], w[27], selector);
|
|
w[53] = __byte_perm (w[27], w[26], selector);
|
|
w[52] = __byte_perm (w[26], w[25], selector);
|
|
w[51] = __byte_perm (w[25], w[24], selector);
|
|
w[50] = __byte_perm (w[24], w[23], selector);
|
|
w[49] = __byte_perm (w[23], w[22], selector);
|
|
w[48] = __byte_perm (w[22], w[21], selector);
|
|
w[47] = __byte_perm (w[21], w[20], selector);
|
|
w[46] = __byte_perm (w[20], w[19], selector);
|
|
w[45] = __byte_perm (w[19], w[18], selector);
|
|
w[44] = __byte_perm (w[18], w[17], selector);
|
|
w[43] = __byte_perm (w[17], w[16], selector);
|
|
w[42] = __byte_perm (w[16], w[15], selector);
|
|
w[41] = __byte_perm (w[15], w[14], selector);
|
|
w[40] = __byte_perm (w[14], w[13], selector);
|
|
w[39] = __byte_perm (w[13], w[12], selector);
|
|
w[38] = __byte_perm (w[12], w[11], selector);
|
|
w[37] = __byte_perm (w[11], w[10], selector);
|
|
w[36] = __byte_perm (w[10], w[ 9], selector);
|
|
w[35] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[34] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[33] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[32] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[31] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[30] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[29] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[28] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[27] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[26] = __byte_perm (w[ 0], 0, selector);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = __byte_perm (w[36], w[35], selector);
|
|
w[62] = __byte_perm (w[35], w[34], selector);
|
|
w[61] = __byte_perm (w[34], w[33], selector);
|
|
w[60] = __byte_perm (w[33], w[32], selector);
|
|
w[59] = __byte_perm (w[32], w[31], selector);
|
|
w[58] = __byte_perm (w[31], w[30], selector);
|
|
w[57] = __byte_perm (w[30], w[29], selector);
|
|
w[56] = __byte_perm (w[29], w[28], selector);
|
|
w[55] = __byte_perm (w[28], w[27], selector);
|
|
w[54] = __byte_perm (w[27], w[26], selector);
|
|
w[53] = __byte_perm (w[26], w[25], selector);
|
|
w[52] = __byte_perm (w[25], w[24], selector);
|
|
w[51] = __byte_perm (w[24], w[23], selector);
|
|
w[50] = __byte_perm (w[23], w[22], selector);
|
|
w[49] = __byte_perm (w[22], w[21], selector);
|
|
w[48] = __byte_perm (w[21], w[20], selector);
|
|
w[47] = __byte_perm (w[20], w[19], selector);
|
|
w[46] = __byte_perm (w[19], w[18], selector);
|
|
w[45] = __byte_perm (w[18], w[17], selector);
|
|
w[44] = __byte_perm (w[17], w[16], selector);
|
|
w[43] = __byte_perm (w[16], w[15], selector);
|
|
w[42] = __byte_perm (w[15], w[14], selector);
|
|
w[41] = __byte_perm (w[14], w[13], selector);
|
|
w[40] = __byte_perm (w[13], w[12], selector);
|
|
w[39] = __byte_perm (w[12], w[11], selector);
|
|
w[38] = __byte_perm (w[11], w[10], selector);
|
|
w[37] = __byte_perm (w[10], w[ 9], selector);
|
|
w[36] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[35] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[34] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[33] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[32] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[31] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[30] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[29] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[28] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[27] = __byte_perm (w[ 0], 0, selector);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = __byte_perm (w[35], w[34], selector);
|
|
w[62] = __byte_perm (w[34], w[33], selector);
|
|
w[61] = __byte_perm (w[33], w[32], selector);
|
|
w[60] = __byte_perm (w[32], w[31], selector);
|
|
w[59] = __byte_perm (w[31], w[30], selector);
|
|
w[58] = __byte_perm (w[30], w[29], selector);
|
|
w[57] = __byte_perm (w[29], w[28], selector);
|
|
w[56] = __byte_perm (w[28], w[27], selector);
|
|
w[55] = __byte_perm (w[27], w[26], selector);
|
|
w[54] = __byte_perm (w[26], w[25], selector);
|
|
w[53] = __byte_perm (w[25], w[24], selector);
|
|
w[52] = __byte_perm (w[24], w[23], selector);
|
|
w[51] = __byte_perm (w[23], w[22], selector);
|
|
w[50] = __byte_perm (w[22], w[21], selector);
|
|
w[49] = __byte_perm (w[21], w[20], selector);
|
|
w[48] = __byte_perm (w[20], w[19], selector);
|
|
w[47] = __byte_perm (w[19], w[18], selector);
|
|
w[46] = __byte_perm (w[18], w[17], selector);
|
|
w[45] = __byte_perm (w[17], w[16], selector);
|
|
w[44] = __byte_perm (w[16], w[15], selector);
|
|
w[43] = __byte_perm (w[15], w[14], selector);
|
|
w[42] = __byte_perm (w[14], w[13], selector);
|
|
w[41] = __byte_perm (w[13], w[12], selector);
|
|
w[40] = __byte_perm (w[12], w[11], selector);
|
|
w[39] = __byte_perm (w[11], w[10], selector);
|
|
w[38] = __byte_perm (w[10], w[ 9], selector);
|
|
w[37] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[36] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[35] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[34] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[33] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[32] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[31] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[30] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[29] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[28] = __byte_perm (w[ 0], 0, selector);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = __byte_perm (w[34], w[33], selector);
|
|
w[62] = __byte_perm (w[33], w[32], selector);
|
|
w[61] = __byte_perm (w[32], w[31], selector);
|
|
w[60] = __byte_perm (w[31], w[30], selector);
|
|
w[59] = __byte_perm (w[30], w[29], selector);
|
|
w[58] = __byte_perm (w[29], w[28], selector);
|
|
w[57] = __byte_perm (w[28], w[27], selector);
|
|
w[56] = __byte_perm (w[27], w[26], selector);
|
|
w[55] = __byte_perm (w[26], w[25], selector);
|
|
w[54] = __byte_perm (w[25], w[24], selector);
|
|
w[53] = __byte_perm (w[24], w[23], selector);
|
|
w[52] = __byte_perm (w[23], w[22], selector);
|
|
w[51] = __byte_perm (w[22], w[21], selector);
|
|
w[50] = __byte_perm (w[21], w[20], selector);
|
|
w[49] = __byte_perm (w[20], w[19], selector);
|
|
w[48] = __byte_perm (w[19], w[18], selector);
|
|
w[47] = __byte_perm (w[18], w[17], selector);
|
|
w[46] = __byte_perm (w[17], w[16], selector);
|
|
w[45] = __byte_perm (w[16], w[15], selector);
|
|
w[44] = __byte_perm (w[15], w[14], selector);
|
|
w[43] = __byte_perm (w[14], w[13], selector);
|
|
w[42] = __byte_perm (w[13], w[12], selector);
|
|
w[41] = __byte_perm (w[12], w[11], selector);
|
|
w[40] = __byte_perm (w[11], w[10], selector);
|
|
w[39] = __byte_perm (w[10], w[ 9], selector);
|
|
w[38] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[37] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[36] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[35] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[34] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[33] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[32] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[31] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[30] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[29] = __byte_perm (w[ 0], 0, selector);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = __byte_perm (w[33], w[32], selector);
|
|
w[62] = __byte_perm (w[32], w[31], selector);
|
|
w[61] = __byte_perm (w[31], w[30], selector);
|
|
w[60] = __byte_perm (w[30], w[29], selector);
|
|
w[59] = __byte_perm (w[29], w[28], selector);
|
|
w[58] = __byte_perm (w[28], w[27], selector);
|
|
w[57] = __byte_perm (w[27], w[26], selector);
|
|
w[56] = __byte_perm (w[26], w[25], selector);
|
|
w[55] = __byte_perm (w[25], w[24], selector);
|
|
w[54] = __byte_perm (w[24], w[23], selector);
|
|
w[53] = __byte_perm (w[23], w[22], selector);
|
|
w[52] = __byte_perm (w[22], w[21], selector);
|
|
w[51] = __byte_perm (w[21], w[20], selector);
|
|
w[50] = __byte_perm (w[20], w[19], selector);
|
|
w[49] = __byte_perm (w[19], w[18], selector);
|
|
w[48] = __byte_perm (w[18], w[17], selector);
|
|
w[47] = __byte_perm (w[17], w[16], selector);
|
|
w[46] = __byte_perm (w[16], w[15], selector);
|
|
w[45] = __byte_perm (w[15], w[14], selector);
|
|
w[44] = __byte_perm (w[14], w[13], selector);
|
|
w[43] = __byte_perm (w[13], w[12], selector);
|
|
w[42] = __byte_perm (w[12], w[11], selector);
|
|
w[41] = __byte_perm (w[11], w[10], selector);
|
|
w[40] = __byte_perm (w[10], w[ 9], selector);
|
|
w[39] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[38] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[37] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[36] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[35] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[34] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[33] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[32] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[31] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[30] = __byte_perm (w[ 0], 0, selector);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = __byte_perm (w[32], w[31], selector);
|
|
w[62] = __byte_perm (w[31], w[30], selector);
|
|
w[61] = __byte_perm (w[30], w[29], selector);
|
|
w[60] = __byte_perm (w[29], w[28], selector);
|
|
w[59] = __byte_perm (w[28], w[27], selector);
|
|
w[58] = __byte_perm (w[27], w[26], selector);
|
|
w[57] = __byte_perm (w[26], w[25], selector);
|
|
w[56] = __byte_perm (w[25], w[24], selector);
|
|
w[55] = __byte_perm (w[24], w[23], selector);
|
|
w[54] = __byte_perm (w[23], w[22], selector);
|
|
w[53] = __byte_perm (w[22], w[21], selector);
|
|
w[52] = __byte_perm (w[21], w[20], selector);
|
|
w[51] = __byte_perm (w[20], w[19], selector);
|
|
w[50] = __byte_perm (w[19], w[18], selector);
|
|
w[49] = __byte_perm (w[18], w[17], selector);
|
|
w[48] = __byte_perm (w[17], w[16], selector);
|
|
w[47] = __byte_perm (w[16], w[15], selector);
|
|
w[46] = __byte_perm (w[15], w[14], selector);
|
|
w[45] = __byte_perm (w[14], w[13], selector);
|
|
w[44] = __byte_perm (w[13], w[12], selector);
|
|
w[43] = __byte_perm (w[12], w[11], selector);
|
|
w[42] = __byte_perm (w[11], w[10], selector);
|
|
w[41] = __byte_perm (w[10], w[ 9], selector);
|
|
w[40] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[39] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[38] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[37] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[36] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[35] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[34] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[33] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[32] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[31] = __byte_perm (w[ 0], 0, selector);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = __byte_perm (w[31], w[30], selector);
|
|
w[62] = __byte_perm (w[30], w[29], selector);
|
|
w[61] = __byte_perm (w[29], w[28], selector);
|
|
w[60] = __byte_perm (w[28], w[27], selector);
|
|
w[59] = __byte_perm (w[27], w[26], selector);
|
|
w[58] = __byte_perm (w[26], w[25], selector);
|
|
w[57] = __byte_perm (w[25], w[24], selector);
|
|
w[56] = __byte_perm (w[24], w[23], selector);
|
|
w[55] = __byte_perm (w[23], w[22], selector);
|
|
w[54] = __byte_perm (w[22], w[21], selector);
|
|
w[53] = __byte_perm (w[21], w[20], selector);
|
|
w[52] = __byte_perm (w[20], w[19], selector);
|
|
w[51] = __byte_perm (w[19], w[18], selector);
|
|
w[50] = __byte_perm (w[18], w[17], selector);
|
|
w[49] = __byte_perm (w[17], w[16], selector);
|
|
w[48] = __byte_perm (w[16], w[15], selector);
|
|
w[47] = __byte_perm (w[15], w[14], selector);
|
|
w[46] = __byte_perm (w[14], w[13], selector);
|
|
w[45] = __byte_perm (w[13], w[12], selector);
|
|
w[44] = __byte_perm (w[12], w[11], selector);
|
|
w[43] = __byte_perm (w[11], w[10], selector);
|
|
w[42] = __byte_perm (w[10], w[ 9], selector);
|
|
w[41] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[40] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[39] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[38] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[37] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[36] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[35] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[34] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[33] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[32] = __byte_perm (w[ 0], 0, selector);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = __byte_perm (w[30], w[29], selector);
|
|
w[62] = __byte_perm (w[29], w[28], selector);
|
|
w[61] = __byte_perm (w[28], w[27], selector);
|
|
w[60] = __byte_perm (w[27], w[26], selector);
|
|
w[59] = __byte_perm (w[26], w[25], selector);
|
|
w[58] = __byte_perm (w[25], w[24], selector);
|
|
w[57] = __byte_perm (w[24], w[23], selector);
|
|
w[56] = __byte_perm (w[23], w[22], selector);
|
|
w[55] = __byte_perm (w[22], w[21], selector);
|
|
w[54] = __byte_perm (w[21], w[20], selector);
|
|
w[53] = __byte_perm (w[20], w[19], selector);
|
|
w[52] = __byte_perm (w[19], w[18], selector);
|
|
w[51] = __byte_perm (w[18], w[17], selector);
|
|
w[50] = __byte_perm (w[17], w[16], selector);
|
|
w[49] = __byte_perm (w[16], w[15], selector);
|
|
w[48] = __byte_perm (w[15], w[14], selector);
|
|
w[47] = __byte_perm (w[14], w[13], selector);
|
|
w[46] = __byte_perm (w[13], w[12], selector);
|
|
w[45] = __byte_perm (w[12], w[11], selector);
|
|
w[44] = __byte_perm (w[11], w[10], selector);
|
|
w[43] = __byte_perm (w[10], w[ 9], selector);
|
|
w[42] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[41] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[40] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[39] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[38] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[37] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[36] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[35] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[34] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[33] = __byte_perm (w[ 0], 0, selector);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = __byte_perm (w[29], w[28], selector);
|
|
w[62] = __byte_perm (w[28], w[27], selector);
|
|
w[61] = __byte_perm (w[27], w[26], selector);
|
|
w[60] = __byte_perm (w[26], w[25], selector);
|
|
w[59] = __byte_perm (w[25], w[24], selector);
|
|
w[58] = __byte_perm (w[24], w[23], selector);
|
|
w[57] = __byte_perm (w[23], w[22], selector);
|
|
w[56] = __byte_perm (w[22], w[21], selector);
|
|
w[55] = __byte_perm (w[21], w[20], selector);
|
|
w[54] = __byte_perm (w[20], w[19], selector);
|
|
w[53] = __byte_perm (w[19], w[18], selector);
|
|
w[52] = __byte_perm (w[18], w[17], selector);
|
|
w[51] = __byte_perm (w[17], w[16], selector);
|
|
w[50] = __byte_perm (w[16], w[15], selector);
|
|
w[49] = __byte_perm (w[15], w[14], selector);
|
|
w[48] = __byte_perm (w[14], w[13], selector);
|
|
w[47] = __byte_perm (w[13], w[12], selector);
|
|
w[46] = __byte_perm (w[12], w[11], selector);
|
|
w[45] = __byte_perm (w[11], w[10], selector);
|
|
w[44] = __byte_perm (w[10], w[ 9], selector);
|
|
w[43] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[42] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[41] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[40] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[39] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[38] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[37] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[36] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[35] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[34] = __byte_perm (w[ 0], 0, selector);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = __byte_perm (w[28], w[27], selector);
|
|
w[62] = __byte_perm (w[27], w[26], selector);
|
|
w[61] = __byte_perm (w[26], w[25], selector);
|
|
w[60] = __byte_perm (w[25], w[24], selector);
|
|
w[59] = __byte_perm (w[24], w[23], selector);
|
|
w[58] = __byte_perm (w[23], w[22], selector);
|
|
w[57] = __byte_perm (w[22], w[21], selector);
|
|
w[56] = __byte_perm (w[21], w[20], selector);
|
|
w[55] = __byte_perm (w[20], w[19], selector);
|
|
w[54] = __byte_perm (w[19], w[18], selector);
|
|
w[53] = __byte_perm (w[18], w[17], selector);
|
|
w[52] = __byte_perm (w[17], w[16], selector);
|
|
w[51] = __byte_perm (w[16], w[15], selector);
|
|
w[50] = __byte_perm (w[15], w[14], selector);
|
|
w[49] = __byte_perm (w[14], w[13], selector);
|
|
w[48] = __byte_perm (w[13], w[12], selector);
|
|
w[47] = __byte_perm (w[12], w[11], selector);
|
|
w[46] = __byte_perm (w[11], w[10], selector);
|
|
w[45] = __byte_perm (w[10], w[ 9], selector);
|
|
w[44] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[43] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[42] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[41] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[40] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[39] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[38] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[37] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[36] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[35] = __byte_perm (w[ 0], 0, selector);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = __byte_perm (w[27], w[26], selector);
|
|
w[62] = __byte_perm (w[26], w[25], selector);
|
|
w[61] = __byte_perm (w[25], w[24], selector);
|
|
w[60] = __byte_perm (w[24], w[23], selector);
|
|
w[59] = __byte_perm (w[23], w[22], selector);
|
|
w[58] = __byte_perm (w[22], w[21], selector);
|
|
w[57] = __byte_perm (w[21], w[20], selector);
|
|
w[56] = __byte_perm (w[20], w[19], selector);
|
|
w[55] = __byte_perm (w[19], w[18], selector);
|
|
w[54] = __byte_perm (w[18], w[17], selector);
|
|
w[53] = __byte_perm (w[17], w[16], selector);
|
|
w[52] = __byte_perm (w[16], w[15], selector);
|
|
w[51] = __byte_perm (w[15], w[14], selector);
|
|
w[50] = __byte_perm (w[14], w[13], selector);
|
|
w[49] = __byte_perm (w[13], w[12], selector);
|
|
w[48] = __byte_perm (w[12], w[11], selector);
|
|
w[47] = __byte_perm (w[11], w[10], selector);
|
|
w[46] = __byte_perm (w[10], w[ 9], selector);
|
|
w[45] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[44] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[43] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[42] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[41] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[40] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[39] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[38] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[37] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[36] = __byte_perm (w[ 0], 0, selector);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = __byte_perm (w[26], w[25], selector);
|
|
w[62] = __byte_perm (w[25], w[24], selector);
|
|
w[61] = __byte_perm (w[24], w[23], selector);
|
|
w[60] = __byte_perm (w[23], w[22], selector);
|
|
w[59] = __byte_perm (w[22], w[21], selector);
|
|
w[58] = __byte_perm (w[21], w[20], selector);
|
|
w[57] = __byte_perm (w[20], w[19], selector);
|
|
w[56] = __byte_perm (w[19], w[18], selector);
|
|
w[55] = __byte_perm (w[18], w[17], selector);
|
|
w[54] = __byte_perm (w[17], w[16], selector);
|
|
w[53] = __byte_perm (w[16], w[15], selector);
|
|
w[52] = __byte_perm (w[15], w[14], selector);
|
|
w[51] = __byte_perm (w[14], w[13], selector);
|
|
w[50] = __byte_perm (w[13], w[12], selector);
|
|
w[49] = __byte_perm (w[12], w[11], selector);
|
|
w[48] = __byte_perm (w[11], w[10], selector);
|
|
w[47] = __byte_perm (w[10], w[ 9], selector);
|
|
w[46] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[45] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[44] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[43] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[42] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[41] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[40] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[39] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[38] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[37] = __byte_perm (w[ 0], 0, selector);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = __byte_perm (w[25], w[24], selector);
|
|
w[62] = __byte_perm (w[24], w[23], selector);
|
|
w[61] = __byte_perm (w[23], w[22], selector);
|
|
w[60] = __byte_perm (w[22], w[21], selector);
|
|
w[59] = __byte_perm (w[21], w[20], selector);
|
|
w[58] = __byte_perm (w[20], w[19], selector);
|
|
w[57] = __byte_perm (w[19], w[18], selector);
|
|
w[56] = __byte_perm (w[18], w[17], selector);
|
|
w[55] = __byte_perm (w[17], w[16], selector);
|
|
w[54] = __byte_perm (w[16], w[15], selector);
|
|
w[53] = __byte_perm (w[15], w[14], selector);
|
|
w[52] = __byte_perm (w[14], w[13], selector);
|
|
w[51] = __byte_perm (w[13], w[12], selector);
|
|
w[50] = __byte_perm (w[12], w[11], selector);
|
|
w[49] = __byte_perm (w[11], w[10], selector);
|
|
w[48] = __byte_perm (w[10], w[ 9], selector);
|
|
w[47] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[46] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[45] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[44] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[43] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[42] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[41] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[40] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[39] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[38] = __byte_perm (w[ 0], 0, selector);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = __byte_perm (w[24], w[23], selector);
|
|
w[62] = __byte_perm (w[23], w[22], selector);
|
|
w[61] = __byte_perm (w[22], w[21], selector);
|
|
w[60] = __byte_perm (w[21], w[20], selector);
|
|
w[59] = __byte_perm (w[20], w[19], selector);
|
|
w[58] = __byte_perm (w[19], w[18], selector);
|
|
w[57] = __byte_perm (w[18], w[17], selector);
|
|
w[56] = __byte_perm (w[17], w[16], selector);
|
|
w[55] = __byte_perm (w[16], w[15], selector);
|
|
w[54] = __byte_perm (w[15], w[14], selector);
|
|
w[53] = __byte_perm (w[14], w[13], selector);
|
|
w[52] = __byte_perm (w[13], w[12], selector);
|
|
w[51] = __byte_perm (w[12], w[11], selector);
|
|
w[50] = __byte_perm (w[11], w[10], selector);
|
|
w[49] = __byte_perm (w[10], w[ 9], selector);
|
|
w[48] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[47] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[46] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[45] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[44] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[43] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[42] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[41] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[40] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[39] = __byte_perm (w[ 0], 0, selector);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = __byte_perm (w[23], w[22], selector);
|
|
w[62] = __byte_perm (w[22], w[21], selector);
|
|
w[61] = __byte_perm (w[21], w[20], selector);
|
|
w[60] = __byte_perm (w[20], w[19], selector);
|
|
w[59] = __byte_perm (w[19], w[18], selector);
|
|
w[58] = __byte_perm (w[18], w[17], selector);
|
|
w[57] = __byte_perm (w[17], w[16], selector);
|
|
w[56] = __byte_perm (w[16], w[15], selector);
|
|
w[55] = __byte_perm (w[15], w[14], selector);
|
|
w[54] = __byte_perm (w[14], w[13], selector);
|
|
w[53] = __byte_perm (w[13], w[12], selector);
|
|
w[52] = __byte_perm (w[12], w[11], selector);
|
|
w[51] = __byte_perm (w[11], w[10], selector);
|
|
w[50] = __byte_perm (w[10], w[ 9], selector);
|
|
w[49] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[48] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[47] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[46] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[45] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[44] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[43] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[42] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[41] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[40] = __byte_perm (w[ 0], 0, selector);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = __byte_perm (w[22], w[21], selector);
|
|
w[62] = __byte_perm (w[21], w[20], selector);
|
|
w[61] = __byte_perm (w[20], w[19], selector);
|
|
w[60] = __byte_perm (w[19], w[18], selector);
|
|
w[59] = __byte_perm (w[18], w[17], selector);
|
|
w[58] = __byte_perm (w[17], w[16], selector);
|
|
w[57] = __byte_perm (w[16], w[15], selector);
|
|
w[56] = __byte_perm (w[15], w[14], selector);
|
|
w[55] = __byte_perm (w[14], w[13], selector);
|
|
w[54] = __byte_perm (w[13], w[12], selector);
|
|
w[53] = __byte_perm (w[12], w[11], selector);
|
|
w[52] = __byte_perm (w[11], w[10], selector);
|
|
w[51] = __byte_perm (w[10], w[ 9], selector);
|
|
w[50] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[49] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[48] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[47] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[46] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[45] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[44] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[43] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[42] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[41] = __byte_perm (w[ 0], 0, selector);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = __byte_perm (w[21], w[20], selector);
|
|
w[62] = __byte_perm (w[20], w[19], selector);
|
|
w[61] = __byte_perm (w[19], w[18], selector);
|
|
w[60] = __byte_perm (w[18], w[17], selector);
|
|
w[59] = __byte_perm (w[17], w[16], selector);
|
|
w[58] = __byte_perm (w[16], w[15], selector);
|
|
w[57] = __byte_perm (w[15], w[14], selector);
|
|
w[56] = __byte_perm (w[14], w[13], selector);
|
|
w[55] = __byte_perm (w[13], w[12], selector);
|
|
w[54] = __byte_perm (w[12], w[11], selector);
|
|
w[53] = __byte_perm (w[11], w[10], selector);
|
|
w[52] = __byte_perm (w[10], w[ 9], selector);
|
|
w[51] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[50] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[49] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[48] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[47] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[46] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[45] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[44] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[43] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[42] = __byte_perm (w[ 0], 0, selector);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = __byte_perm (w[20], w[19], selector);
|
|
w[62] = __byte_perm (w[19], w[18], selector);
|
|
w[61] = __byte_perm (w[18], w[17], selector);
|
|
w[60] = __byte_perm (w[17], w[16], selector);
|
|
w[59] = __byte_perm (w[16], w[15], selector);
|
|
w[58] = __byte_perm (w[15], w[14], selector);
|
|
w[57] = __byte_perm (w[14], w[13], selector);
|
|
w[56] = __byte_perm (w[13], w[12], selector);
|
|
w[55] = __byte_perm (w[12], w[11], selector);
|
|
w[54] = __byte_perm (w[11], w[10], selector);
|
|
w[53] = __byte_perm (w[10], w[ 9], selector);
|
|
w[52] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[51] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[50] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[49] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[48] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[47] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[46] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[45] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[44] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[43] = __byte_perm (w[ 0], 0, selector);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = __byte_perm (w[19], w[18], selector);
|
|
w[62] = __byte_perm (w[18], w[17], selector);
|
|
w[61] = __byte_perm (w[17], w[16], selector);
|
|
w[60] = __byte_perm (w[16], w[15], selector);
|
|
w[59] = __byte_perm (w[15], w[14], selector);
|
|
w[58] = __byte_perm (w[14], w[13], selector);
|
|
w[57] = __byte_perm (w[13], w[12], selector);
|
|
w[56] = __byte_perm (w[12], w[11], selector);
|
|
w[55] = __byte_perm (w[11], w[10], selector);
|
|
w[54] = __byte_perm (w[10], w[ 9], selector);
|
|
w[53] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[52] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[51] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[50] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[49] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[48] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[47] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[46] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[45] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[44] = __byte_perm (w[ 0], 0, selector);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = __byte_perm (w[18], w[17], selector);
|
|
w[62] = __byte_perm (w[17], w[16], selector);
|
|
w[61] = __byte_perm (w[16], w[15], selector);
|
|
w[60] = __byte_perm (w[15], w[14], selector);
|
|
w[59] = __byte_perm (w[14], w[13], selector);
|
|
w[58] = __byte_perm (w[13], w[12], selector);
|
|
w[57] = __byte_perm (w[12], w[11], selector);
|
|
w[56] = __byte_perm (w[11], w[10], selector);
|
|
w[55] = __byte_perm (w[10], w[ 9], selector);
|
|
w[54] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[53] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[52] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[51] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[50] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[49] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[48] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[47] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[46] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[45] = __byte_perm (w[ 0], 0, selector);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = __byte_perm (w[17], w[16], selector);
|
|
w[62] = __byte_perm (w[16], w[15], selector);
|
|
w[61] = __byte_perm (w[15], w[14], selector);
|
|
w[60] = __byte_perm (w[14], w[13], selector);
|
|
w[59] = __byte_perm (w[13], w[12], selector);
|
|
w[58] = __byte_perm (w[12], w[11], selector);
|
|
w[57] = __byte_perm (w[11], w[10], selector);
|
|
w[56] = __byte_perm (w[10], w[ 9], selector);
|
|
w[55] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[54] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[53] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[52] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[51] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[50] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[49] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[48] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[47] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[46] = __byte_perm (w[ 0], 0, selector);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = __byte_perm (w[16], w[15], selector);
|
|
w[62] = __byte_perm (w[15], w[14], selector);
|
|
w[61] = __byte_perm (w[14], w[13], selector);
|
|
w[60] = __byte_perm (w[13], w[12], selector);
|
|
w[59] = __byte_perm (w[12], w[11], selector);
|
|
w[58] = __byte_perm (w[11], w[10], selector);
|
|
w[57] = __byte_perm (w[10], w[ 9], selector);
|
|
w[56] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[55] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[54] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[53] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[52] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[51] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[50] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[49] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[48] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[47] = __byte_perm (w[ 0], 0, selector);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = __byte_perm (w[15], w[14], selector);
|
|
w[62] = __byte_perm (w[14], w[13], selector);
|
|
w[61] = __byte_perm (w[13], w[12], selector);
|
|
w[60] = __byte_perm (w[12], w[11], selector);
|
|
w[59] = __byte_perm (w[11], w[10], selector);
|
|
w[58] = __byte_perm (w[10], w[ 9], selector);
|
|
w[57] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[56] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[55] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[54] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[53] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[52] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[51] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[50] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[49] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[48] = __byte_perm (w[ 0], 0, selector);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = __byte_perm (w[14], w[13], selector);
|
|
w[62] = __byte_perm (w[13], w[12], selector);
|
|
w[61] = __byte_perm (w[12], w[11], selector);
|
|
w[60] = __byte_perm (w[11], w[10], selector);
|
|
w[59] = __byte_perm (w[10], w[ 9], selector);
|
|
w[58] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[57] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[56] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[55] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[54] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[53] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[52] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[51] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[50] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[49] = __byte_perm (w[ 0], 0, selector);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = __byte_perm (w[13], w[12], selector);
|
|
w[62] = __byte_perm (w[12], w[11], selector);
|
|
w[61] = __byte_perm (w[11], w[10], selector);
|
|
w[60] = __byte_perm (w[10], w[ 9], selector);
|
|
w[59] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[58] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[57] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[56] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[55] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[54] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[53] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[52] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[51] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[50] = __byte_perm (w[ 0], 0, selector);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = __byte_perm (w[12], w[11], selector);
|
|
w[62] = __byte_perm (w[11], w[10], selector);
|
|
w[61] = __byte_perm (w[10], w[ 9], selector);
|
|
w[60] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[59] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[58] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[57] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[56] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[55] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[54] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[53] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[52] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[51] = __byte_perm (w[ 0], 0, selector);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = __byte_perm (w[11], w[10], selector);
|
|
w[62] = __byte_perm (w[10], w[ 9], selector);
|
|
w[61] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[60] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[59] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[58] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[57] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[56] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[55] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[54] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[53] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[52] = __byte_perm (w[ 0], 0, selector);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = __byte_perm (w[10], w[ 9], selector);
|
|
w[62] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[61] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[60] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[59] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[58] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[57] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[56] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[55] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[54] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[53] = __byte_perm (w[ 0], 0, selector);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = __byte_perm (w[ 9], w[ 8], selector);
|
|
w[62] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[61] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[60] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[59] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[58] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[57] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[56] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[55] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[54] = __byte_perm (w[ 0], 0, selector);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = __byte_perm (w[ 8], w[ 7], selector);
|
|
w[62] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[61] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[60] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[59] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[58] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[57] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[56] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[55] = __byte_perm (w[ 0], 0, selector);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = __byte_perm (w[ 7], w[ 6], selector);
|
|
w[62] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[61] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[60] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[59] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[58] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[57] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[56] = __byte_perm (w[ 0], 0, selector);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = __byte_perm (w[ 6], w[ 5], selector);
|
|
w[62] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[61] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[60] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[59] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[58] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[57] = __byte_perm (w[ 0], 0, selector);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = __byte_perm (w[ 5], w[ 4], selector);
|
|
w[62] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[61] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[60] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[59] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[58] = __byte_perm (w[ 0], 0, selector);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = __byte_perm (w[ 4], w[ 3], selector);
|
|
w[62] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[61] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[60] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[59] = __byte_perm (w[ 0], 0, selector);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = __byte_perm (w[ 3], w[ 2], selector);
|
|
w[62] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[61] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[60] = __byte_perm (w[ 0], 0, selector);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = __byte_perm (w[ 2], w[ 1], selector);
|
|
w[62] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[61] = __byte_perm (w[ 0], 0, selector);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = __byte_perm (w[ 1], w[ 0], selector);
|
|
w[62] = __byte_perm (w[ 0], 0, selector);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = __byte_perm (w[ 0], 0, selector);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
 * vector functions as scalar (for outer loop usage)
 */
|
|
|
|
/**
 * Byte mask for one little-endian 32-bit word of a buffer that is being
 * truncated to `len` bytes.
 *
 * first : byte offset of the word inside the buffer (0, 4, 8, ...)
 *
 * Returns 0 when the whole word lies at or beyond `len`, 0xffffffff when the
 * whole word lies below `len`, and otherwise a mask that keeps only the low
 * (len - first) bytes: 0x000000ff, 0x0000ffff or 0x00ffffff.
 */
static u32 truncate_mask_4x4_le_S (const u32 len, const u32 first)
{
  if (len <= first)
  {
    return 0;
  }

  const u32 keep = len - first; // cannot wrap: len > first here

  if (keep >= 4)
  {
    return 0xffffffff;
  }

  // keep is 1..3, so the shift count is 24, 16 or 8 and never reaches 32
  return 0xffffffff >> ((4 - keep) * 8);
}

/**
 * Clear every byte at position >= len in a 16-byte buffer stored as four
 * little-endian u32 words (byte 0 is the least significant byte of w0[0]).
 *
 * w0  : buffer, modified in place
 * len : number of leading bytes to keep; any value >= 16 leaves w0 unchanged
 *
 * Each word is addressed with a compile-time constant index.
 */
static void truncate_block_4x4_le_S (u32 w0[4], const u32 len)
{
  w0[0] &= truncate_mask_4x4_le_S (len,  0);
  w0[1] &= truncate_mask_4x4_le_S (len,  4);
  w0[2] &= truncate_mask_4x4_le_S (len,  8);
  w0[3] &= truncate_mask_4x4_le_S (len, 12);
}
|
|
|
|
/**
 * Byte mask for one big-endian 32-bit word of a buffer that is being
 * truncated to `len` bytes.
 *
 * first : byte offset of the word inside the buffer (0, 4, 8, ...)
 *
 * Returns 0 when the whole word lies at or beyond `len`, 0xffffffff when the
 * whole word lies below `len`, and otherwise a mask that keeps only the high
 * (len - first) bytes: 0xff000000, 0xffff0000 or 0xffffff00.
 */
static u32 truncate_mask_4x4_be_S (const u32 len, const u32 first)
{
  if (len <= first)
  {
    return 0;
  }

  const u32 keep = len - first; // cannot wrap: len > first here

  if (keep >= 4)
  {
    return 0xffffffff;
  }

  // keep is 1..3, so the shift count is 24, 16 or 8 and never reaches 32
  return 0xffffffff << ((4 - keep) * 8);
}

/**
 * Clear every byte at position >= len in a 16-byte buffer stored as four
 * big-endian u32 words (byte 0 is the most significant byte of w0[0]).
 *
 * w0  : buffer, modified in place
 * len : number of leading bytes to keep; any value >= 16 leaves w0 unchanged
 *
 * Each word is addressed with a compile-time constant index.
 */
static void truncate_block_4x4_be_S (u32 w0[4], const u32 len)
{
  w0[0] &= truncate_mask_4x4_be_S (len,  0);
  w0[1] &= truncate_mask_4x4_be_S (len,  4);
  w0[2] &= truncate_mask_4x4_be_S (len,  8);
  w0[3] &= truncate_mask_4x4_be_S (len, 12);
}
|
|
|
|
/**
 * Byte mask for one little-endian 32-bit word of a buffer that is being
 * truncated to `len` bytes.
 *
 * first : byte offset of the word inside the buffer (0, 4, 8, ...)
 *
 * Returns 0 when the whole word lies at or beyond `len`, 0xffffffff when the
 * whole word lies below `len`, and otherwise a mask that keeps only the low
 * (len - first) bytes: 0x000000ff, 0x0000ffff or 0x00ffffff.
 */
static u32 truncate_mask_16x4_le_S (const u32 len, const u32 first)
{
  if (len <= first)
  {
    return 0;
  }

  const u32 keep = len - first; // cannot wrap: len > first here

  if (keep >= 4)
  {
    return 0xffffffff;
  }

  // keep is 1..3, so the shift count is 24, 16 or 8 and never reaches 32
  return 0xffffffff >> ((4 - keep) * 8);
}

/**
 * Clear every byte at position >= len in a 64-byte buffer stored as sixteen
 * little-endian u32 words split over four arrays: w0 holds bytes 0..15,
 * w1 bytes 16..31, w2 bytes 32..47 and w3 bytes 48..63.
 *
 * w0..w3 : buffer, modified in place
 * len    : number of leading bytes to keep; any value >= 64 leaves the
 *          buffer unchanged
 *
 * Each word is addressed with a compile-time constant index.
 */
static void truncate_block_16x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 len)
{
  w0[0] &= truncate_mask_16x4_le_S (len,  0);
  w0[1] &= truncate_mask_16x4_le_S (len,  4);
  w0[2] &= truncate_mask_16x4_le_S (len,  8);
  w0[3] &= truncate_mask_16x4_le_S (len, 12);

  w1[0] &= truncate_mask_16x4_le_S (len, 16);
  w1[1] &= truncate_mask_16x4_le_S (len, 20);
  w1[2] &= truncate_mask_16x4_le_S (len, 24);
  w1[3] &= truncate_mask_16x4_le_S (len, 28);

  w2[0] &= truncate_mask_16x4_le_S (len, 32);
  w2[1] &= truncate_mask_16x4_le_S (len, 36);
  w2[2] &= truncate_mask_16x4_le_S (len, 40);
  w2[3] &= truncate_mask_16x4_le_S (len, 44);

  w3[0] &= truncate_mask_16x4_le_S (len, 48);
  w3[1] &= truncate_mask_16x4_le_S (len, 52);
  w3[2] &= truncate_mask_16x4_le_S (len, 56);
  w3[3] &= truncate_mask_16x4_le_S (len, 60);
}
|
|
|
|
/**
 * Byte mask for one big-endian 32-bit word of a buffer that is being
 * truncated to `len` bytes.
 *
 * first : byte offset of the word inside the buffer (0, 4, 8, ...)
 *
 * Returns 0 when the whole word lies at or beyond `len`, 0xffffffff when the
 * whole word lies below `len`, and otherwise a mask that keeps only the high
 * (len - first) bytes: 0xff000000, 0xffff0000 or 0xffffff00.
 */
static u32 truncate_mask_16x4_be_S (const u32 len, const u32 first)
{
  if (len <= first)
  {
    return 0;
  }

  const u32 keep = len - first; // cannot wrap: len > first here

  if (keep >= 4)
  {
    return 0xffffffff;
  }

  // keep is 1..3, so the shift count is 24, 16 or 8 and never reaches 32
  return 0xffffffff << ((4 - keep) * 8);
}

/**
 * Clear every byte at position >= len in a 64-byte buffer stored as sixteen
 * big-endian u32 words split over four arrays: w0 holds bytes 0..15,
 * w1 bytes 16..31, w2 bytes 32..47 and w3 bytes 48..63.
 *
 * w0..w3 : buffer, modified in place
 * len    : number of leading bytes to keep; any value >= 64 leaves the
 *          buffer unchanged
 *
 * Each word is addressed with a compile-time constant index.
 */
static void truncate_block_16x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 len)
{
  w0[0] &= truncate_mask_16x4_be_S (len,  0);
  w0[1] &= truncate_mask_16x4_be_S (len,  4);
  w0[2] &= truncate_mask_16x4_be_S (len,  8);
  w0[3] &= truncate_mask_16x4_be_S (len, 12);

  w1[0] &= truncate_mask_16x4_be_S (len, 16);
  w1[1] &= truncate_mask_16x4_be_S (len, 20);
  w1[2] &= truncate_mask_16x4_be_S (len, 24);
  w1[3] &= truncate_mask_16x4_be_S (len, 28);

  w2[0] &= truncate_mask_16x4_be_S (len, 32);
  w2[1] &= truncate_mask_16x4_be_S (len, 36);
  w2[2] &= truncate_mask_16x4_be_S (len, 40);
  w2[3] &= truncate_mask_16x4_be_S (len, 44);

  w3[0] &= truncate_mask_16x4_be_S (len, 48);
  w3[1] &= truncate_mask_16x4_be_S (len, 52);
  w3[2] &= truncate_mask_16x4_be_S (len, 56);
  w3[3] &= truncate_mask_16x4_be_S (len, 60);
}
|
|
|
|
/**
 * OR a 0x01 byte into the eight-word buffer formed by w0 followed by w1.
 *
 * w0, w1 : four 32-bit words each, modified in place
 * offset : row index into c_append_helper (declared with 64 rows)
 *
 * Judging by the rows visible at the top of the file, row N of
 * c_append_helper holds 0xff in byte N and zero everywhere else, so the
 * AND with 0x01010101 leaves a single 0x01 in the selected byte lane.
 * Only columns 0..7 of the row are consulted here.
 */
static void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
{
  for (int i = 0; i < 4; i++)
  {
    w0[i] |= 0x01010101 & c_append_helper[offset][i];
    w1[i] |= 0x01010101 & c_append_helper[offset][i + 4];
  }
}
|
|
|
|
/**
 * OR a 0x80 byte into the four-word buffer w0.
 *
 * w0     : four 32-bit words, modified in place
 * offset : row index into c_append_helper (declared with 64 rows)
 *
 * Judging by the rows visible at the top of the file, row N of
 * c_append_helper holds 0xff in byte N and zero everywhere else, so the
 * AND with 0x80808080 leaves a single 0x80 in the selected byte lane.
 * Only columns 0..3 of the row are consulted here.
 */
static void append_0x80_1x4_S (u32 w0[4], const u32 offset)
{
  for (int i = 0; i < 4; i++)
  {
    w0[i] |= 0x80808080 & c_append_helper[offset][i];
  }
}
|
|
|
|
/**
 * OR a 0x80 byte into the eight-word buffer formed by w0 followed by w1.
 *
 * w0, w1 : four 32-bit words each, modified in place
 * offset : row index into c_append_helper (declared with 64 rows)
 *
 * Judging by the rows visible at the top of the file, row N of
 * c_append_helper holds 0xff in byte N and zero everywhere else, so the
 * AND with 0x80808080 leaves a single 0x80 in the selected byte lane.
 * Only columns 0..7 of the row are consulted here.
 */
static void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset)
{
  for (int i = 0; i < 4; i++)
  {
    w0[i] |= 0x80808080 & c_append_helper[offset][i];
    w1[i] |= 0x80808080 & c_append_helper[offset][i + 4];
  }
}
|
|
|
|
/**
 * OR a 0x80 byte into the twelve-word buffer formed by w0, w1 and w2.
 *
 * w0, w1, w2 : four 32-bit words each, modified in place
 * offset     : row index into c_append_helper (declared with 64 rows)
 *
 * Judging by the rows visible at the top of the file, row N of
 * c_append_helper holds 0xff in byte N and zero everywhere else, so the
 * AND with 0x80808080 leaves a single 0x80 in the selected byte lane.
 * Only columns 0..11 of the row are consulted here.
 */
static void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset)
{
  for (int i = 0; i < 4; i++)
  {
    w0[i] |= 0x80808080 & c_append_helper[offset][i];
    w1[i] |= 0x80808080 & c_append_helper[offset][i + 4];
    w2[i] |= 0x80808080 & c_append_helper[offset][i + 8];
  }
}
|
|
|
|
/**
 * OR a 0x80 byte into the sixteen-word buffer formed by w0, w1, w2 and w3.
 *
 * w0..w3 : four 32-bit words each, modified in place
 * offset : row index into c_append_helper (declared with 64 rows)
 *
 * Judging by the rows visible at the top of the file, row N of
 * c_append_helper holds 0xff in byte N and zero everywhere else, so the
 * AND with 0x80808080 leaves a single 0x80 in the selected byte lane.
 * All sixteen columns of the row are consulted here.
 */
static void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
  for (int i = 0; i < 4; i++)
  {
    w0[i] |= 0x80808080 & c_append_helper[offset][i];
    w1[i] |= 0x80808080 & c_append_helper[offset][i + 4];
    w2[i] |= 0x80808080 & c_append_helper[offset][i + 8];
    w3[i] |= 0x80808080 & c_append_helper[offset][i + 12];
  }
}
|
|
|
|
/**
 * OR a 0x80 byte into the 32-word (128-byte) buffer w0..w7 at byte
 * position `offset`.
 *
 * w0..w7 : four 32-bit words each; exactly one word is modified
 * offset : byte position, 0..127; any larger value matches no case and
 *          leaves every word untouched
 *
 * Byte k of a word occupies bits 8k..8k+7, so the low two bits of offset
 * choose the shift and offset / 4 chooses the word.
 */
static void append_0x80_8x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
  // 0x80, 0x8000, 0x800000 or 0x80000000 depending on the byte lane
  const u32 marker = 0x80u << ((offset & 3) * 8);

  switch (offset / 4)
  {
    case  0: w0[0] |= marker; break;
    case  1: w0[1] |= marker; break;
    case  2: w0[2] |= marker; break;
    case  3: w0[3] |= marker; break;

    case  4: w1[0] |= marker; break;
    case  5: w1[1] |= marker; break;
    case  6: w1[2] |= marker; break;
    case  7: w1[3] |= marker; break;

    case  8: w2[0] |= marker; break;
    case  9: w2[1] |= marker; break;
    case 10: w2[2] |= marker; break;
    case 11: w2[3] |= marker; break;

    case 12: w3[0] |= marker; break;
    case 13: w3[1] |= marker; break;
    case 14: w3[2] |= marker; break;
    case 15: w3[3] |= marker; break;

    case 16: w4[0] |= marker; break;
    case 17: w4[1] |= marker; break;
    case 18: w4[2] |= marker; break;
    case 19: w4[3] |= marker; break;

    case 20: w5[0] |= marker; break;
    case 21: w5[1] |= marker; break;
    case 22: w5[2] |= marker; break;
    case 23: w5[3] |= marker; break;

    case 24: w6[0] |= marker; break;
    case 25: w6[1] |= marker; break;
    case 26: w6[2] |= marker; break;
    case 27: w6[3] |= marker; break;

    case 28: w7[0] |= marker; break;
    case 29: w7[1] |= marker; break;
    case 30: w7[2] |= marker; break;
    case 31: w7[3] |= marker; break;
  }
}
|
|
|
|
/**
 * Widen the 16 bytes held in in[0..3] to 16-bit units with the data byte
 * in the upper half of each unit, writing eight words to out1 and out2.
 *
 * in   : four source words (read only)
 * out1 : receives the expansion of in[0] and in[1]
 * out2 : receives the expansion of in[2] and in[3]
 *
 * In the portable branch, the low output word of a pair carries source
 * bytes 0 and 1 in bits 8..15 and 24..31, and the high output word carries
 * source bytes 2 and 3 in the same positions; all other bits are zero.
 * The IS_NV and IS_AMD_ROCM branches delegate to __byte_perm_S, which is
 * defined elsewhere - presumably with the same result.
 *
 * Stores are issued from out2[3] down to out1[0]. That order is kept on
 * purpose: reordering would change the result if an output array
 * overlaps `in`.
 */
static void make_utf16be_S (const u32 in[4], u32 out1[4], u32 out2[4])
{
  #if defined IS_NV

  #define MAKE_UTF16BE_S_HI(x) __byte_perm_S ((x), 0, 0x3727)
  #define MAKE_UTF16BE_S_LO(x) __byte_perm_S ((x), 0, 0x1707)

  #elif defined IS_AMD_ROCM

  #define MAKE_UTF16BE_S_HI(x) __byte_perm_S ((x), 0, 0x03070207)
  #define MAKE_UTF16BE_S_LO(x) __byte_perm_S ((x), 0, 0x01070007)

  #else

  // HI: source bytes 2 and 3; LO: source bytes 0 and 1
  #define MAKE_UTF16BE_S_HI(x) ((((x)      ) & 0xFF000000) | (((x) >> 8) & 0x0000FF00))
  #define MAKE_UTF16BE_S_LO(x) ((((x) << 16) & 0xFF000000) | (((x) << 8) & 0x0000FF00))

  #endif

  out2[3] = MAKE_UTF16BE_S_HI (in[3]);
  out2[2] = MAKE_UTF16BE_S_LO (in[3]);
  out2[1] = MAKE_UTF16BE_S_HI (in[2]);
  out2[0] = MAKE_UTF16BE_S_LO (in[2]);
  out1[3] = MAKE_UTF16BE_S_HI (in[1]);
  out1[2] = MAKE_UTF16BE_S_LO (in[1]);
  out1[1] = MAKE_UTF16BE_S_HI (in[0]);
  out1[0] = MAKE_UTF16BE_S_LO (in[0]);

  #undef MAKE_UTF16BE_S_HI
  #undef MAKE_UTF16BE_S_LO
}
|
|
|
|
/**
 * Widen the 16 bytes held in in[0..3] to 16-bit units with the data byte
 * in the lower half of each unit, writing eight words to out1 and out2.
 *
 * in   : four source words (read only)
 * out1 : receives the expansion of in[0] and in[1]
 * out2 : receives the expansion of in[2] and in[3]
 *
 * In the portable branch, the low output word of a pair carries source
 * bytes 0 and 1 in bits 0..7 and 16..23, and the high output word carries
 * source bytes 2 and 3 in the same positions; all other bits are zero.
 * The IS_NV and IS_AMD_ROCM branches delegate to __byte_perm_S, which is
 * defined elsewhere - presumably with the same result.
 *
 * Stores are issued from out2[3] down to out1[0]. That order is kept on
 * purpose: reordering would change the result if an output array
 * overlaps `in`.
 */
static void make_utf16le_S (const u32 in[4], u32 out1[4], u32 out2[4])
{
  #if defined IS_NV

  #define MAKE_UTF16LE_S_HI(x) __byte_perm_S ((x), 0, 0x7372)
  #define MAKE_UTF16LE_S_LO(x) __byte_perm_S ((x), 0, 0x7170)

  #elif defined IS_AMD_ROCM

  #define MAKE_UTF16LE_S_HI(x) __byte_perm_S ((x), 0, 0x07030702)
  #define MAKE_UTF16LE_S_LO(x) __byte_perm_S ((x), 0, 0x07010700)

  #else

  // HI: source bytes 2 and 3; LO: source bytes 0 and 1
  #define MAKE_UTF16LE_S_HI(x) ((((x) >> 8) & 0x00FF0000) | (((x) >> 16) & 0x000000FF))
  #define MAKE_UTF16LE_S_LO(x) ((((x) << 8) & 0x00FF0000) | (((x)      ) & 0x000000FF))

  #endif

  out2[3] = MAKE_UTF16LE_S_HI (in[3]);
  out2[2] = MAKE_UTF16LE_S_LO (in[3]);
  out2[1] = MAKE_UTF16LE_S_HI (in[2]);
  out2[0] = MAKE_UTF16LE_S_LO (in[2]);
  out1[3] = MAKE_UTF16LE_S_HI (in[1]);
  out1[2] = MAKE_UTF16LE_S_LO (in[1]);
  out1[1] = MAKE_UTF16LE_S_HI (in[0]);
  out1[0] = MAKE_UTF16LE_S_LO (in[0]);

  #undef MAKE_UTF16LE_S_HI
  #undef MAKE_UTF16LE_S_LO
}
|
|
|
|
/**
 * Narrow eight words of 16-bit units (data byte in the upper half of each
 * unit, as produced by make_utf16be_S) back to four words of packed bytes.
 *
 * in1 : first four source words (read only)
 * in2 : last four source words (read only)
 * out : four result words; out[0..1] come from in1, out[2..3] from in2
 *
 * Each result word is built from two adjacent source words by taking
 * bytes 1 and 3 of the first and bytes 1 and 3 of the second, in that
 * order.
 *
 * The accelerated branches previously used selectors 0x4602 / 0x04060002,
 * which pick source bytes 2, 0, 6, 4. That disagrees with the portable
 * branch below and does not invert make_utf16be_S (it yields zero for
 * make_utf16be_S output). The selectors now pick bytes 1, 3, 5, 7 so that
 * all three branches return the same value.
 * NOTE(review): this assumes __byte_perm_S (defined elsewhere) indexes the
 * first operand as bytes 0..3 and the second as bytes 4..7, with the
 * lowest selector digit choosing result byte 0 - the convention that makes
 * the IS_NV / IS_AMD_ROCM / portable branches of make_utf16be_S and
 * undo_utf16le_S agree with each other. Confirm against its definition.
 */
static void undo_utf16be_S (const u32 in1[4], const u32 in2[4], u32 out[4])
{
  #if defined IS_NV

  // one selector nibble per result byte, lowest nibble first: 1, 3, 5, 7
  out[0] = __byte_perm_S (in1[0], in1[1], 0x7531);
  out[1] = __byte_perm_S (in1[2], in1[3], 0x7531);
  out[2] = __byte_perm_S (in2[0], in2[1], 0x7531);
  out[3] = __byte_perm_S (in2[2], in2[3], 0x7531);

  #elif defined IS_AMD_ROCM

  // one selector byte per result byte, lowest byte first: 1, 3, 5, 7
  out[0] = __byte_perm_S (in1[0], in1[1], 0x07050301);
  out[1] = __byte_perm_S (in1[2], in1[3], 0x07050301);
  out[2] = __byte_perm_S (in2[0], in2[1], 0x07050301);
  out[3] = __byte_perm_S (in2[2], in2[3], 0x07050301);

  #else

  out[0] = ((in1[0] & 0x0000ff00) >>  8) | ((in1[0] & 0xff000000) >> 16)
         | ((in1[1] & 0x0000ff00) <<  8) | ((in1[1] & 0xff000000) <<  0);
  out[1] = ((in1[2] & 0x0000ff00) >>  8) | ((in1[2] & 0xff000000) >> 16)
         | ((in1[3] & 0x0000ff00) <<  8) | ((in1[3] & 0xff000000) <<  0);
  out[2] = ((in2[0] & 0x0000ff00) >>  8) | ((in2[0] & 0xff000000) >> 16)
         | ((in2[1] & 0x0000ff00) <<  8) | ((in2[1] & 0xff000000) <<  0);
  out[3] = ((in2[2] & 0x0000ff00) >>  8) | ((in2[2] & 0xff000000) >> 16)
         | ((in2[3] & 0x0000ff00) <<  8) | ((in2[3] & 0xff000000) <<  0);

  #endif
}
|
|
|
|
/**
 * Narrow eight words of 16-bit units (data byte in the lower half of each
 * unit) back to four words of packed bytes.
 *
 * in1 : first four source words (read only)
 * in2 : last four source words (read only)
 * out : four result words; out[0..1] come from in1, out[2..3] from in2
 *
 * In the portable branch each result word is built from two adjacent
 * source words by taking bytes 0 and 2 of the first and bytes 0 and 2 of
 * the second, in that order. The IS_NV and IS_AMD_ROCM branches delegate
 * to __byte_perm_S, which is defined elsewhere - presumably with the same
 * result.
 *
 * Stores are issued from out[0] up to out[3]; that order is kept because
 * reordering would change the result if `out` overlaps an input array.
 */
static void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
{
  #if defined IS_NV

  #define UNDO_UTF16LE_S_PAIR(a, b) __byte_perm_S ((a), (b), 0x6420)

  #elif defined IS_AMD_ROCM

  #define UNDO_UTF16LE_S_PAIR(a, b) __byte_perm_S ((a), (b), 0x06040200)

  #else

  #define UNDO_UTF16LE_S_PAIR(a, b)                                   \
    ( (((a) & 0x000000ff) >>  0) | (((a) & 0x00ff0000) >>  8)         \
    | (((b) & 0x000000ff) << 16) | (((b) & 0x00ff0000) <<  8) )

  #endif

  out[0] = UNDO_UTF16LE_S_PAIR (in1[0], in1[1]);
  out[1] = UNDO_UTF16LE_S_PAIR (in1[2], in1[3]);
  out[2] = UNDO_UTF16LE_S_PAIR (in2[0], in2[1]);
  out[3] = UNDO_UTF16LE_S_PAIR (in2[2], in2[3]);

  #undef UNDO_UTF16LE_S_PAIR
}
|
|
|
|
static void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w3[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w3[3] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w3[3] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w3[3] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w3[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w3[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w3[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w3[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w3[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w3[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w3[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w3[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w3[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w3[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w3[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w3[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD || defined IS_GENERIC
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c3[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c2[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c1[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
c0[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
c0[0] = swap32_S (c0[0]);
|
|
c0[1] = swap32_S (c0[1]);
|
|
c0[2] = swap32_S (c0[2]);
|
|
c0[3] = swap32_S (c0[3]);
|
|
c1[0] = swap32_S (c1[0]);
|
|
c1[1] = swap32_S (c1[1]);
|
|
c1[2] = swap32_S (c1[2]);
|
|
c1[3] = swap32_S (c1[3]);
|
|
c2[0] = swap32_S (c2[0]);
|
|
c2[1] = swap32_S (c2[1]);
|
|
c2[2] = swap32_S (c2[2]);
|
|
c2[3] = swap32_S (c2[3]);
|
|
c3[0] = swap32_S (c3[0]);
|
|
c3[1] = swap32_S (c3[1]);
|
|
c3[2] = swap32_S (c3[2]);
|
|
c3[3] = swap32_S (c3[3]);
|
|
#endif
|
|
|
|
#ifdef IS_NV
|
|
// todo
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w1[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w0[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w0[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w0[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w0[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[0] = w0[1];
|
|
w0[1] = w0[2];
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w1[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w0[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[1] = w0[2];
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w0[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[2] = w0[3];
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w0[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w0[3] = w1[0];
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w1[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[0] = w1[1];
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w1[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[1] = w1[2];
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w1[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[2] = w1[3];
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w1[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w1[3] = w2[0];
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w2[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[0] = w2[1];
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w2[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[1] = w2[2];
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w2[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[2] = w2[3];
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c2[2] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w2[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w2[3] = w3[0];
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c2[3] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c2[2] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w3[0] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[0] = w3[1];
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c3[0] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c2[3] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c2[2] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w3[1] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[1] = w3[2];
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c3[1] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c3[0] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c2[3] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c2[2] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w3[2] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[2] = w3[3];
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = c3[2];
|
|
c3[2] = 0;
|
|
}
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign_S ( 0, w3[3], offset_minus_4);
|
|
c3[2] = amd_bytealign_S (w3[3], w3[2], offset_minus_4);
|
|
c3[1] = amd_bytealign_S (w3[2], w3[1], offset_minus_4);
|
|
c3[0] = amd_bytealign_S (w3[1], w3[0], offset_minus_4);
|
|
c2[3] = amd_bytealign_S (w3[0], w2[3], offset_minus_4);
|
|
c2[2] = amd_bytealign_S (w2[3], w2[2], offset_minus_4);
|
|
c2[1] = amd_bytealign_S (w2[2], w2[1], offset_minus_4);
|
|
c2[0] = amd_bytealign_S (w2[1], w2[0], offset_minus_4);
|
|
c1[3] = amd_bytealign_S (w2[0], w1[3], offset_minus_4);
|
|
c1[2] = amd_bytealign_S (w1[3], w1[2], offset_minus_4);
|
|
c1[1] = amd_bytealign_S (w1[2], w1[1], offset_minus_4);
|
|
c1[0] = amd_bytealign_S (w1[1], w1[0], offset_minus_4);
|
|
c0[3] = amd_bytealign_S (w1[0], w0[3], offset_minus_4);
|
|
c0[2] = amd_bytealign_S (w0[3], w0[2], offset_minus_4);
|
|
c0[1] = amd_bytealign_S (w0[2], w0[1], offset_minus_4);
|
|
c0[0] = amd_bytealign_S (w0[1], w0[0], offset_minus_4);
|
|
w3[3] = amd_bytealign_S (w0[0], 0, offset_minus_4);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
if (offset_mod_4 == 0)
|
|
{
|
|
w3[3] = c0[0];
|
|
c0[0] = c0[1];
|
|
c0[1] = c0[2];
|
|
c0[2] = c0[3];
|
|
c0[3] = c1[0];
|
|
c1[0] = c1[1];
|
|
c1[1] = c1[2];
|
|
c1[2] = c1[3];
|
|
c1[3] = c2[0];
|
|
c2[0] = c2[1];
|
|
c2[1] = c2[2];
|
|
c2[2] = c2[3];
|
|
c2[3] = c3[0];
|
|
c3[0] = c3[1];
|
|
c3[1] = c3[2];
|
|
c3[2] = c3[3];
|
|
c3[3] = 0;
|
|
}
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w3[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm_S (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w3[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w3[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w3[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w3[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w3[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w3[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w3[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w3[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w3[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w3[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w3[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w3[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w3[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w3[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w3[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c0[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c1[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign_S (w3[3], 0, offset);
|
|
c2[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign_S (w3[3], 0, offset);
|
|
c3[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c3[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c2[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c1[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
c0[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = __byte_perm_S ( 0, w3[3], selector);
|
|
w3[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm_S (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = __byte_perm_S ( 0, w3[3], selector);
|
|
c0[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = __byte_perm_S ( 0, w3[3], selector);
|
|
c0[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = __byte_perm_S ( 0, w3[3], selector);
|
|
c0[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = __byte_perm_S ( 0, w3[3], selector);
|
|
c0[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = __byte_perm_S ( 0, w3[3], selector);
|
|
c1[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = __byte_perm_S ( 0, w3[3], selector);
|
|
c1[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = __byte_perm_S ( 0, w3[3], selector);
|
|
c1[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = __byte_perm_S ( 0, w3[3], selector);
|
|
c1[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = __byte_perm_S ( 0, w3[3], selector);
|
|
c2[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = __byte_perm_S ( 0, w3[3], selector);
|
|
c2[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = __byte_perm_S ( 0, w3[3], selector);
|
|
c2[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = __byte_perm_S ( 0, w3[3], selector);
|
|
c2[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = __byte_perm_S ( 0, w3[3], selector);
|
|
c3[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = __byte_perm_S ( 0, w3[3], selector);
|
|
c3[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c3[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c2[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c1[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
c0[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = __byte_perm_S ( 0, w3[3], selector);
|
|
c3[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c3[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c3[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c2[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c2[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c1[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c1[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
c0[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
c0[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
w4[0] = swap32_S (w4[0]);
|
|
w4[1] = swap32_S (w4[1]);
|
|
w4[2] = swap32_S (w4[2]);
|
|
w4[3] = swap32_S (w4[3]);
|
|
w5[0] = swap32_S (w5[0]);
|
|
w5[1] = swap32_S (w5[1]);
|
|
w5[2] = swap32_S (w5[2]);
|
|
w5[3] = swap32_S (w5[3]);
|
|
w6[0] = swap32_S (w6[0]);
|
|
w6[1] = swap32_S (w6[1]);
|
|
w6[2] = swap32_S (w6[2]);
|
|
w6[3] = swap32_S (w6[3]);
|
|
w7[0] = swap32_S (w7[0]);
|
|
w7[1] = swap32_S (w7[1]);
|
|
w7[2] = swap32_S (w7[2]);
|
|
w7[3] = swap32_S (w7[3]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
w0[0] = swap32_S (w0[0]);
|
|
w0[1] = swap32_S (w0[1]);
|
|
w0[2] = swap32_S (w0[2]);
|
|
w0[3] = swap32_S (w0[3]);
|
|
w1[0] = swap32_S (w1[0]);
|
|
w1[1] = swap32_S (w1[1]);
|
|
w1[2] = swap32_S (w1[2]);
|
|
w1[3] = swap32_S (w1[3]);
|
|
w2[0] = swap32_S (w2[0]);
|
|
w2[1] = swap32_S (w2[1]);
|
|
w2[2] = swap32_S (w2[2]);
|
|
w2[3] = swap32_S (w2[3]);
|
|
w3[0] = swap32_S (w3[0]);
|
|
w3[1] = swap32_S (w3[1]);
|
|
w3[2] = swap32_S (w3[2]);
|
|
w3[3] = swap32_S (w3[3]);
|
|
w4[0] = swap32_S (w4[0]);
|
|
w4[1] = swap32_S (w4[1]);
|
|
w4[2] = swap32_S (w4[2]);
|
|
w4[3] = swap32_S (w4[3]);
|
|
w5[0] = swap32_S (w5[0]);
|
|
w5[1] = swap32_S (w5[1]);
|
|
w5[2] = swap32_S (w5[2]);
|
|
w5[3] = swap32_S (w5[3]);
|
|
w6[0] = swap32_S (w6[0]);
|
|
w6[1] = swap32_S (w6[1]);
|
|
w6[2] = swap32_S (w6[2]);
|
|
w6[3] = swap32_S (w6[3]);
|
|
w7[0] = swap32_S (w7[0]);
|
|
w7[1] = swap32_S (w7[1]);
|
|
w7[2] = swap32_S (w7[2]);
|
|
w7[3] = swap32_S (w7[3]);
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = __byte_perm_S (w7[2], w7[3], selector);
|
|
w7[2] = __byte_perm_S (w7[1], w7[2], selector);
|
|
w7[1] = __byte_perm_S (w7[0], w7[1], selector);
|
|
w7[0] = __byte_perm_S (w6[3], w7[0], selector);
|
|
w6[3] = __byte_perm_S (w6[2], w6[3], selector);
|
|
w6[2] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w6[1] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w6[0] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w5[3] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w5[2] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w5[1] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w5[0] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w4[3] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w4[2] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w4[1] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w4[0] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w3[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = __byte_perm_S (w7[1], w7[2], selector);
|
|
w7[2] = __byte_perm_S (w7[0], w7[1], selector);
|
|
w7[1] = __byte_perm_S (w6[3], w7[0], selector);
|
|
w7[0] = __byte_perm_S (w6[2], w6[3], selector);
|
|
w6[3] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w6[2] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w6[1] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w6[0] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w5[3] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w5[2] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w5[1] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w5[0] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w4[3] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w4[2] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w4[1] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w4[0] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w3[3] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = __byte_perm_S (w7[0], w7[1], selector);
|
|
w7[2] = __byte_perm_S (w6[3], w7[0], selector);
|
|
w7[1] = __byte_perm_S (w6[2], w6[3], selector);
|
|
w7[0] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w6[3] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w6[2] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w6[1] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w6[0] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w5[3] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w5[2] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w5[1] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w5[0] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w4[3] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w4[2] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w4[1] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w4[0] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w3[3] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = __byte_perm_S (w6[3], w7[0], selector);
|
|
w7[2] = __byte_perm_S (w6[2], w6[3], selector);
|
|
w7[1] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w7[0] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w6[3] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w6[2] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w6[1] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w6[0] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w5[3] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w5[2] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w5[1] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w5[0] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w4[3] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w4[2] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w4[1] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w4[0] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w3[3] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w0[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = __byte_perm_S (w6[2], w6[3], selector);
|
|
w7[2] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w7[1] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w7[0] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w6[3] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w6[2] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w6[1] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w6[0] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w5[3] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w5[2] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w5[1] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w5[0] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w4[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w4[2] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w4[1] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w4[0] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w3[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = __byte_perm_S (w6[1], w6[2], selector);
|
|
w7[2] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w7[1] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w7[0] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w6[3] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w6[2] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w6[1] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w6[0] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w5[3] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w5[2] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w5[1] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w5[0] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w4[3] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w4[2] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w4[1] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w4[0] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w3[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = __byte_perm_S (w6[0], w6[1], selector);
|
|
w7[2] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w7[1] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w7[0] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w6[3] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w6[2] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w6[1] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w6[0] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w5[3] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w5[2] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w5[1] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w5[0] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w4[3] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w4[2] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w4[1] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w4[0] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w3[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = __byte_perm_S (w5[3], w6[0], selector);
|
|
w7[2] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w7[1] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w7[0] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w6[3] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w6[2] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w6[1] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w6[0] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w5[3] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w5[2] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w5[1] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w5[0] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w4[3] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w4[2] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w4[1] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w4[0] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w3[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w1[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = __byte_perm_S (w5[2], w5[3], selector);
|
|
w7[2] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w7[1] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w7[0] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w6[3] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w6[2] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w6[1] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w6[0] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w5[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w5[2] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w5[1] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w5[0] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w4[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w4[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w4[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w4[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w3[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = __byte_perm_S (w5[1], w5[2], selector);
|
|
w7[2] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w7[1] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w7[0] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w6[3] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w6[2] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w6[1] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w6[0] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w5[3] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w5[2] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w5[1] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w5[0] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w4[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w4[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w4[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w4[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w3[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = __byte_perm_S (w5[0], w5[1], selector);
|
|
w7[2] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w7[1] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w7[0] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w6[3] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w6[2] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w6[1] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w6[0] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w5[3] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w5[2] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w5[1] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w5[0] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w4[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w4[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w4[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w4[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w3[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = __byte_perm_S (w4[3], w5[0], selector);
|
|
w7[2] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w7[1] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w7[0] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w6[3] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w6[2] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w6[1] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w6[0] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w5[3] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w5[2] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w5[1] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w5[0] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w4[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w4[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w4[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w4[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w3[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w2[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = __byte_perm_S (w4[2], w4[3], selector);
|
|
w7[2] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w7[1] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w7[0] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w6[3] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w6[2] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w6[1] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w6[0] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w5[3] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w5[2] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w5[1] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w5[0] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w4[3] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w4[2] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w4[1] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w4[0] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w3[3] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[0] = __byte_perm_S ( 0, w0[0], selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = __byte_perm_S (w4[1], w4[2], selector);
|
|
w7[2] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w7[1] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w7[0] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w6[3] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w6[2] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w6[1] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w6[0] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w5[3] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w5[2] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w5[1] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w5[0] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w4[3] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w4[2] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w4[1] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w4[0] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w3[3] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[1] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = __byte_perm_S (w4[0], w4[1], selector);
|
|
w7[2] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w7[1] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w7[0] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w6[3] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w6[2] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w6[1] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w6[0] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w5[3] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w5[2] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w5[1] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w5[0] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w4[3] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w4[2] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w4[1] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w4[0] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w3[3] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[2] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = __byte_perm_S (w3[3], w4[0], selector);
|
|
w7[2] = __byte_perm_S (w3[2], w3[3], selector);
|
|
w7[1] = __byte_perm_S (w3[1], w3[2], selector);
|
|
w7[0] = __byte_perm_S (w3[0], w3[1], selector);
|
|
w6[3] = __byte_perm_S (w2[3], w3[0], selector);
|
|
w6[2] = __byte_perm_S (w2[2], w2[3], selector);
|
|
w6[1] = __byte_perm_S (w2[1], w2[2], selector);
|
|
w6[0] = __byte_perm_S (w2[0], w2[1], selector);
|
|
w5[3] = __byte_perm_S (w1[3], w2[0], selector);
|
|
w5[2] = __byte_perm_S (w1[2], w1[3], selector);
|
|
w5[1] = __byte_perm_S (w1[1], w1[2], selector);
|
|
w5[0] = __byte_perm_S (w1[0], w1[1], selector);
|
|
w4[3] = __byte_perm_S (w0[3], w1[0], selector);
|
|
w4[2] = __byte_perm_S (w0[2], w0[3], selector);
|
|
w4[1] = __byte_perm_S (w0[1], w0[2], selector);
|
|
w4[0] = __byte_perm_S (w0[0], w0[1], selector);
|
|
w3[3] = __byte_perm_S ( 0, w0[0], selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_8x4_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w7[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
w7[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
w7[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w6[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w6[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w5[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w5[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w4[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w4[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w3[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm_S (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w7[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
w7[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w6[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w5[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w4[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w7[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w7[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w7[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w7[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w7[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w7[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w7[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w7[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w7[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w7[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w7[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w7[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w7[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w7[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w7[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w7[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w7[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w7[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w7[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w7[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w7[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w7[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w7[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w7[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w7[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w7[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w7[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w7[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w7[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w7[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], u32 c0[4], u32 c1[4], u32 c2[4], u32 c3[4], u32 c4[4], u32 c5[4], u32 c6[4], u32 c7[4], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
w7[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
w7[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w6[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w5[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w4[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w3[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w2[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w1[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w0[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c0[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
w7[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w6[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w5[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w4[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w3[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w2[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w1[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w0[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c0[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c0[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
w7[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w6[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w5[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w4[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w3[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w2[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w1[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w0[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c0[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c0[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c0[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
w7[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w6[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w5[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w4[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w3[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w2[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w1[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w0[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c0[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c0[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c0[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c0[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
w7[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w6[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w5[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w4[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w3[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w2[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w1[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c1[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c0[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c0[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c0[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c0[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
w7[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w6[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w5[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w4[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w3[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w2[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w1[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c1[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c1[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c0[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c0[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c0[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c0[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
w7[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w6[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w5[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w4[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w3[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w2[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w1[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c1[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c1[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c1[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c0[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c0[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c0[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c0[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
w7[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w6[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w5[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w4[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w3[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w2[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w1[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c1[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c1[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c1[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c1[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c0[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c0[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c0[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c0[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
w7[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w6[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w5[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w4[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w3[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w2[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c2[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c1[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c1[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c1[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c1[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c0[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c0[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c0[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c0[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
w7[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w6[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w5[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w4[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w3[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w2[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c2[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c2[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c1[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c1[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c1[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c1[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c0[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c0[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c0[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c0[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
w7[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w6[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w5[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w4[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w3[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w2[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c2[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c2[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c2[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c1[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c1[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c1[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c1[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c0[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c0[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c0[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c0[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
w7[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w6[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w5[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w4[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w3[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w2[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c2[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c2[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c2[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c2[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c1[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c1[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c1[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c1[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c0[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c0[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c0[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c0[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
w7[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w6[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w5[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w4[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w3[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c3[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c2[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c2[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c2[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c2[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c1[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c1[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c1[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c1[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c0[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c0[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c0[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c0[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
w7[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w6[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w5[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w4[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w3[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c3[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c3[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c2[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c2[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c2[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c2[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c1[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c1[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c1[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c1[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c0[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c0[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c0[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c0[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
w7[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w6[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w5[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w4[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w3[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c3[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c3[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c3[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c2[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c2[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c2[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c2[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c1[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c1[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c1[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c1[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c0[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c0[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c0[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c0[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
w7[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w6[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w5[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w4[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w3[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
c4[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c3[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c3[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c3[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c3[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c2[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c2[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c2[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c2[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c1[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c1[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c1[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c1[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c0[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c0[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c0[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c0[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
w7[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w6[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w5[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w4[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
c4[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c4[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c3[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c3[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c3[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c3[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c2[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c2[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c2[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c2[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c1[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c1[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c1[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c1[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c0[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c0[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c0[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c0[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
w7[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w6[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w5[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w4[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
c4[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c4[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c4[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c3[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c3[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c3[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c3[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c2[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c2[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c2[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c2[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c1[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c1[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c1[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c1[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c0[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c0[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c0[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
w7[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w6[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w5[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w4[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
c4[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c4[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c4[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c4[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c3[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c3[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c3[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c3[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c2[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c2[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c2[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c2[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c1[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c1[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c1[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c1[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c0[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c0[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
w7[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w6[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w5[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w4[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
c5[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c4[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c4[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c4[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c4[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c3[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c3[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c3[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c3[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c2[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c2[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c2[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c2[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c1[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c1[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c1[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c1[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c0[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
w7[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w6[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w5[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
c5[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c5[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c4[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c4[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c4[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c4[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c3[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c3[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c3[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c3[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c2[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c2[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c2[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c2[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c1[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c1[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c1[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c1[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c0[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
w7[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w6[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w5[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
c5[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c5[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c5[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c4[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c4[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c4[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c4[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c3[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c3[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c3[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c3[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c2[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c2[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c2[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c2[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c1[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c1[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c1[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c0[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
w7[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w6[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w5[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
c5[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c5[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c5[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c5[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c4[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c4[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c4[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c4[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c3[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c3[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c3[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c3[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c2[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c2[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c2[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c2[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c1[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c1[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c0[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
w7[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w6[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w5[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
c6[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c5[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c5[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c5[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c5[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c4[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c4[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c4[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c4[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c3[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c3[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c3[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c3[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c2[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c2[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c2[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c2[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c1[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c0[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
w7[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w6[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
c6[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c6[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c5[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c5[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c5[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c5[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c4[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c4[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c4[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c4[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c3[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c3[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c3[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c3[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c2[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c2[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c2[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c2[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c1[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c0[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
w7[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w6[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
c6[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c6[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c6[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c5[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c5[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c5[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c5[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c4[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c4[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c4[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c4[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c3[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c3[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c3[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c3[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c2[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c2[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c2[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c1[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c0[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
w7[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w6[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
c6[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c6[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c6[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c6[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c5[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c5[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c5[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c5[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c4[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c4[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c4[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c4[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c3[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c3[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c3[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c3[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c2[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c2[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c1[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c0[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
w7[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w6[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
c7[0] = amd_bytealign_S (w7[3], 0, offset);
|
|
c6[3] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c6[2] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c6[1] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c6[0] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c5[3] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c5[2] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c5[1] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c5[0] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c4[3] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c4[2] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c4[1] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c4[0] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c3[3] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c3[2] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c3[1] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c3[0] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c2[3] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[2] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[1] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[0] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c1[3] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[2] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[1] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[0] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c0[3] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[2] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[1] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[0] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
w7[3] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[2] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[1] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[0] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
c7[1] = amd_bytealign_S (w7[3], 0, offset);
|
|
c7[0] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c6[3] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c6[2] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c6[1] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c6[0] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c5[3] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c5[2] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c5[1] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c5[0] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c4[3] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c4[2] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c4[1] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c4[0] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c3[3] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c3[2] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c3[1] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c3[0] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c2[3] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[2] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[1] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[0] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c1[3] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[2] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[1] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[0] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c0[3] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[2] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[1] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[0] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
w7[3] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[2] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[1] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
c7[2] = amd_bytealign_S (w7[3], 0, offset);
|
|
c7[1] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c7[0] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c6[3] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c6[2] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c6[1] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c6[0] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c5[3] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c5[2] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c5[1] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c5[0] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c4[3] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c4[2] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c4[1] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c4[0] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c3[3] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c3[2] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c3[1] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[0] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c2[3] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[2] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[1] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[0] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c1[3] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[2] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[1] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[0] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c0[3] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[2] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[1] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[0] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
w7[3] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[2] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
c7[3] = amd_bytealign_S (w7[3], 0, offset);
|
|
c7[2] = amd_bytealign_S (w7[2], w7[3], offset);
|
|
c7[1] = amd_bytealign_S (w7[1], w7[2], offset);
|
|
c7[0] = amd_bytealign_S (w7[0], w7[1], offset);
|
|
c6[3] = amd_bytealign_S (w6[3], w7[0], offset);
|
|
c6[2] = amd_bytealign_S (w6[2], w6[3], offset);
|
|
c6[1] = amd_bytealign_S (w6[1], w6[2], offset);
|
|
c6[0] = amd_bytealign_S (w6[0], w6[1], offset);
|
|
c5[3] = amd_bytealign_S (w5[3], w6[0], offset);
|
|
c5[2] = amd_bytealign_S (w5[2], w5[3], offset);
|
|
c5[1] = amd_bytealign_S (w5[1], w5[2], offset);
|
|
c5[0] = amd_bytealign_S (w5[0], w5[1], offset);
|
|
c4[3] = amd_bytealign_S (w4[3], w5[0], offset);
|
|
c4[2] = amd_bytealign_S (w4[2], w4[3], offset);
|
|
c4[1] = amd_bytealign_S (w4[1], w4[2], offset);
|
|
c4[0] = amd_bytealign_S (w4[0], w4[1], offset);
|
|
c3[3] = amd_bytealign_S (w3[3], w4[0], offset);
|
|
c3[2] = amd_bytealign_S (w3[2], w3[3], offset);
|
|
c3[1] = amd_bytealign_S (w3[1], w3[2], offset);
|
|
c3[0] = amd_bytealign_S (w3[0], w3[1], offset);
|
|
c2[3] = amd_bytealign_S (w2[3], w3[0], offset);
|
|
c2[2] = amd_bytealign_S (w2[2], w2[3], offset);
|
|
c2[1] = amd_bytealign_S (w2[1], w2[2], offset);
|
|
c2[0] = amd_bytealign_S (w2[0], w2[1], offset);
|
|
c1[3] = amd_bytealign_S (w1[3], w2[0], offset);
|
|
c1[2] = amd_bytealign_S (w1[2], w1[3], offset);
|
|
c1[1] = amd_bytealign_S (w1[1], w1[2], offset);
|
|
c1[0] = amd_bytealign_S (w1[0], w1[1], offset);
|
|
c0[3] = amd_bytealign_S (w0[3], w1[0], offset);
|
|
c0[2] = amd_bytealign_S (w0[2], w0[3], offset);
|
|
c0[1] = amd_bytealign_S (w0[1], w0[2], offset);
|
|
c0[0] = amd_bytealign_S (w0[0], w0[1], offset);
|
|
w7[3] = amd_bytealign_S ( 0, w0[0], offset);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
c0[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
w7[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
w7[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
w7[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w6[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w6[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w5[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w5[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w4[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w4[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w3[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w2[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w1[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w0[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[0] = __byte_perm_S (w0[0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
c0[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c0[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
w7[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
w7[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w6[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w5[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w4[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w3[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w2[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w1[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w0[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
c0[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c0[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c0[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
w7[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w6[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w5[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w4[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w3[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w2[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w1[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w0[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
c0[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c0[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c0[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c0[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
w7[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w6[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w5[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w4[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w3[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w2[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w1[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w0[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
c1[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c0[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c0[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c0[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c0[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
w7[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w6[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w5[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w4[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w3[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w2[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w1[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
c1[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c1[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c0[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c0[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c0[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c0[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
w7[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w6[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w5[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w4[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w3[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w2[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w1[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
c1[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c1[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c1[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c0[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c0[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c0[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c0[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
w7[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w6[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w5[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w4[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w3[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w2[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w1[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
c1[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c1[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c1[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c1[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c0[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c0[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c0[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c0[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
w7[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w6[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w5[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w4[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w3[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w2[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w1[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
c2[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c1[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c1[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c1[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c1[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c0[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c0[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c0[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c0[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
w7[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w6[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w5[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w4[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w3[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w2[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
c2[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c2[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c1[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c1[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c1[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c1[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c0[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c0[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c0[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c0[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
w7[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w6[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w5[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w4[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w3[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w2[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
c2[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c2[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c2[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c1[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c1[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c1[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c1[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c0[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c0[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c0[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c0[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
w7[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w6[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w5[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w4[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w3[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w2[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
c2[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c2[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c2[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c2[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c1[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c1[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c1[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c1[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c0[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c0[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c0[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c0[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
w7[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w6[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w5[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w4[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w3[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w2[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
c3[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c2[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c2[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c2[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c2[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c1[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c1[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c1[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c1[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c0[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c0[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c0[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c0[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
w7[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w6[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w5[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w4[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w3[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
c3[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c3[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c2[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c2[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c2[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c2[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c1[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c1[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c1[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c1[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c0[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c0[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c0[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c0[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
w7[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w6[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w5[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w4[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w3[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
c3[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c3[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c3[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c2[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c2[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c2[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c2[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c1[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c1[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c1[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c1[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c0[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c0[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c0[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c0[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
w7[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w6[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w5[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w4[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w3[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
c3[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c3[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c3[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c3[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c2[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c2[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c2[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c2[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c1[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c1[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c1[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c1[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c0[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c0[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c0[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c0[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
w7[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w6[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w5[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w4[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w3[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
c4[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c3[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c3[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c3[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c3[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c2[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c2[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c2[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c2[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c1[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c1[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c1[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c1[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c0[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c0[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c0[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c0[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
w7[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w6[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w5[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w4[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
c4[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c4[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c3[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c3[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c3[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c3[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c2[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c2[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c2[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c2[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c1[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c1[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c1[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c1[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c0[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c0[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c0[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c0[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
w7[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w6[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w5[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w4[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
c4[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c4[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c4[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c3[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c3[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c3[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c3[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c2[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c2[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c2[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c2[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c1[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c1[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c1[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c1[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c0[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c0[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c0[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
w7[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w6[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w5[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w4[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
c4[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c4[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c4[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c4[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c3[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c3[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c3[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c3[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c2[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c2[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c2[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c2[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c1[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c1[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c1[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c1[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c0[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c0[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
w7[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w6[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w5[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w4[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
c5[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c4[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c4[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c4[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c4[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c3[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c3[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c3[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c3[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c2[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c2[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c2[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c2[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c1[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c1[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c1[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c1[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c0[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
w7[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w6[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w5[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
c5[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c5[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c4[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c4[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c4[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c4[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c3[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c3[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c3[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c3[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c2[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c2[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c2[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c2[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c1[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c1[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c1[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c1[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c0[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
w7[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w6[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w5[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
c5[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c5[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c5[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c4[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c4[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c4[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c4[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c3[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c3[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c3[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c3[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c2[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c2[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c2[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c2[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c1[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c1[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c1[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c0[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
w7[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w6[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w5[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
c5[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c5[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c5[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c5[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c4[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c4[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c4[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c4[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c3[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c3[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c3[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c3[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c2[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c2[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c2[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c2[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c1[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c1[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c0[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
w7[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w6[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w5[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
c6[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c5[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c5[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c5[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c5[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c4[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c4[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c4[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c4[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c3[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c3[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c3[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c3[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c2[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c2[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c2[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c2[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c1[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c0[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
w7[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w6[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
c6[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c6[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c5[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c5[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c5[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c5[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c4[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c4[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c4[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c4[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c3[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c3[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c3[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c3[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c2[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c2[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c2[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c2[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c1[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c0[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
w7[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w6[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
c6[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c6[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c6[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c5[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c5[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c5[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c5[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c4[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c4[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c4[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c4[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c3[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c3[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c3[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c3[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c2[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c2[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c2[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c1[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c0[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
w7[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w6[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
c6[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c6[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c6[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c6[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c5[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c5[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c5[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c5[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c4[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c4[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c4[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c4[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c3[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c3[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c3[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c3[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c2[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c2[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c1[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c0[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
w7[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w6[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
c7[0] = __byte_perm_S ( 0, w7[3], selector);
|
|
c6[3] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c6[2] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c6[1] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c6[0] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c5[3] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c5[2] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c5[1] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c5[0] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c4[3] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c4[2] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c4[1] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c4[0] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c3[3] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c3[2] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c3[1] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c3[0] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c2[3] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[2] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[1] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[0] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c1[3] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[2] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[1] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[0] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c0[3] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[2] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[1] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[0] = __byte_perm_S (w1[0], w0[3], selector);
|
|
w7[3] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[2] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[1] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[0] = __byte_perm_S (w0[0], 0, selector);
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
c7[1] = __byte_perm_S ( 0, w7[3], selector);
|
|
c7[0] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c6[3] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c6[2] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c6[1] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c6[0] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c5[3] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c5[2] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c5[1] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c5[0] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c4[3] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c4[2] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c4[1] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c4[0] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c3[3] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c3[2] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c3[1] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c3[0] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c2[3] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[2] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[1] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[0] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c1[3] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[2] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[1] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[0] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c0[3] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[2] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[1] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[0] = __byte_perm_S (w0[3], w0[2], selector);
|
|
w7[3] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[2] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[1] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
c7[2] = __byte_perm_S ( 0, w7[3], selector);
|
|
c7[1] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c7[0] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c6[3] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c6[2] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c6[1] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c6[0] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c5[3] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c5[2] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c5[1] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c5[0] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c4[3] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c4[2] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c4[1] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c4[0] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c3[3] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c3[2] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c3[1] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c3[0] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c2[3] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[2] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[1] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c2[0] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c1[3] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[2] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[1] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c1[0] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c0[3] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[2] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[1] = __byte_perm_S (w0[3], w0[2], selector);
|
|
c0[0] = __byte_perm_S (w0[2], w0[1], selector);
|
|
w7[3] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[2] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
c7[3] = __byte_perm_S ( 0, w7[3], selector);
|
|
c7[2] = __byte_perm_S (w7[3], w7[2], selector);
|
|
c7[1] = __byte_perm_S (w7[2], w7[1], selector);
|
|
c7[0] = __byte_perm_S (w7[1], w7[0], selector);
|
|
c6[3] = __byte_perm_S (w7[0], w6[3], selector);
|
|
c6[2] = __byte_perm_S (w6[3], w6[2], selector);
|
|
c6[1] = __byte_perm_S (w6[2], w6[1], selector);
|
|
c6[0] = __byte_perm_S (w6[1], w6[0], selector);
|
|
c5[3] = __byte_perm_S (w6[0], w5[3], selector);
|
|
c5[2] = __byte_perm_S (w5[3], w5[2], selector);
|
|
c5[1] = __byte_perm_S (w5[2], w5[1], selector);
|
|
c5[0] = __byte_perm_S (w5[1], w5[0], selector);
|
|
c4[3] = __byte_perm_S (w5[0], w4[3], selector);
|
|
c4[2] = __byte_perm_S (w4[3], w4[2], selector);
|
|
c4[1] = __byte_perm_S (w4[2], w4[1], selector);
|
|
c4[0] = __byte_perm_S (w4[1], w4[0], selector);
|
|
c3[3] = __byte_perm_S (w4[0], w3[3], selector);
|
|
c3[2] = __byte_perm_S (w3[3], w3[2], selector);
|
|
c3[1] = __byte_perm_S (w3[2], w3[1], selector);
|
|
c3[0] = __byte_perm_S (w3[1], w3[0], selector);
|
|
c2[3] = __byte_perm_S (w3[0], w2[3], selector);
|
|
c2[2] = __byte_perm_S (w2[3], w2[2], selector);
|
|
c2[1] = __byte_perm_S (w2[2], w2[1], selector);
|
|
c2[0] = __byte_perm_S (w2[1], w2[0], selector);
|
|
c1[3] = __byte_perm_S (w2[0], w1[3], selector);
|
|
c1[2] = __byte_perm_S (w1[3], w1[2], selector);
|
|
c1[1] = __byte_perm_S (w1[2], w1[1], selector);
|
|
c1[0] = __byte_perm_S (w1[1], w1[0], selector);
|
|
c0[3] = __byte_perm_S (w1[0], w0[3], selector);
|
|
c0[2] = __byte_perm_S (w0[3], w0[2], selector);
|
|
c0[1] = __byte_perm_S (w0[2], w0[1], selector);
|
|
c0[0] = __byte_perm_S (w0[1], w0[0], selector);
|
|
w7[3] = __byte_perm_S (w0[0], 0, selector);
|
|
w7[2] = 0;
|
|
w7[1] = 0;
|
|
w7[0] = 0;
|
|
w6[3] = 0;
|
|
w6[2] = 0;
|
|
w6[1] = 0;
|
|
w6[0] = 0;
|
|
w5[3] = 0;
|
|
w5[2] = 0;
|
|
w5[1] = 0;
|
|
w5[0] = 0;
|
|
w4[3] = 0;
|
|
w4[2] = 0;
|
|
w4[1] = 0;
|
|
w4[0] = 0;
|
|
w3[3] = 0;
|
|
w3[2] = 0;
|
|
w3[1] = 0;
|
|
w3[0] = 0;
|
|
w2[3] = 0;
|
|
w2[2] = 0;
|
|
w2[1] = 0;
|
|
w2[0] = 0;
|
|
w1[3] = 0;
|
|
w1[2] = 0;
|
|
w1[1] = 0;
|
|
w1[0] = 0;
|
|
w0[3] = 0;
|
|
w0[2] = 0;
|
|
w0[1] = 0;
|
|
w0[0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
|
|
{
|
|
const int offset_mod_4 = offset & 3;
|
|
|
|
const int offset_minus_4 = 4 - offset_mod_4;
|
|
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
|
|
#pragma unroll
|
|
for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = amd_bytealign_S (w[62], w[63], offset);
|
|
w[62] = amd_bytealign_S (w[61], w[62], offset);
|
|
w[61] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[60] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[59] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[58] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[57] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[56] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[55] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[54] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[53] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[52] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[51] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[50] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[49] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[48] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[47] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[46] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[45] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[44] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[43] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[42] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[41] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[40] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[39] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[38] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[37] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[36] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[35] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[34] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[33] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[32] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[31] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[30] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[29] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[28] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[27] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[26] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[25] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[24] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[23] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[22] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[21] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[20] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[19] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[18] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[17] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[16] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[15] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[14] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[13] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[12] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[11] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[10] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 2] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 1] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 0] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = amd_bytealign_S (w[61], w[62], offset);
|
|
w[62] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[61] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[60] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[59] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[58] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[57] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[56] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[55] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[54] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[53] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[52] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[51] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[50] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[49] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[48] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[47] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[46] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[45] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[44] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[43] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[42] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[41] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[40] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[39] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[38] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[37] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[36] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[35] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[34] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[33] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[32] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[31] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[30] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[29] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[28] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[27] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[26] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[25] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[24] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[23] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[22] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[21] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[20] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[19] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[18] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[17] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[16] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[15] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[14] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[13] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[12] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[11] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[10] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 2] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 1] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[62] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[61] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[60] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[59] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[58] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[57] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[56] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[55] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[54] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[53] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[52] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[51] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[50] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[49] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[48] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[47] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[46] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[45] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[44] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[43] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[42] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[41] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[40] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[39] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[38] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[37] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[36] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[35] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[34] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[33] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[32] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[31] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[30] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[29] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[28] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[27] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[26] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[25] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[24] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[23] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[22] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[21] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[20] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[19] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[18] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[17] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[16] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[15] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[14] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[13] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[12] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[11] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[10] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 2] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[62] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[61] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[60] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[59] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[58] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[57] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[56] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[55] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[54] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[53] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[52] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[51] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[50] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[49] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[48] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[47] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[46] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[45] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[44] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[43] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[42] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[41] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[40] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[39] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[38] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[37] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[36] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[35] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[34] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[33] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[32] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[31] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[30] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[29] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[28] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[27] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[26] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[25] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[24] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[23] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[22] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[21] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[20] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[19] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[18] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[17] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[16] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[15] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[14] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[13] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[12] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[11] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[10] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 3] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[62] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[61] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[60] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[59] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[58] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[57] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[56] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[55] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[54] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[53] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[52] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[51] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[50] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[49] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[48] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[47] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[46] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[45] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[44] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[43] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[42] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[41] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[40] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[39] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[38] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[37] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[36] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[35] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[34] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[33] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[32] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[31] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[30] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[29] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[28] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[27] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[26] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[25] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[24] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[23] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[22] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[21] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[20] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[19] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[18] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[17] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[16] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[15] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[14] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[13] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[12] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[11] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[10] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 4] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[62] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[61] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[60] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[59] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[58] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[57] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[56] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[55] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[54] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[53] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[52] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[51] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[50] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[49] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[48] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[47] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[46] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[45] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[44] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[43] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[42] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[41] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[40] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[39] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[38] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[37] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[36] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[35] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[34] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[33] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[32] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[31] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[30] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[29] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[28] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[27] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[26] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[25] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[24] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[23] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[22] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[21] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[20] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[19] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[18] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[17] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[16] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[15] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[14] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[13] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[12] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[11] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[10] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 5] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[62] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[61] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[60] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[59] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[58] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[57] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[56] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[55] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[54] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[53] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[52] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[51] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[50] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[49] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[48] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[47] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[46] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[45] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[44] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[43] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[42] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[41] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[40] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[39] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[38] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[37] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[36] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[35] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[34] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[33] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[32] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[31] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[30] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[29] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[28] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[27] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[26] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[25] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[24] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[23] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[22] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[21] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[20] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[19] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[18] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[17] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[16] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[15] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[14] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[13] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[12] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[11] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[10] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 6] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[62] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[61] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[60] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[59] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[58] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[57] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[56] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[55] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[54] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[53] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[52] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[51] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[50] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[49] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[48] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[47] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[46] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[45] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[44] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[43] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[42] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[41] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[40] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[39] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[38] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[37] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[36] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[35] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[34] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[33] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[32] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[31] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[30] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[29] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[28] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[27] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[26] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[25] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[24] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[23] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[22] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[21] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[20] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[19] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[18] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[17] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[16] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[15] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[14] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[13] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[12] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[11] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[10] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 7] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[62] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[61] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[60] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[59] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[58] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[57] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[56] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[55] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[54] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[53] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[52] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[51] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[50] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[49] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[48] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[47] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[46] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[45] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[44] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[43] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[42] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[41] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[40] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[39] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[38] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[37] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[36] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[35] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[34] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[33] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[32] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[31] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[30] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[29] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[28] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[27] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[26] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[25] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[24] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[23] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[22] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[21] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[20] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[19] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[18] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[17] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[16] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[15] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[14] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[13] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[12] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[11] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[10] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 8] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[62] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[61] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[60] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[59] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[58] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[57] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[56] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[55] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[54] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[53] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[52] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[51] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[50] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[49] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[48] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[47] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[46] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[45] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[44] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[43] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[42] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[41] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[40] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[39] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[38] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[37] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[36] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[35] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[34] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[33] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[32] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[31] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[30] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[29] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[28] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[27] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[26] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[25] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[24] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[23] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[22] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[21] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[20] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[19] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[18] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[17] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[16] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[15] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[14] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[13] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[12] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[11] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[10] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 9] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[62] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[61] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[60] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[59] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[58] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[57] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[56] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[55] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[54] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[53] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[52] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[51] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[50] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[49] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[48] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[47] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[46] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[45] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[44] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[43] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[42] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[41] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[40] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[39] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[38] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[37] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[36] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[35] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[34] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[33] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[32] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[31] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[30] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[29] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[28] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[27] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[26] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[25] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[24] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[23] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[22] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[21] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[20] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[19] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[18] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[17] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[16] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[15] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[14] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[13] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[12] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[11] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[10] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[62] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[61] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[60] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[59] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[58] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[57] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[56] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[55] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[54] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[53] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[52] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[51] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[50] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[49] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[48] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[47] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[46] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[45] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[44] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[43] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[42] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[41] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[40] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[39] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[38] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[37] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[36] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[35] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[34] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[33] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[32] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[31] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[30] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[29] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[28] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[27] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[26] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[25] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[24] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[23] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[22] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[21] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[20] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[19] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[18] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[17] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[16] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[15] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[14] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[13] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[12] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[11] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[62] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[61] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[60] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[59] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[58] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[57] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[56] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[55] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[54] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[53] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[52] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[51] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[50] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[49] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[48] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[47] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[46] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[45] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[44] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[43] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[42] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[41] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[40] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[39] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[38] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[37] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[36] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[35] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[34] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[33] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[32] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[31] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[30] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[29] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[28] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[27] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[26] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[25] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[24] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[23] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[22] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[21] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[20] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[19] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[18] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[17] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[16] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[15] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[14] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[13] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[12] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[62] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[61] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[60] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[59] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[58] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[57] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[56] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[55] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[54] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[53] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[52] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[51] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[50] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[49] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[48] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[47] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[46] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[45] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[44] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[43] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[42] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[41] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[40] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[39] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[38] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[37] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[36] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[35] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[34] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[33] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[32] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[31] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[30] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[29] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[28] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[27] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[26] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[25] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[24] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[23] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[22] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[21] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[20] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[19] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[18] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[17] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[16] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[15] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[14] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[13] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[62] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[61] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[60] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[59] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[58] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[57] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[56] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[55] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[54] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[53] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[52] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[51] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[50] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[49] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[48] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[47] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[46] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[45] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[44] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[43] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[42] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[41] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[40] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[39] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[38] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[37] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[36] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[35] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[34] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[33] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[32] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[31] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[30] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[29] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[28] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[27] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[26] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[25] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[24] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[23] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[22] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[21] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[20] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[19] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[18] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[17] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[16] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[15] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[14] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[62] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[61] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[60] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[59] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[58] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[57] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[56] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[55] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[54] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[53] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[52] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[51] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[50] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[49] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[48] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[47] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[46] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[45] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[44] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[43] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[42] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[41] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[40] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[39] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[38] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[37] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[36] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[35] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[34] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[33] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[32] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[31] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[30] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[29] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[28] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[27] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[26] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[25] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[24] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[23] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[22] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[21] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[20] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[19] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[18] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[17] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[16] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[15] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[62] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[61] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[60] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[59] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[58] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[57] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[56] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[55] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[54] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[53] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[52] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[51] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[50] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[49] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[48] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[47] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[46] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[45] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[44] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[43] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[42] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[41] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[40] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[39] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[38] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[37] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[36] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[35] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[34] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[33] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[32] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[31] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[30] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[29] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[28] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[27] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[26] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[25] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[24] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[23] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[22] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[21] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[20] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[19] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[18] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[17] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[16] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[62] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[61] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[60] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[59] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[58] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[57] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[56] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[55] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[54] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[53] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[52] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[51] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[50] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[49] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[48] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[47] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[46] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[45] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[44] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[43] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[42] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[41] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[40] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[39] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[38] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[37] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[36] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[35] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[34] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[33] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[32] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[31] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[30] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[29] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[28] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[27] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[26] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[25] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[24] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[23] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[22] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[21] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[20] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[19] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[18] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[17] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[62] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[61] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[60] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[59] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[58] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[57] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[56] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[55] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[54] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[53] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[52] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[51] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[50] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[49] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[48] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[47] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[46] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[45] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[44] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[43] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[42] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[41] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[40] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[39] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[38] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[37] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[36] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[35] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[34] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[33] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[32] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[31] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[30] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[29] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[28] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[27] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[26] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[25] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[24] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[23] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[22] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[21] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[20] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[19] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[18] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[62] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[61] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[60] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[59] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[58] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[57] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[56] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[55] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[54] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[53] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[52] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[51] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[50] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[49] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[48] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[47] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[46] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[45] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[44] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[43] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[42] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[41] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[40] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[39] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[38] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[37] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[36] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[35] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[34] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[33] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[32] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[31] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[30] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[29] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[28] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[27] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[26] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[25] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[24] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[23] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[22] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[21] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[20] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[19] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[62] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[61] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[60] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[59] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[58] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[57] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[56] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[55] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[54] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[53] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[52] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[51] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[50] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[49] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[48] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[47] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[46] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[45] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[44] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[43] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[42] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[41] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[40] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[39] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[38] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[37] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[36] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[35] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[34] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[33] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[32] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[31] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[30] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[29] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[28] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[27] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[26] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[25] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[24] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[23] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[22] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[21] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[20] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[62] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[61] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[60] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[59] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[58] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[57] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[56] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[55] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[54] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[53] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[52] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[51] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[50] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[49] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[48] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[47] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[46] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[45] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[44] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[43] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[42] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[41] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[40] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[39] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[38] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[37] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[36] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[35] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[34] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[33] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[32] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[31] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[30] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[29] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[28] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[27] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[26] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[25] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[24] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[23] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[22] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[21] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[62] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[61] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[60] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[59] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[58] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[57] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[56] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[55] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[54] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[53] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[52] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[51] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[50] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[49] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[48] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[47] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[46] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[45] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[44] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[43] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[42] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[41] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[40] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[39] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[38] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[37] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[36] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[35] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[34] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[33] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[32] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[31] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[30] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[29] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[28] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[27] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[26] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[25] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[24] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[23] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[22] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[62] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[61] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[60] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[59] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[58] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[57] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[56] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[55] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[54] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[53] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[52] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[51] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[50] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[49] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[48] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[47] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[46] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[45] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[44] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[43] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[42] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[41] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[40] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[39] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[38] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[37] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[36] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[35] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[34] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[33] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[32] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[31] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[30] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[29] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[28] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[27] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[26] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[25] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[24] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[23] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[62] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[61] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[60] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[59] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[58] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[57] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[56] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[55] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[54] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[53] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[52] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[51] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[50] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[49] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[48] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[47] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[46] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[45] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[44] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[43] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[42] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[41] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[40] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[39] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[38] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[37] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[36] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[35] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[34] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[33] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[32] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[31] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[30] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[29] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[28] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[27] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[26] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[25] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[24] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[62] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[61] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[60] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[59] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[58] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[57] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[56] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[55] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[54] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[53] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[52] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[51] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[50] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[49] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[48] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[47] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[46] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[45] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[44] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[43] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[42] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[41] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[40] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[39] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[38] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[37] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[36] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[35] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[34] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[33] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[32] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[31] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[30] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[29] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[28] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[27] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[26] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[25] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[62] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[61] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[60] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[59] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[58] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[57] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[56] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[55] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[54] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[53] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[52] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[51] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[50] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[49] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[48] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[47] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[46] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[45] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[44] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[43] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[42] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[41] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[40] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[39] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[38] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[37] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[36] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[35] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[34] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[33] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[32] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[31] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[30] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[29] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[28] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[27] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[26] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[62] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[61] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[60] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[59] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[58] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[57] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[56] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[55] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[54] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[53] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[52] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[51] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[50] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[49] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[48] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[47] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[46] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[45] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[44] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[43] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[42] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[41] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[40] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[39] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[38] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[37] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[36] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[35] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[34] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[33] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[32] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[31] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[30] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[29] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[28] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[27] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[62] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[61] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[60] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[59] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[58] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[57] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[56] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[55] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[54] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[53] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[52] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[51] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[50] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[49] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[48] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[47] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[46] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[45] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[44] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[43] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[42] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[41] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[40] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[39] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[38] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[37] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[36] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[35] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[34] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[33] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[32] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[31] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[30] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[29] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[28] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[62] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[61] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[60] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[59] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[58] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[57] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[56] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[55] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[54] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[53] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[52] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[51] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[50] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[49] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[48] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[47] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[46] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[45] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[44] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[43] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[42] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[41] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[40] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[39] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[38] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[37] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[36] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[35] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[34] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[33] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[32] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[31] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[30] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[29] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[62] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[61] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[60] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[59] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[58] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[57] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[56] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[55] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[54] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[53] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[52] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[51] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[50] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[49] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[48] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[47] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[46] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[45] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[44] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[43] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[42] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[41] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[40] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[39] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[38] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[37] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[36] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[35] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[34] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[33] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[32] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[31] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[30] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[62] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[61] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[60] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[59] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[58] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[57] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[56] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[55] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[54] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[53] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[52] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[51] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[50] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[49] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[48] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[47] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[46] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[45] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[44] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[43] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[42] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[41] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[40] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[39] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[38] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[37] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[36] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[35] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[34] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[33] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[32] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[31] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[62] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[61] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[60] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[59] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[58] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[57] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[56] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[55] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[54] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[53] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[52] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[51] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[50] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[49] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[48] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[47] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[46] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[45] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[44] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[43] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[42] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[41] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[40] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[39] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[38] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[37] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[36] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[35] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[34] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[33] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[32] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[62] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[61] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[60] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[59] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[58] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[57] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[56] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[55] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[54] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[53] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[52] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[51] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[50] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[49] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[48] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[47] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[46] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[45] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[44] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[43] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[42] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[41] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[40] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[39] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[38] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[37] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[36] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[35] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[34] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[33] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[62] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[61] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[60] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[59] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[58] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[57] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[56] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[55] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[54] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[53] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[52] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[51] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[50] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[49] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[48] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[47] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[46] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[45] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[44] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[43] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[42] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[41] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[40] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[39] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[38] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[37] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[36] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[35] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[34] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[62] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[61] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[60] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[59] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[58] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[57] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[56] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[55] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[54] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[53] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[52] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[51] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[50] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[49] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[48] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[47] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[46] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[45] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[44] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[43] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[42] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[41] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[40] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[39] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[38] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[37] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[36] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[35] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[62] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[61] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[60] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[59] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[58] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[57] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[56] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[55] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[54] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[53] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[52] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[51] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[50] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[49] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[48] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[47] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[46] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[45] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[44] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[43] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[42] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[41] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[40] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[39] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[38] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[37] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[36] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[62] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[61] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[60] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[59] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[58] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[57] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[56] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[55] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[54] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[53] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[52] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[51] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[50] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[49] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[48] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[47] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[46] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[45] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[44] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[43] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[42] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[41] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[40] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[39] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[38] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[37] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[62] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[61] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[60] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[59] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[58] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[57] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[56] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[55] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[54] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[53] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[52] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[51] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[50] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[49] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[48] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[47] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[46] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[45] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[44] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[43] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[42] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[41] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[40] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[39] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[38] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[62] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[61] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[60] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[59] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[58] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[57] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[56] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[55] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[54] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[53] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[52] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[51] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[50] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[49] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[48] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[47] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[46] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[45] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[44] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[43] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[42] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[41] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[40] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[39] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[62] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[61] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[60] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[59] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[58] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[57] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[56] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[55] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[54] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[53] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[52] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[51] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[50] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[49] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[48] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[47] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[46] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[45] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[44] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[43] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[42] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[41] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[40] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[62] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[61] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[60] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[59] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[58] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[57] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[56] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[55] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[54] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[53] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[52] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[51] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[50] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[49] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[48] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[47] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[46] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[45] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[44] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[43] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[42] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[41] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[62] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[61] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[60] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[59] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[58] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[57] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[56] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[55] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[54] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[53] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[52] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[51] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[50] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[49] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[48] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[47] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[46] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[45] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[44] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[43] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[42] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[62] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[61] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[60] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[59] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[58] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[57] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[56] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[55] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[54] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[53] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[52] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[51] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[50] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[49] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[48] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[47] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[46] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[45] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[44] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[43] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[62] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[61] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[60] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[59] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[58] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[57] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[56] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[55] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[54] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[53] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[52] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[51] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[50] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[49] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[48] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[47] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[46] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[45] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[44] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[62] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[61] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[60] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[59] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[58] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[57] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[56] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[55] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[54] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[53] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[52] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[51] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[50] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[49] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[48] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[47] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[46] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[45] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[62] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[61] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[60] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[59] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[58] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[57] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[56] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[55] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[54] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[53] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[52] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[51] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[50] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[49] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[48] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[47] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[46] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[62] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[61] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[60] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[59] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[58] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[57] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[56] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[55] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[54] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[53] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[52] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[51] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[50] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[49] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[48] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[47] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[62] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[61] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[60] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[59] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[58] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[57] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[56] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[55] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[54] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[53] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[52] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[51] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[50] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[49] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[48] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[62] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[61] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[60] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[59] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[58] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[57] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[56] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[55] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[54] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[53] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[52] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[51] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[50] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[49] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[62] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[61] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[60] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[59] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[58] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[57] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[56] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[55] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[54] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[53] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[52] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[51] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[50] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[62] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[61] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[60] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[59] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[58] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[57] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[56] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[55] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[54] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[53] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[52] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[51] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[62] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[61] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[60] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[59] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[58] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[57] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[56] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[55] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[54] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[53] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[52] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[62] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[61] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[60] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[59] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[58] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[57] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[56] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[55] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[54] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[53] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[62] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[61] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[60] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[59] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[58] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[57] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[56] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[55] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[54] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[62] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[61] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[60] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[59] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[58] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[57] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[56] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[55] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[62] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[61] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[60] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[59] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[58] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[57] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[56] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[62] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[61] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[60] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[59] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[58] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[57] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[62] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[61] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[60] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[59] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[58] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[62] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[61] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[60] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[59] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[62] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[61] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[60] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[62] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[61] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[62] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
|
|
#pragma unroll
|
|
for (int i = 0; i < 64; i++) w[i] = swap32_S (w[i]);
|
|
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> (offset_minus_4 * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = __byte_perm_S (w[62], w[63], selector);
|
|
w[62] = __byte_perm_S (w[61], w[62], selector);
|
|
w[61] = __byte_perm_S (w[60], w[61], selector);
|
|
w[60] = __byte_perm_S (w[59], w[60], selector);
|
|
w[59] = __byte_perm_S (w[58], w[59], selector);
|
|
w[58] = __byte_perm_S (w[57], w[58], selector);
|
|
w[57] = __byte_perm_S (w[56], w[57], selector);
|
|
w[56] = __byte_perm_S (w[55], w[56], selector);
|
|
w[55] = __byte_perm_S (w[54], w[55], selector);
|
|
w[54] = __byte_perm_S (w[53], w[54], selector);
|
|
w[53] = __byte_perm_S (w[52], w[53], selector);
|
|
w[52] = __byte_perm_S (w[51], w[52], selector);
|
|
w[51] = __byte_perm_S (w[50], w[51], selector);
|
|
w[50] = __byte_perm_S (w[49], w[50], selector);
|
|
w[49] = __byte_perm_S (w[48], w[49], selector);
|
|
w[48] = __byte_perm_S (w[47], w[48], selector);
|
|
w[47] = __byte_perm_S (w[46], w[47], selector);
|
|
w[46] = __byte_perm_S (w[45], w[46], selector);
|
|
w[45] = __byte_perm_S (w[44], w[45], selector);
|
|
w[44] = __byte_perm_S (w[43], w[44], selector);
|
|
w[43] = __byte_perm_S (w[42], w[43], selector);
|
|
w[42] = __byte_perm_S (w[41], w[42], selector);
|
|
w[41] = __byte_perm_S (w[40], w[41], selector);
|
|
w[40] = __byte_perm_S (w[39], w[40], selector);
|
|
w[39] = __byte_perm_S (w[38], w[39], selector);
|
|
w[38] = __byte_perm_S (w[37], w[38], selector);
|
|
w[37] = __byte_perm_S (w[36], w[37], selector);
|
|
w[36] = __byte_perm_S (w[35], w[36], selector);
|
|
w[35] = __byte_perm_S (w[34], w[35], selector);
|
|
w[34] = __byte_perm_S (w[33], w[34], selector);
|
|
w[33] = __byte_perm_S (w[32], w[33], selector);
|
|
w[32] = __byte_perm_S (w[31], w[32], selector);
|
|
w[31] = __byte_perm_S (w[30], w[31], selector);
|
|
w[30] = __byte_perm_S (w[29], w[30], selector);
|
|
w[29] = __byte_perm_S (w[28], w[29], selector);
|
|
w[28] = __byte_perm_S (w[27], w[28], selector);
|
|
w[27] = __byte_perm_S (w[26], w[27], selector);
|
|
w[26] = __byte_perm_S (w[25], w[26], selector);
|
|
w[25] = __byte_perm_S (w[24], w[25], selector);
|
|
w[24] = __byte_perm_S (w[23], w[24], selector);
|
|
w[23] = __byte_perm_S (w[22], w[23], selector);
|
|
w[22] = __byte_perm_S (w[21], w[22], selector);
|
|
w[21] = __byte_perm_S (w[20], w[21], selector);
|
|
w[20] = __byte_perm_S (w[19], w[20], selector);
|
|
w[19] = __byte_perm_S (w[18], w[19], selector);
|
|
w[18] = __byte_perm_S (w[17], w[18], selector);
|
|
w[17] = __byte_perm_S (w[16], w[17], selector);
|
|
w[16] = __byte_perm_S (w[15], w[16], selector);
|
|
w[15] = __byte_perm_S (w[14], w[15], selector);
|
|
w[14] = __byte_perm_S (w[13], w[14], selector);
|
|
w[13] = __byte_perm_S (w[12], w[13], selector);
|
|
w[12] = __byte_perm_S (w[11], w[12], selector);
|
|
w[11] = __byte_perm_S (w[10], w[11], selector);
|
|
w[10] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[ 9] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[ 8] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 7] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 6] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 5] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 4] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 3] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 2] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 1] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 0] = __byte_perm_S ( 0, w[ 0], selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = __byte_perm_S (w[61], w[62], selector);
|
|
w[62] = __byte_perm_S (w[60], w[61], selector);
|
|
w[61] = __byte_perm_S (w[59], w[60], selector);
|
|
w[60] = __byte_perm_S (w[58], w[59], selector);
|
|
w[59] = __byte_perm_S (w[57], w[58], selector);
|
|
w[58] = __byte_perm_S (w[56], w[57], selector);
|
|
w[57] = __byte_perm_S (w[55], w[56], selector);
|
|
w[56] = __byte_perm_S (w[54], w[55], selector);
|
|
w[55] = __byte_perm_S (w[53], w[54], selector);
|
|
w[54] = __byte_perm_S (w[52], w[53], selector);
|
|
w[53] = __byte_perm_S (w[51], w[52], selector);
|
|
w[52] = __byte_perm_S (w[50], w[51], selector);
|
|
w[51] = __byte_perm_S (w[49], w[50], selector);
|
|
w[50] = __byte_perm_S (w[48], w[49], selector);
|
|
w[49] = __byte_perm_S (w[47], w[48], selector);
|
|
w[48] = __byte_perm_S (w[46], w[47], selector);
|
|
w[47] = __byte_perm_S (w[45], w[46], selector);
|
|
w[46] = __byte_perm_S (w[44], w[45], selector);
|
|
w[45] = __byte_perm_S (w[43], w[44], selector);
|
|
w[44] = __byte_perm_S (w[42], w[43], selector);
|
|
w[43] = __byte_perm_S (w[41], w[42], selector);
|
|
w[42] = __byte_perm_S (w[40], w[41], selector);
|
|
w[41] = __byte_perm_S (w[39], w[40], selector);
|
|
w[40] = __byte_perm_S (w[38], w[39], selector);
|
|
w[39] = __byte_perm_S (w[37], w[38], selector);
|
|
w[38] = __byte_perm_S (w[36], w[37], selector);
|
|
w[37] = __byte_perm_S (w[35], w[36], selector);
|
|
w[36] = __byte_perm_S (w[34], w[35], selector);
|
|
w[35] = __byte_perm_S (w[33], w[34], selector);
|
|
w[34] = __byte_perm_S (w[32], w[33], selector);
|
|
w[33] = __byte_perm_S (w[31], w[32], selector);
|
|
w[32] = __byte_perm_S (w[30], w[31], selector);
|
|
w[31] = __byte_perm_S (w[29], w[30], selector);
|
|
w[30] = __byte_perm_S (w[28], w[29], selector);
|
|
w[29] = __byte_perm_S (w[27], w[28], selector);
|
|
w[28] = __byte_perm_S (w[26], w[27], selector);
|
|
w[27] = __byte_perm_S (w[25], w[26], selector);
|
|
w[26] = __byte_perm_S (w[24], w[25], selector);
|
|
w[25] = __byte_perm_S (w[23], w[24], selector);
|
|
w[24] = __byte_perm_S (w[22], w[23], selector);
|
|
w[23] = __byte_perm_S (w[21], w[22], selector);
|
|
w[22] = __byte_perm_S (w[20], w[21], selector);
|
|
w[21] = __byte_perm_S (w[19], w[20], selector);
|
|
w[20] = __byte_perm_S (w[18], w[19], selector);
|
|
w[19] = __byte_perm_S (w[17], w[18], selector);
|
|
w[18] = __byte_perm_S (w[16], w[17], selector);
|
|
w[17] = __byte_perm_S (w[15], w[16], selector);
|
|
w[16] = __byte_perm_S (w[14], w[15], selector);
|
|
w[15] = __byte_perm_S (w[13], w[14], selector);
|
|
w[14] = __byte_perm_S (w[12], w[13], selector);
|
|
w[13] = __byte_perm_S (w[11], w[12], selector);
|
|
w[12] = __byte_perm_S (w[10], w[11], selector);
|
|
w[11] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[10] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[ 9] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 8] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 7] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 6] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 5] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 4] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 3] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 2] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 1] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = __byte_perm_S (w[60], w[61], selector);
|
|
w[62] = __byte_perm_S (w[59], w[60], selector);
|
|
w[61] = __byte_perm_S (w[58], w[59], selector);
|
|
w[60] = __byte_perm_S (w[57], w[58], selector);
|
|
w[59] = __byte_perm_S (w[56], w[57], selector);
|
|
w[58] = __byte_perm_S (w[55], w[56], selector);
|
|
w[57] = __byte_perm_S (w[54], w[55], selector);
|
|
w[56] = __byte_perm_S (w[53], w[54], selector);
|
|
w[55] = __byte_perm_S (w[52], w[53], selector);
|
|
w[54] = __byte_perm_S (w[51], w[52], selector);
|
|
w[53] = __byte_perm_S (w[50], w[51], selector);
|
|
w[52] = __byte_perm_S (w[49], w[50], selector);
|
|
w[51] = __byte_perm_S (w[48], w[49], selector);
|
|
w[50] = __byte_perm_S (w[47], w[48], selector);
|
|
w[49] = __byte_perm_S (w[46], w[47], selector);
|
|
w[48] = __byte_perm_S (w[45], w[46], selector);
|
|
w[47] = __byte_perm_S (w[44], w[45], selector);
|
|
w[46] = __byte_perm_S (w[43], w[44], selector);
|
|
w[45] = __byte_perm_S (w[42], w[43], selector);
|
|
w[44] = __byte_perm_S (w[41], w[42], selector);
|
|
w[43] = __byte_perm_S (w[40], w[41], selector);
|
|
w[42] = __byte_perm_S (w[39], w[40], selector);
|
|
w[41] = __byte_perm_S (w[38], w[39], selector);
|
|
w[40] = __byte_perm_S (w[37], w[38], selector);
|
|
w[39] = __byte_perm_S (w[36], w[37], selector);
|
|
w[38] = __byte_perm_S (w[35], w[36], selector);
|
|
w[37] = __byte_perm_S (w[34], w[35], selector);
|
|
w[36] = __byte_perm_S (w[33], w[34], selector);
|
|
w[35] = __byte_perm_S (w[32], w[33], selector);
|
|
w[34] = __byte_perm_S (w[31], w[32], selector);
|
|
w[33] = __byte_perm_S (w[30], w[31], selector);
|
|
w[32] = __byte_perm_S (w[29], w[30], selector);
|
|
w[31] = __byte_perm_S (w[28], w[29], selector);
|
|
w[30] = __byte_perm_S (w[27], w[28], selector);
|
|
w[29] = __byte_perm_S (w[26], w[27], selector);
|
|
w[28] = __byte_perm_S (w[25], w[26], selector);
|
|
w[27] = __byte_perm_S (w[24], w[25], selector);
|
|
w[26] = __byte_perm_S (w[23], w[24], selector);
|
|
w[25] = __byte_perm_S (w[22], w[23], selector);
|
|
w[24] = __byte_perm_S (w[21], w[22], selector);
|
|
w[23] = __byte_perm_S (w[20], w[21], selector);
|
|
w[22] = __byte_perm_S (w[19], w[20], selector);
|
|
w[21] = __byte_perm_S (w[18], w[19], selector);
|
|
w[20] = __byte_perm_S (w[17], w[18], selector);
|
|
w[19] = __byte_perm_S (w[16], w[17], selector);
|
|
w[18] = __byte_perm_S (w[15], w[16], selector);
|
|
w[17] = __byte_perm_S (w[14], w[15], selector);
|
|
w[16] = __byte_perm_S (w[13], w[14], selector);
|
|
w[15] = __byte_perm_S (w[12], w[13], selector);
|
|
w[14] = __byte_perm_S (w[11], w[12], selector);
|
|
w[13] = __byte_perm_S (w[10], w[11], selector);
|
|
w[12] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[11] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[10] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[ 9] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 8] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 7] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 6] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 5] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 4] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 3] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 2] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = __byte_perm_S (w[59], w[60], selector);
|
|
w[62] = __byte_perm_S (w[58], w[59], selector);
|
|
w[61] = __byte_perm_S (w[57], w[58], selector);
|
|
w[60] = __byte_perm_S (w[56], w[57], selector);
|
|
w[59] = __byte_perm_S (w[55], w[56], selector);
|
|
w[58] = __byte_perm_S (w[54], w[55], selector);
|
|
w[57] = __byte_perm_S (w[53], w[54], selector);
|
|
w[56] = __byte_perm_S (w[52], w[53], selector);
|
|
w[55] = __byte_perm_S (w[51], w[52], selector);
|
|
w[54] = __byte_perm_S (w[50], w[51], selector);
|
|
w[53] = __byte_perm_S (w[49], w[50], selector);
|
|
w[52] = __byte_perm_S (w[48], w[49], selector);
|
|
w[51] = __byte_perm_S (w[47], w[48], selector);
|
|
w[50] = __byte_perm_S (w[46], w[47], selector);
|
|
w[49] = __byte_perm_S (w[45], w[46], selector);
|
|
w[48] = __byte_perm_S (w[44], w[45], selector);
|
|
w[47] = __byte_perm_S (w[43], w[44], selector);
|
|
w[46] = __byte_perm_S (w[42], w[43], selector);
|
|
w[45] = __byte_perm_S (w[41], w[42], selector);
|
|
w[44] = __byte_perm_S (w[40], w[41], selector);
|
|
w[43] = __byte_perm_S (w[39], w[40], selector);
|
|
w[42] = __byte_perm_S (w[38], w[39], selector);
|
|
w[41] = __byte_perm_S (w[37], w[38], selector);
|
|
w[40] = __byte_perm_S (w[36], w[37], selector);
|
|
w[39] = __byte_perm_S (w[35], w[36], selector);
|
|
w[38] = __byte_perm_S (w[34], w[35], selector);
|
|
w[37] = __byte_perm_S (w[33], w[34], selector);
|
|
w[36] = __byte_perm_S (w[32], w[33], selector);
|
|
w[35] = __byte_perm_S (w[31], w[32], selector);
|
|
w[34] = __byte_perm_S (w[30], w[31], selector);
|
|
w[33] = __byte_perm_S (w[29], w[30], selector);
|
|
w[32] = __byte_perm_S (w[28], w[29], selector);
|
|
w[31] = __byte_perm_S (w[27], w[28], selector);
|
|
w[30] = __byte_perm_S (w[26], w[27], selector);
|
|
w[29] = __byte_perm_S (w[25], w[26], selector);
|
|
w[28] = __byte_perm_S (w[24], w[25], selector);
|
|
w[27] = __byte_perm_S (w[23], w[24], selector);
|
|
w[26] = __byte_perm_S (w[22], w[23], selector);
|
|
w[25] = __byte_perm_S (w[21], w[22], selector);
|
|
w[24] = __byte_perm_S (w[20], w[21], selector);
|
|
w[23] = __byte_perm_S (w[19], w[20], selector);
|
|
w[22] = __byte_perm_S (w[18], w[19], selector);
|
|
w[21] = __byte_perm_S (w[17], w[18], selector);
|
|
w[20] = __byte_perm_S (w[16], w[17], selector);
|
|
w[19] = __byte_perm_S (w[15], w[16], selector);
|
|
w[18] = __byte_perm_S (w[14], w[15], selector);
|
|
w[17] = __byte_perm_S (w[13], w[14], selector);
|
|
w[16] = __byte_perm_S (w[12], w[13], selector);
|
|
w[15] = __byte_perm_S (w[11], w[12], selector);
|
|
w[14] = __byte_perm_S (w[10], w[11], selector);
|
|
w[13] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[12] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[11] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[10] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[ 9] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 8] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 7] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 6] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 5] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 4] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 3] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = __byte_perm_S (w[58], w[59], selector);
|
|
w[62] = __byte_perm_S (w[57], w[58], selector);
|
|
w[61] = __byte_perm_S (w[56], w[57], selector);
|
|
w[60] = __byte_perm_S (w[55], w[56], selector);
|
|
w[59] = __byte_perm_S (w[54], w[55], selector);
|
|
w[58] = __byte_perm_S (w[53], w[54], selector);
|
|
w[57] = __byte_perm_S (w[52], w[53], selector);
|
|
w[56] = __byte_perm_S (w[51], w[52], selector);
|
|
w[55] = __byte_perm_S (w[50], w[51], selector);
|
|
w[54] = __byte_perm_S (w[49], w[50], selector);
|
|
w[53] = __byte_perm_S (w[48], w[49], selector);
|
|
w[52] = __byte_perm_S (w[47], w[48], selector);
|
|
w[51] = __byte_perm_S (w[46], w[47], selector);
|
|
w[50] = __byte_perm_S (w[45], w[46], selector);
|
|
w[49] = __byte_perm_S (w[44], w[45], selector);
|
|
w[48] = __byte_perm_S (w[43], w[44], selector);
|
|
w[47] = __byte_perm_S (w[42], w[43], selector);
|
|
w[46] = __byte_perm_S (w[41], w[42], selector);
|
|
w[45] = __byte_perm_S (w[40], w[41], selector);
|
|
w[44] = __byte_perm_S (w[39], w[40], selector);
|
|
w[43] = __byte_perm_S (w[38], w[39], selector);
|
|
w[42] = __byte_perm_S (w[37], w[38], selector);
|
|
w[41] = __byte_perm_S (w[36], w[37], selector);
|
|
w[40] = __byte_perm_S (w[35], w[36], selector);
|
|
w[39] = __byte_perm_S (w[34], w[35], selector);
|
|
w[38] = __byte_perm_S (w[33], w[34], selector);
|
|
w[37] = __byte_perm_S (w[32], w[33], selector);
|
|
w[36] = __byte_perm_S (w[31], w[32], selector);
|
|
w[35] = __byte_perm_S (w[30], w[31], selector);
|
|
w[34] = __byte_perm_S (w[29], w[30], selector);
|
|
w[33] = __byte_perm_S (w[28], w[29], selector);
|
|
w[32] = __byte_perm_S (w[27], w[28], selector);
|
|
w[31] = __byte_perm_S (w[26], w[27], selector);
|
|
w[30] = __byte_perm_S (w[25], w[26], selector);
|
|
w[29] = __byte_perm_S (w[24], w[25], selector);
|
|
w[28] = __byte_perm_S (w[23], w[24], selector);
|
|
w[27] = __byte_perm_S (w[22], w[23], selector);
|
|
w[26] = __byte_perm_S (w[21], w[22], selector);
|
|
w[25] = __byte_perm_S (w[20], w[21], selector);
|
|
w[24] = __byte_perm_S (w[19], w[20], selector);
|
|
w[23] = __byte_perm_S (w[18], w[19], selector);
|
|
w[22] = __byte_perm_S (w[17], w[18], selector);
|
|
w[21] = __byte_perm_S (w[16], w[17], selector);
|
|
w[20] = __byte_perm_S (w[15], w[16], selector);
|
|
w[19] = __byte_perm_S (w[14], w[15], selector);
|
|
w[18] = __byte_perm_S (w[13], w[14], selector);
|
|
w[17] = __byte_perm_S (w[12], w[13], selector);
|
|
w[16] = __byte_perm_S (w[11], w[12], selector);
|
|
w[15] = __byte_perm_S (w[10], w[11], selector);
|
|
w[14] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[13] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[12] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[11] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[10] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[ 9] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 8] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 7] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 6] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 5] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 4] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = __byte_perm_S (w[57], w[58], selector);
|
|
w[62] = __byte_perm_S (w[56], w[57], selector);
|
|
w[61] = __byte_perm_S (w[55], w[56], selector);
|
|
w[60] = __byte_perm_S (w[54], w[55], selector);
|
|
w[59] = __byte_perm_S (w[53], w[54], selector);
|
|
w[58] = __byte_perm_S (w[52], w[53], selector);
|
|
w[57] = __byte_perm_S (w[51], w[52], selector);
|
|
w[56] = __byte_perm_S (w[50], w[51], selector);
|
|
w[55] = __byte_perm_S (w[49], w[50], selector);
|
|
w[54] = __byte_perm_S (w[48], w[49], selector);
|
|
w[53] = __byte_perm_S (w[47], w[48], selector);
|
|
w[52] = __byte_perm_S (w[46], w[47], selector);
|
|
w[51] = __byte_perm_S (w[45], w[46], selector);
|
|
w[50] = __byte_perm_S (w[44], w[45], selector);
|
|
w[49] = __byte_perm_S (w[43], w[44], selector);
|
|
w[48] = __byte_perm_S (w[42], w[43], selector);
|
|
w[47] = __byte_perm_S (w[41], w[42], selector);
|
|
w[46] = __byte_perm_S (w[40], w[41], selector);
|
|
w[45] = __byte_perm_S (w[39], w[40], selector);
|
|
w[44] = __byte_perm_S (w[38], w[39], selector);
|
|
w[43] = __byte_perm_S (w[37], w[38], selector);
|
|
w[42] = __byte_perm_S (w[36], w[37], selector);
|
|
w[41] = __byte_perm_S (w[35], w[36], selector);
|
|
w[40] = __byte_perm_S (w[34], w[35], selector);
|
|
w[39] = __byte_perm_S (w[33], w[34], selector);
|
|
w[38] = __byte_perm_S (w[32], w[33], selector);
|
|
w[37] = __byte_perm_S (w[31], w[32], selector);
|
|
w[36] = __byte_perm_S (w[30], w[31], selector);
|
|
w[35] = __byte_perm_S (w[29], w[30], selector);
|
|
w[34] = __byte_perm_S (w[28], w[29], selector);
|
|
w[33] = __byte_perm_S (w[27], w[28], selector);
|
|
w[32] = __byte_perm_S (w[26], w[27], selector);
|
|
w[31] = __byte_perm_S (w[25], w[26], selector);
|
|
w[30] = __byte_perm_S (w[24], w[25], selector);
|
|
w[29] = __byte_perm_S (w[23], w[24], selector);
|
|
w[28] = __byte_perm_S (w[22], w[23], selector);
|
|
w[27] = __byte_perm_S (w[21], w[22], selector);
|
|
w[26] = __byte_perm_S (w[20], w[21], selector);
|
|
w[25] = __byte_perm_S (w[19], w[20], selector);
|
|
w[24] = __byte_perm_S (w[18], w[19], selector);
|
|
w[23] = __byte_perm_S (w[17], w[18], selector);
|
|
w[22] = __byte_perm_S (w[16], w[17], selector);
|
|
w[21] = __byte_perm_S (w[15], w[16], selector);
|
|
w[20] = __byte_perm_S (w[14], w[15], selector);
|
|
w[19] = __byte_perm_S (w[13], w[14], selector);
|
|
w[18] = __byte_perm_S (w[12], w[13], selector);
|
|
w[17] = __byte_perm_S (w[11], w[12], selector);
|
|
w[16] = __byte_perm_S (w[10], w[11], selector);
|
|
w[15] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[14] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[13] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[12] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[11] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[10] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[ 9] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 8] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 7] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 6] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 5] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = __byte_perm_S (w[56], w[57], selector);
|
|
w[62] = __byte_perm_S (w[55], w[56], selector);
|
|
w[61] = __byte_perm_S (w[54], w[55], selector);
|
|
w[60] = __byte_perm_S (w[53], w[54], selector);
|
|
w[59] = __byte_perm_S (w[52], w[53], selector);
|
|
w[58] = __byte_perm_S (w[51], w[52], selector);
|
|
w[57] = __byte_perm_S (w[50], w[51], selector);
|
|
w[56] = __byte_perm_S (w[49], w[50], selector);
|
|
w[55] = __byte_perm_S (w[48], w[49], selector);
|
|
w[54] = __byte_perm_S (w[47], w[48], selector);
|
|
w[53] = __byte_perm_S (w[46], w[47], selector);
|
|
w[52] = __byte_perm_S (w[45], w[46], selector);
|
|
w[51] = __byte_perm_S (w[44], w[45], selector);
|
|
w[50] = __byte_perm_S (w[43], w[44], selector);
|
|
w[49] = __byte_perm_S (w[42], w[43], selector);
|
|
w[48] = __byte_perm_S (w[41], w[42], selector);
|
|
w[47] = __byte_perm_S (w[40], w[41], selector);
|
|
w[46] = __byte_perm_S (w[39], w[40], selector);
|
|
w[45] = __byte_perm_S (w[38], w[39], selector);
|
|
w[44] = __byte_perm_S (w[37], w[38], selector);
|
|
w[43] = __byte_perm_S (w[36], w[37], selector);
|
|
w[42] = __byte_perm_S (w[35], w[36], selector);
|
|
w[41] = __byte_perm_S (w[34], w[35], selector);
|
|
w[40] = __byte_perm_S (w[33], w[34], selector);
|
|
w[39] = __byte_perm_S (w[32], w[33], selector);
|
|
w[38] = __byte_perm_S (w[31], w[32], selector);
|
|
w[37] = __byte_perm_S (w[30], w[31], selector);
|
|
w[36] = __byte_perm_S (w[29], w[30], selector);
|
|
w[35] = __byte_perm_S (w[28], w[29], selector);
|
|
w[34] = __byte_perm_S (w[27], w[28], selector);
|
|
w[33] = __byte_perm_S (w[26], w[27], selector);
|
|
w[32] = __byte_perm_S (w[25], w[26], selector);
|
|
w[31] = __byte_perm_S (w[24], w[25], selector);
|
|
w[30] = __byte_perm_S (w[23], w[24], selector);
|
|
w[29] = __byte_perm_S (w[22], w[23], selector);
|
|
w[28] = __byte_perm_S (w[21], w[22], selector);
|
|
w[27] = __byte_perm_S (w[20], w[21], selector);
|
|
w[26] = __byte_perm_S (w[19], w[20], selector);
|
|
w[25] = __byte_perm_S (w[18], w[19], selector);
|
|
w[24] = __byte_perm_S (w[17], w[18], selector);
|
|
w[23] = __byte_perm_S (w[16], w[17], selector);
|
|
w[22] = __byte_perm_S (w[15], w[16], selector);
|
|
w[21] = __byte_perm_S (w[14], w[15], selector);
|
|
w[20] = __byte_perm_S (w[13], w[14], selector);
|
|
w[19] = __byte_perm_S (w[12], w[13], selector);
|
|
w[18] = __byte_perm_S (w[11], w[12], selector);
|
|
w[17] = __byte_perm_S (w[10], w[11], selector);
|
|
w[16] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[15] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[14] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[13] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[12] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[11] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[10] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[ 9] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 8] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 7] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 6] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = __byte_perm_S (w[55], w[56], selector);
|
|
w[62] = __byte_perm_S (w[54], w[55], selector);
|
|
w[61] = __byte_perm_S (w[53], w[54], selector);
|
|
w[60] = __byte_perm_S (w[52], w[53], selector);
|
|
w[59] = __byte_perm_S (w[51], w[52], selector);
|
|
w[58] = __byte_perm_S (w[50], w[51], selector);
|
|
w[57] = __byte_perm_S (w[49], w[50], selector);
|
|
w[56] = __byte_perm_S (w[48], w[49], selector);
|
|
w[55] = __byte_perm_S (w[47], w[48], selector);
|
|
w[54] = __byte_perm_S (w[46], w[47], selector);
|
|
w[53] = __byte_perm_S (w[45], w[46], selector);
|
|
w[52] = __byte_perm_S (w[44], w[45], selector);
|
|
w[51] = __byte_perm_S (w[43], w[44], selector);
|
|
w[50] = __byte_perm_S (w[42], w[43], selector);
|
|
w[49] = __byte_perm_S (w[41], w[42], selector);
|
|
w[48] = __byte_perm_S (w[40], w[41], selector);
|
|
w[47] = __byte_perm_S (w[39], w[40], selector);
|
|
w[46] = __byte_perm_S (w[38], w[39], selector);
|
|
w[45] = __byte_perm_S (w[37], w[38], selector);
|
|
w[44] = __byte_perm_S (w[36], w[37], selector);
|
|
w[43] = __byte_perm_S (w[35], w[36], selector);
|
|
w[42] = __byte_perm_S (w[34], w[35], selector);
|
|
w[41] = __byte_perm_S (w[33], w[34], selector);
|
|
w[40] = __byte_perm_S (w[32], w[33], selector);
|
|
w[39] = __byte_perm_S (w[31], w[32], selector);
|
|
w[38] = __byte_perm_S (w[30], w[31], selector);
|
|
w[37] = __byte_perm_S (w[29], w[30], selector);
|
|
w[36] = __byte_perm_S (w[28], w[29], selector);
|
|
w[35] = __byte_perm_S (w[27], w[28], selector);
|
|
w[34] = __byte_perm_S (w[26], w[27], selector);
|
|
w[33] = __byte_perm_S (w[25], w[26], selector);
|
|
w[32] = __byte_perm_S (w[24], w[25], selector);
|
|
w[31] = __byte_perm_S (w[23], w[24], selector);
|
|
w[30] = __byte_perm_S (w[22], w[23], selector);
|
|
w[29] = __byte_perm_S (w[21], w[22], selector);
|
|
w[28] = __byte_perm_S (w[20], w[21], selector);
|
|
w[27] = __byte_perm_S (w[19], w[20], selector);
|
|
w[26] = __byte_perm_S (w[18], w[19], selector);
|
|
w[25] = __byte_perm_S (w[17], w[18], selector);
|
|
w[24] = __byte_perm_S (w[16], w[17], selector);
|
|
w[23] = __byte_perm_S (w[15], w[16], selector);
|
|
w[22] = __byte_perm_S (w[14], w[15], selector);
|
|
w[21] = __byte_perm_S (w[13], w[14], selector);
|
|
w[20] = __byte_perm_S (w[12], w[13], selector);
|
|
w[19] = __byte_perm_S (w[11], w[12], selector);
|
|
w[18] = __byte_perm_S (w[10], w[11], selector);
|
|
w[17] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[16] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[15] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[14] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[13] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[12] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[11] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[10] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[ 9] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 8] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 7] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = __byte_perm_S (w[54], w[55], selector);
|
|
w[62] = __byte_perm_S (w[53], w[54], selector);
|
|
w[61] = __byte_perm_S (w[52], w[53], selector);
|
|
w[60] = __byte_perm_S (w[51], w[52], selector);
|
|
w[59] = __byte_perm_S (w[50], w[51], selector);
|
|
w[58] = __byte_perm_S (w[49], w[50], selector);
|
|
w[57] = __byte_perm_S (w[48], w[49], selector);
|
|
w[56] = __byte_perm_S (w[47], w[48], selector);
|
|
w[55] = __byte_perm_S (w[46], w[47], selector);
|
|
w[54] = __byte_perm_S (w[45], w[46], selector);
|
|
w[53] = __byte_perm_S (w[44], w[45], selector);
|
|
w[52] = __byte_perm_S (w[43], w[44], selector);
|
|
w[51] = __byte_perm_S (w[42], w[43], selector);
|
|
w[50] = __byte_perm_S (w[41], w[42], selector);
|
|
w[49] = __byte_perm_S (w[40], w[41], selector);
|
|
w[48] = __byte_perm_S (w[39], w[40], selector);
|
|
w[47] = __byte_perm_S (w[38], w[39], selector);
|
|
w[46] = __byte_perm_S (w[37], w[38], selector);
|
|
w[45] = __byte_perm_S (w[36], w[37], selector);
|
|
w[44] = __byte_perm_S (w[35], w[36], selector);
|
|
w[43] = __byte_perm_S (w[34], w[35], selector);
|
|
w[42] = __byte_perm_S (w[33], w[34], selector);
|
|
w[41] = __byte_perm_S (w[32], w[33], selector);
|
|
w[40] = __byte_perm_S (w[31], w[32], selector);
|
|
w[39] = __byte_perm_S (w[30], w[31], selector);
|
|
w[38] = __byte_perm_S (w[29], w[30], selector);
|
|
w[37] = __byte_perm_S (w[28], w[29], selector);
|
|
w[36] = __byte_perm_S (w[27], w[28], selector);
|
|
w[35] = __byte_perm_S (w[26], w[27], selector);
|
|
w[34] = __byte_perm_S (w[25], w[26], selector);
|
|
w[33] = __byte_perm_S (w[24], w[25], selector);
|
|
w[32] = __byte_perm_S (w[23], w[24], selector);
|
|
w[31] = __byte_perm_S (w[22], w[23], selector);
|
|
w[30] = __byte_perm_S (w[21], w[22], selector);
|
|
w[29] = __byte_perm_S (w[20], w[21], selector);
|
|
w[28] = __byte_perm_S (w[19], w[20], selector);
|
|
w[27] = __byte_perm_S (w[18], w[19], selector);
|
|
w[26] = __byte_perm_S (w[17], w[18], selector);
|
|
w[25] = __byte_perm_S (w[16], w[17], selector);
|
|
w[24] = __byte_perm_S (w[15], w[16], selector);
|
|
w[23] = __byte_perm_S (w[14], w[15], selector);
|
|
w[22] = __byte_perm_S (w[13], w[14], selector);
|
|
w[21] = __byte_perm_S (w[12], w[13], selector);
|
|
w[20] = __byte_perm_S (w[11], w[12], selector);
|
|
w[19] = __byte_perm_S (w[10], w[11], selector);
|
|
w[18] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[17] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[16] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[15] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[14] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[13] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[12] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[11] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[10] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[ 9] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 8] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = __byte_perm_S (w[53], w[54], selector);
|
|
w[62] = __byte_perm_S (w[52], w[53], selector);
|
|
w[61] = __byte_perm_S (w[51], w[52], selector);
|
|
w[60] = __byte_perm_S (w[50], w[51], selector);
|
|
w[59] = __byte_perm_S (w[49], w[50], selector);
|
|
w[58] = __byte_perm_S (w[48], w[49], selector);
|
|
w[57] = __byte_perm_S (w[47], w[48], selector);
|
|
w[56] = __byte_perm_S (w[46], w[47], selector);
|
|
w[55] = __byte_perm_S (w[45], w[46], selector);
|
|
w[54] = __byte_perm_S (w[44], w[45], selector);
|
|
w[53] = __byte_perm_S (w[43], w[44], selector);
|
|
w[52] = __byte_perm_S (w[42], w[43], selector);
|
|
w[51] = __byte_perm_S (w[41], w[42], selector);
|
|
w[50] = __byte_perm_S (w[40], w[41], selector);
|
|
w[49] = __byte_perm_S (w[39], w[40], selector);
|
|
w[48] = __byte_perm_S (w[38], w[39], selector);
|
|
w[47] = __byte_perm_S (w[37], w[38], selector);
|
|
w[46] = __byte_perm_S (w[36], w[37], selector);
|
|
w[45] = __byte_perm_S (w[35], w[36], selector);
|
|
w[44] = __byte_perm_S (w[34], w[35], selector);
|
|
w[43] = __byte_perm_S (w[33], w[34], selector);
|
|
w[42] = __byte_perm_S (w[32], w[33], selector);
|
|
w[41] = __byte_perm_S (w[31], w[32], selector);
|
|
w[40] = __byte_perm_S (w[30], w[31], selector);
|
|
w[39] = __byte_perm_S (w[29], w[30], selector);
|
|
w[38] = __byte_perm_S (w[28], w[29], selector);
|
|
w[37] = __byte_perm_S (w[27], w[28], selector);
|
|
w[36] = __byte_perm_S (w[26], w[27], selector);
|
|
w[35] = __byte_perm_S (w[25], w[26], selector);
|
|
w[34] = __byte_perm_S (w[24], w[25], selector);
|
|
w[33] = __byte_perm_S (w[23], w[24], selector);
|
|
w[32] = __byte_perm_S (w[22], w[23], selector);
|
|
w[31] = __byte_perm_S (w[21], w[22], selector);
|
|
w[30] = __byte_perm_S (w[20], w[21], selector);
|
|
w[29] = __byte_perm_S (w[19], w[20], selector);
|
|
w[28] = __byte_perm_S (w[18], w[19], selector);
|
|
w[27] = __byte_perm_S (w[17], w[18], selector);
|
|
w[26] = __byte_perm_S (w[16], w[17], selector);
|
|
w[25] = __byte_perm_S (w[15], w[16], selector);
|
|
w[24] = __byte_perm_S (w[14], w[15], selector);
|
|
w[23] = __byte_perm_S (w[13], w[14], selector);
|
|
w[22] = __byte_perm_S (w[12], w[13], selector);
|
|
w[21] = __byte_perm_S (w[11], w[12], selector);
|
|
w[20] = __byte_perm_S (w[10], w[11], selector);
|
|
w[19] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[18] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[17] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[16] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[15] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[14] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[13] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[12] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[11] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[10] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[ 9] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = __byte_perm_S (w[52], w[53], selector);
|
|
w[62] = __byte_perm_S (w[51], w[52], selector);
|
|
w[61] = __byte_perm_S (w[50], w[51], selector);
|
|
w[60] = __byte_perm_S (w[49], w[50], selector);
|
|
w[59] = __byte_perm_S (w[48], w[49], selector);
|
|
w[58] = __byte_perm_S (w[47], w[48], selector);
|
|
w[57] = __byte_perm_S (w[46], w[47], selector);
|
|
w[56] = __byte_perm_S (w[45], w[46], selector);
|
|
w[55] = __byte_perm_S (w[44], w[45], selector);
|
|
w[54] = __byte_perm_S (w[43], w[44], selector);
|
|
w[53] = __byte_perm_S (w[42], w[43], selector);
|
|
w[52] = __byte_perm_S (w[41], w[42], selector);
|
|
w[51] = __byte_perm_S (w[40], w[41], selector);
|
|
w[50] = __byte_perm_S (w[39], w[40], selector);
|
|
w[49] = __byte_perm_S (w[38], w[39], selector);
|
|
w[48] = __byte_perm_S (w[37], w[38], selector);
|
|
w[47] = __byte_perm_S (w[36], w[37], selector);
|
|
w[46] = __byte_perm_S (w[35], w[36], selector);
|
|
w[45] = __byte_perm_S (w[34], w[35], selector);
|
|
w[44] = __byte_perm_S (w[33], w[34], selector);
|
|
w[43] = __byte_perm_S (w[32], w[33], selector);
|
|
w[42] = __byte_perm_S (w[31], w[32], selector);
|
|
w[41] = __byte_perm_S (w[30], w[31], selector);
|
|
w[40] = __byte_perm_S (w[29], w[30], selector);
|
|
w[39] = __byte_perm_S (w[28], w[29], selector);
|
|
w[38] = __byte_perm_S (w[27], w[28], selector);
|
|
w[37] = __byte_perm_S (w[26], w[27], selector);
|
|
w[36] = __byte_perm_S (w[25], w[26], selector);
|
|
w[35] = __byte_perm_S (w[24], w[25], selector);
|
|
w[34] = __byte_perm_S (w[23], w[24], selector);
|
|
w[33] = __byte_perm_S (w[22], w[23], selector);
|
|
w[32] = __byte_perm_S (w[21], w[22], selector);
|
|
w[31] = __byte_perm_S (w[20], w[21], selector);
|
|
w[30] = __byte_perm_S (w[19], w[20], selector);
|
|
w[29] = __byte_perm_S (w[18], w[19], selector);
|
|
w[28] = __byte_perm_S (w[17], w[18], selector);
|
|
w[27] = __byte_perm_S (w[16], w[17], selector);
|
|
w[26] = __byte_perm_S (w[15], w[16], selector);
|
|
w[25] = __byte_perm_S (w[14], w[15], selector);
|
|
w[24] = __byte_perm_S (w[13], w[14], selector);
|
|
w[23] = __byte_perm_S (w[12], w[13], selector);
|
|
w[22] = __byte_perm_S (w[11], w[12], selector);
|
|
w[21] = __byte_perm_S (w[10], w[11], selector);
|
|
w[20] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[19] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[18] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[17] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[16] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[15] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[14] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[13] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[12] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[11] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[10] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = __byte_perm_S (w[51], w[52], selector);
|
|
w[62] = __byte_perm_S (w[50], w[51], selector);
|
|
w[61] = __byte_perm_S (w[49], w[50], selector);
|
|
w[60] = __byte_perm_S (w[48], w[49], selector);
|
|
w[59] = __byte_perm_S (w[47], w[48], selector);
|
|
w[58] = __byte_perm_S (w[46], w[47], selector);
|
|
w[57] = __byte_perm_S (w[45], w[46], selector);
|
|
w[56] = __byte_perm_S (w[44], w[45], selector);
|
|
w[55] = __byte_perm_S (w[43], w[44], selector);
|
|
w[54] = __byte_perm_S (w[42], w[43], selector);
|
|
w[53] = __byte_perm_S (w[41], w[42], selector);
|
|
w[52] = __byte_perm_S (w[40], w[41], selector);
|
|
w[51] = __byte_perm_S (w[39], w[40], selector);
|
|
w[50] = __byte_perm_S (w[38], w[39], selector);
|
|
w[49] = __byte_perm_S (w[37], w[38], selector);
|
|
w[48] = __byte_perm_S (w[36], w[37], selector);
|
|
w[47] = __byte_perm_S (w[35], w[36], selector);
|
|
w[46] = __byte_perm_S (w[34], w[35], selector);
|
|
w[45] = __byte_perm_S (w[33], w[34], selector);
|
|
w[44] = __byte_perm_S (w[32], w[33], selector);
|
|
w[43] = __byte_perm_S (w[31], w[32], selector);
|
|
w[42] = __byte_perm_S (w[30], w[31], selector);
|
|
w[41] = __byte_perm_S (w[29], w[30], selector);
|
|
w[40] = __byte_perm_S (w[28], w[29], selector);
|
|
w[39] = __byte_perm_S (w[27], w[28], selector);
|
|
w[38] = __byte_perm_S (w[26], w[27], selector);
|
|
w[37] = __byte_perm_S (w[25], w[26], selector);
|
|
w[36] = __byte_perm_S (w[24], w[25], selector);
|
|
w[35] = __byte_perm_S (w[23], w[24], selector);
|
|
w[34] = __byte_perm_S (w[22], w[23], selector);
|
|
w[33] = __byte_perm_S (w[21], w[22], selector);
|
|
w[32] = __byte_perm_S (w[20], w[21], selector);
|
|
w[31] = __byte_perm_S (w[19], w[20], selector);
|
|
w[30] = __byte_perm_S (w[18], w[19], selector);
|
|
w[29] = __byte_perm_S (w[17], w[18], selector);
|
|
w[28] = __byte_perm_S (w[16], w[17], selector);
|
|
w[27] = __byte_perm_S (w[15], w[16], selector);
|
|
w[26] = __byte_perm_S (w[14], w[15], selector);
|
|
w[25] = __byte_perm_S (w[13], w[14], selector);
|
|
w[24] = __byte_perm_S (w[12], w[13], selector);
|
|
w[23] = __byte_perm_S (w[11], w[12], selector);
|
|
w[22] = __byte_perm_S (w[10], w[11], selector);
|
|
w[21] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[20] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[19] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[18] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[17] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[16] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[15] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[14] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[13] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[12] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[11] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = __byte_perm_S (w[50], w[51], selector);
|
|
w[62] = __byte_perm_S (w[49], w[50], selector);
|
|
w[61] = __byte_perm_S (w[48], w[49], selector);
|
|
w[60] = __byte_perm_S (w[47], w[48], selector);
|
|
w[59] = __byte_perm_S (w[46], w[47], selector);
|
|
w[58] = __byte_perm_S (w[45], w[46], selector);
|
|
w[57] = __byte_perm_S (w[44], w[45], selector);
|
|
w[56] = __byte_perm_S (w[43], w[44], selector);
|
|
w[55] = __byte_perm_S (w[42], w[43], selector);
|
|
w[54] = __byte_perm_S (w[41], w[42], selector);
|
|
w[53] = __byte_perm_S (w[40], w[41], selector);
|
|
w[52] = __byte_perm_S (w[39], w[40], selector);
|
|
w[51] = __byte_perm_S (w[38], w[39], selector);
|
|
w[50] = __byte_perm_S (w[37], w[38], selector);
|
|
w[49] = __byte_perm_S (w[36], w[37], selector);
|
|
w[48] = __byte_perm_S (w[35], w[36], selector);
|
|
w[47] = __byte_perm_S (w[34], w[35], selector);
|
|
w[46] = __byte_perm_S (w[33], w[34], selector);
|
|
w[45] = __byte_perm_S (w[32], w[33], selector);
|
|
w[44] = __byte_perm_S (w[31], w[32], selector);
|
|
w[43] = __byte_perm_S (w[30], w[31], selector);
|
|
w[42] = __byte_perm_S (w[29], w[30], selector);
|
|
w[41] = __byte_perm_S (w[28], w[29], selector);
|
|
w[40] = __byte_perm_S (w[27], w[28], selector);
|
|
w[39] = __byte_perm_S (w[26], w[27], selector);
|
|
w[38] = __byte_perm_S (w[25], w[26], selector);
|
|
w[37] = __byte_perm_S (w[24], w[25], selector);
|
|
w[36] = __byte_perm_S (w[23], w[24], selector);
|
|
w[35] = __byte_perm_S (w[22], w[23], selector);
|
|
w[34] = __byte_perm_S (w[21], w[22], selector);
|
|
w[33] = __byte_perm_S (w[20], w[21], selector);
|
|
w[32] = __byte_perm_S (w[19], w[20], selector);
|
|
w[31] = __byte_perm_S (w[18], w[19], selector);
|
|
w[30] = __byte_perm_S (w[17], w[18], selector);
|
|
w[29] = __byte_perm_S (w[16], w[17], selector);
|
|
w[28] = __byte_perm_S (w[15], w[16], selector);
|
|
w[27] = __byte_perm_S (w[14], w[15], selector);
|
|
w[26] = __byte_perm_S (w[13], w[14], selector);
|
|
w[25] = __byte_perm_S (w[12], w[13], selector);
|
|
w[24] = __byte_perm_S (w[11], w[12], selector);
|
|
w[23] = __byte_perm_S (w[10], w[11], selector);
|
|
w[22] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[21] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[20] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[19] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[18] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[17] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[16] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[15] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[14] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[13] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[12] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = __byte_perm_S (w[49], w[50], selector);
|
|
w[62] = __byte_perm_S (w[48], w[49], selector);
|
|
w[61] = __byte_perm_S (w[47], w[48], selector);
|
|
w[60] = __byte_perm_S (w[46], w[47], selector);
|
|
w[59] = __byte_perm_S (w[45], w[46], selector);
|
|
w[58] = __byte_perm_S (w[44], w[45], selector);
|
|
w[57] = __byte_perm_S (w[43], w[44], selector);
|
|
w[56] = __byte_perm_S (w[42], w[43], selector);
|
|
w[55] = __byte_perm_S (w[41], w[42], selector);
|
|
w[54] = __byte_perm_S (w[40], w[41], selector);
|
|
w[53] = __byte_perm_S (w[39], w[40], selector);
|
|
w[52] = __byte_perm_S (w[38], w[39], selector);
|
|
w[51] = __byte_perm_S (w[37], w[38], selector);
|
|
w[50] = __byte_perm_S (w[36], w[37], selector);
|
|
w[49] = __byte_perm_S (w[35], w[36], selector);
|
|
w[48] = __byte_perm_S (w[34], w[35], selector);
|
|
w[47] = __byte_perm_S (w[33], w[34], selector);
|
|
w[46] = __byte_perm_S (w[32], w[33], selector);
|
|
w[45] = __byte_perm_S (w[31], w[32], selector);
|
|
w[44] = __byte_perm_S (w[30], w[31], selector);
|
|
w[43] = __byte_perm_S (w[29], w[30], selector);
|
|
w[42] = __byte_perm_S (w[28], w[29], selector);
|
|
w[41] = __byte_perm_S (w[27], w[28], selector);
|
|
w[40] = __byte_perm_S (w[26], w[27], selector);
|
|
w[39] = __byte_perm_S (w[25], w[26], selector);
|
|
w[38] = __byte_perm_S (w[24], w[25], selector);
|
|
w[37] = __byte_perm_S (w[23], w[24], selector);
|
|
w[36] = __byte_perm_S (w[22], w[23], selector);
|
|
w[35] = __byte_perm_S (w[21], w[22], selector);
|
|
w[34] = __byte_perm_S (w[20], w[21], selector);
|
|
w[33] = __byte_perm_S (w[19], w[20], selector);
|
|
w[32] = __byte_perm_S (w[18], w[19], selector);
|
|
w[31] = __byte_perm_S (w[17], w[18], selector);
|
|
w[30] = __byte_perm_S (w[16], w[17], selector);
|
|
w[29] = __byte_perm_S (w[15], w[16], selector);
|
|
w[28] = __byte_perm_S (w[14], w[15], selector);
|
|
w[27] = __byte_perm_S (w[13], w[14], selector);
|
|
w[26] = __byte_perm_S (w[12], w[13], selector);
|
|
w[25] = __byte_perm_S (w[11], w[12], selector);
|
|
w[24] = __byte_perm_S (w[10], w[11], selector);
|
|
w[23] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[22] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[21] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[20] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[19] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[18] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[17] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[16] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[15] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[14] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[13] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = __byte_perm_S (w[48], w[49], selector);
|
|
w[62] = __byte_perm_S (w[47], w[48], selector);
|
|
w[61] = __byte_perm_S (w[46], w[47], selector);
|
|
w[60] = __byte_perm_S (w[45], w[46], selector);
|
|
w[59] = __byte_perm_S (w[44], w[45], selector);
|
|
w[58] = __byte_perm_S (w[43], w[44], selector);
|
|
w[57] = __byte_perm_S (w[42], w[43], selector);
|
|
w[56] = __byte_perm_S (w[41], w[42], selector);
|
|
w[55] = __byte_perm_S (w[40], w[41], selector);
|
|
w[54] = __byte_perm_S (w[39], w[40], selector);
|
|
w[53] = __byte_perm_S (w[38], w[39], selector);
|
|
w[52] = __byte_perm_S (w[37], w[38], selector);
|
|
w[51] = __byte_perm_S (w[36], w[37], selector);
|
|
w[50] = __byte_perm_S (w[35], w[36], selector);
|
|
w[49] = __byte_perm_S (w[34], w[35], selector);
|
|
w[48] = __byte_perm_S (w[33], w[34], selector);
|
|
w[47] = __byte_perm_S (w[32], w[33], selector);
|
|
w[46] = __byte_perm_S (w[31], w[32], selector);
|
|
w[45] = __byte_perm_S (w[30], w[31], selector);
|
|
w[44] = __byte_perm_S (w[29], w[30], selector);
|
|
w[43] = __byte_perm_S (w[28], w[29], selector);
|
|
w[42] = __byte_perm_S (w[27], w[28], selector);
|
|
w[41] = __byte_perm_S (w[26], w[27], selector);
|
|
w[40] = __byte_perm_S (w[25], w[26], selector);
|
|
w[39] = __byte_perm_S (w[24], w[25], selector);
|
|
w[38] = __byte_perm_S (w[23], w[24], selector);
|
|
w[37] = __byte_perm_S (w[22], w[23], selector);
|
|
w[36] = __byte_perm_S (w[21], w[22], selector);
|
|
w[35] = __byte_perm_S (w[20], w[21], selector);
|
|
w[34] = __byte_perm_S (w[19], w[20], selector);
|
|
w[33] = __byte_perm_S (w[18], w[19], selector);
|
|
w[32] = __byte_perm_S (w[17], w[18], selector);
|
|
w[31] = __byte_perm_S (w[16], w[17], selector);
|
|
w[30] = __byte_perm_S (w[15], w[16], selector);
|
|
w[29] = __byte_perm_S (w[14], w[15], selector);
|
|
w[28] = __byte_perm_S (w[13], w[14], selector);
|
|
w[27] = __byte_perm_S (w[12], w[13], selector);
|
|
w[26] = __byte_perm_S (w[11], w[12], selector);
|
|
w[25] = __byte_perm_S (w[10], w[11], selector);
|
|
w[24] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[23] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[22] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[21] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[20] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[19] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[18] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[17] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[16] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[15] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[14] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = __byte_perm_S (w[47], w[48], selector);
|
|
w[62] = __byte_perm_S (w[46], w[47], selector);
|
|
w[61] = __byte_perm_S (w[45], w[46], selector);
|
|
w[60] = __byte_perm_S (w[44], w[45], selector);
|
|
w[59] = __byte_perm_S (w[43], w[44], selector);
|
|
w[58] = __byte_perm_S (w[42], w[43], selector);
|
|
w[57] = __byte_perm_S (w[41], w[42], selector);
|
|
w[56] = __byte_perm_S (w[40], w[41], selector);
|
|
w[55] = __byte_perm_S (w[39], w[40], selector);
|
|
w[54] = __byte_perm_S (w[38], w[39], selector);
|
|
w[53] = __byte_perm_S (w[37], w[38], selector);
|
|
w[52] = __byte_perm_S (w[36], w[37], selector);
|
|
w[51] = __byte_perm_S (w[35], w[36], selector);
|
|
w[50] = __byte_perm_S (w[34], w[35], selector);
|
|
w[49] = __byte_perm_S (w[33], w[34], selector);
|
|
w[48] = __byte_perm_S (w[32], w[33], selector);
|
|
w[47] = __byte_perm_S (w[31], w[32], selector);
|
|
w[46] = __byte_perm_S (w[30], w[31], selector);
|
|
w[45] = __byte_perm_S (w[29], w[30], selector);
|
|
w[44] = __byte_perm_S (w[28], w[29], selector);
|
|
w[43] = __byte_perm_S (w[27], w[28], selector);
|
|
w[42] = __byte_perm_S (w[26], w[27], selector);
|
|
w[41] = __byte_perm_S (w[25], w[26], selector);
|
|
w[40] = __byte_perm_S (w[24], w[25], selector);
|
|
w[39] = __byte_perm_S (w[23], w[24], selector);
|
|
w[38] = __byte_perm_S (w[22], w[23], selector);
|
|
w[37] = __byte_perm_S (w[21], w[22], selector);
|
|
w[36] = __byte_perm_S (w[20], w[21], selector);
|
|
w[35] = __byte_perm_S (w[19], w[20], selector);
|
|
w[34] = __byte_perm_S (w[18], w[19], selector);
|
|
w[33] = __byte_perm_S (w[17], w[18], selector);
|
|
w[32] = __byte_perm_S (w[16], w[17], selector);
|
|
w[31] = __byte_perm_S (w[15], w[16], selector);
|
|
w[30] = __byte_perm_S (w[14], w[15], selector);
|
|
w[29] = __byte_perm_S (w[13], w[14], selector);
|
|
w[28] = __byte_perm_S (w[12], w[13], selector);
|
|
w[27] = __byte_perm_S (w[11], w[12], selector);
|
|
w[26] = __byte_perm_S (w[10], w[11], selector);
|
|
w[25] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[24] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[23] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[22] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[21] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[20] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[19] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[18] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[17] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[16] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[15] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = __byte_perm_S (w[46], w[47], selector);
|
|
w[62] = __byte_perm_S (w[45], w[46], selector);
|
|
w[61] = __byte_perm_S (w[44], w[45], selector);
|
|
w[60] = __byte_perm_S (w[43], w[44], selector);
|
|
w[59] = __byte_perm_S (w[42], w[43], selector);
|
|
w[58] = __byte_perm_S (w[41], w[42], selector);
|
|
w[57] = __byte_perm_S (w[40], w[41], selector);
|
|
w[56] = __byte_perm_S (w[39], w[40], selector);
|
|
w[55] = __byte_perm_S (w[38], w[39], selector);
|
|
w[54] = __byte_perm_S (w[37], w[38], selector);
|
|
w[53] = __byte_perm_S (w[36], w[37], selector);
|
|
w[52] = __byte_perm_S (w[35], w[36], selector);
|
|
w[51] = __byte_perm_S (w[34], w[35], selector);
|
|
w[50] = __byte_perm_S (w[33], w[34], selector);
|
|
w[49] = __byte_perm_S (w[32], w[33], selector);
|
|
w[48] = __byte_perm_S (w[31], w[32], selector);
|
|
w[47] = __byte_perm_S (w[30], w[31], selector);
|
|
w[46] = __byte_perm_S (w[29], w[30], selector);
|
|
w[45] = __byte_perm_S (w[28], w[29], selector);
|
|
w[44] = __byte_perm_S (w[27], w[28], selector);
|
|
w[43] = __byte_perm_S (w[26], w[27], selector);
|
|
w[42] = __byte_perm_S (w[25], w[26], selector);
|
|
w[41] = __byte_perm_S (w[24], w[25], selector);
|
|
w[40] = __byte_perm_S (w[23], w[24], selector);
|
|
w[39] = __byte_perm_S (w[22], w[23], selector);
|
|
w[38] = __byte_perm_S (w[21], w[22], selector);
|
|
w[37] = __byte_perm_S (w[20], w[21], selector);
|
|
w[36] = __byte_perm_S (w[19], w[20], selector);
|
|
w[35] = __byte_perm_S (w[18], w[19], selector);
|
|
w[34] = __byte_perm_S (w[17], w[18], selector);
|
|
w[33] = __byte_perm_S (w[16], w[17], selector);
|
|
w[32] = __byte_perm_S (w[15], w[16], selector);
|
|
w[31] = __byte_perm_S (w[14], w[15], selector);
|
|
w[30] = __byte_perm_S (w[13], w[14], selector);
|
|
w[29] = __byte_perm_S (w[12], w[13], selector);
|
|
w[28] = __byte_perm_S (w[11], w[12], selector);
|
|
w[27] = __byte_perm_S (w[10], w[11], selector);
|
|
w[26] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[25] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[24] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[23] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[22] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[21] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[20] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[19] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[18] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[17] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[16] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = __byte_perm_S (w[45], w[46], selector);
|
|
w[62] = __byte_perm_S (w[44], w[45], selector);
|
|
w[61] = __byte_perm_S (w[43], w[44], selector);
|
|
w[60] = __byte_perm_S (w[42], w[43], selector);
|
|
w[59] = __byte_perm_S (w[41], w[42], selector);
|
|
w[58] = __byte_perm_S (w[40], w[41], selector);
|
|
w[57] = __byte_perm_S (w[39], w[40], selector);
|
|
w[56] = __byte_perm_S (w[38], w[39], selector);
|
|
w[55] = __byte_perm_S (w[37], w[38], selector);
|
|
w[54] = __byte_perm_S (w[36], w[37], selector);
|
|
w[53] = __byte_perm_S (w[35], w[36], selector);
|
|
w[52] = __byte_perm_S (w[34], w[35], selector);
|
|
w[51] = __byte_perm_S (w[33], w[34], selector);
|
|
w[50] = __byte_perm_S (w[32], w[33], selector);
|
|
w[49] = __byte_perm_S (w[31], w[32], selector);
|
|
w[48] = __byte_perm_S (w[30], w[31], selector);
|
|
w[47] = __byte_perm_S (w[29], w[30], selector);
|
|
w[46] = __byte_perm_S (w[28], w[29], selector);
|
|
w[45] = __byte_perm_S (w[27], w[28], selector);
|
|
w[44] = __byte_perm_S (w[26], w[27], selector);
|
|
w[43] = __byte_perm_S (w[25], w[26], selector);
|
|
w[42] = __byte_perm_S (w[24], w[25], selector);
|
|
w[41] = __byte_perm_S (w[23], w[24], selector);
|
|
w[40] = __byte_perm_S (w[22], w[23], selector);
|
|
w[39] = __byte_perm_S (w[21], w[22], selector);
|
|
w[38] = __byte_perm_S (w[20], w[21], selector);
|
|
w[37] = __byte_perm_S (w[19], w[20], selector);
|
|
w[36] = __byte_perm_S (w[18], w[19], selector);
|
|
w[35] = __byte_perm_S (w[17], w[18], selector);
|
|
w[34] = __byte_perm_S (w[16], w[17], selector);
|
|
w[33] = __byte_perm_S (w[15], w[16], selector);
|
|
w[32] = __byte_perm_S (w[14], w[15], selector);
|
|
w[31] = __byte_perm_S (w[13], w[14], selector);
|
|
w[30] = __byte_perm_S (w[12], w[13], selector);
|
|
w[29] = __byte_perm_S (w[11], w[12], selector);
|
|
w[28] = __byte_perm_S (w[10], w[11], selector);
|
|
w[27] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[26] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[25] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[24] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[23] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[22] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[21] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[20] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[19] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[18] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[17] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = __byte_perm_S (w[44], w[45], selector);
|
|
w[62] = __byte_perm_S (w[43], w[44], selector);
|
|
w[61] = __byte_perm_S (w[42], w[43], selector);
|
|
w[60] = __byte_perm_S (w[41], w[42], selector);
|
|
w[59] = __byte_perm_S (w[40], w[41], selector);
|
|
w[58] = __byte_perm_S (w[39], w[40], selector);
|
|
w[57] = __byte_perm_S (w[38], w[39], selector);
|
|
w[56] = __byte_perm_S (w[37], w[38], selector);
|
|
w[55] = __byte_perm_S (w[36], w[37], selector);
|
|
w[54] = __byte_perm_S (w[35], w[36], selector);
|
|
w[53] = __byte_perm_S (w[34], w[35], selector);
|
|
w[52] = __byte_perm_S (w[33], w[34], selector);
|
|
w[51] = __byte_perm_S (w[32], w[33], selector);
|
|
w[50] = __byte_perm_S (w[31], w[32], selector);
|
|
w[49] = __byte_perm_S (w[30], w[31], selector);
|
|
w[48] = __byte_perm_S (w[29], w[30], selector);
|
|
w[47] = __byte_perm_S (w[28], w[29], selector);
|
|
w[46] = __byte_perm_S (w[27], w[28], selector);
|
|
w[45] = __byte_perm_S (w[26], w[27], selector);
|
|
w[44] = __byte_perm_S (w[25], w[26], selector);
|
|
w[43] = __byte_perm_S (w[24], w[25], selector);
|
|
w[42] = __byte_perm_S (w[23], w[24], selector);
|
|
w[41] = __byte_perm_S (w[22], w[23], selector);
|
|
w[40] = __byte_perm_S (w[21], w[22], selector);
|
|
w[39] = __byte_perm_S (w[20], w[21], selector);
|
|
w[38] = __byte_perm_S (w[19], w[20], selector);
|
|
w[37] = __byte_perm_S (w[18], w[19], selector);
|
|
w[36] = __byte_perm_S (w[17], w[18], selector);
|
|
w[35] = __byte_perm_S (w[16], w[17], selector);
|
|
w[34] = __byte_perm_S (w[15], w[16], selector);
|
|
w[33] = __byte_perm_S (w[14], w[15], selector);
|
|
w[32] = __byte_perm_S (w[13], w[14], selector);
|
|
w[31] = __byte_perm_S (w[12], w[13], selector);
|
|
w[30] = __byte_perm_S (w[11], w[12], selector);
|
|
w[29] = __byte_perm_S (w[10], w[11], selector);
|
|
w[28] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[27] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[26] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[25] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[24] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[23] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[22] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[21] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[20] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[19] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[18] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = __byte_perm_S (w[43], w[44], selector);
|
|
w[62] = __byte_perm_S (w[42], w[43], selector);
|
|
w[61] = __byte_perm_S (w[41], w[42], selector);
|
|
w[60] = __byte_perm_S (w[40], w[41], selector);
|
|
w[59] = __byte_perm_S (w[39], w[40], selector);
|
|
w[58] = __byte_perm_S (w[38], w[39], selector);
|
|
w[57] = __byte_perm_S (w[37], w[38], selector);
|
|
w[56] = __byte_perm_S (w[36], w[37], selector);
|
|
w[55] = __byte_perm_S (w[35], w[36], selector);
|
|
w[54] = __byte_perm_S (w[34], w[35], selector);
|
|
w[53] = __byte_perm_S (w[33], w[34], selector);
|
|
w[52] = __byte_perm_S (w[32], w[33], selector);
|
|
w[51] = __byte_perm_S (w[31], w[32], selector);
|
|
w[50] = __byte_perm_S (w[30], w[31], selector);
|
|
w[49] = __byte_perm_S (w[29], w[30], selector);
|
|
w[48] = __byte_perm_S (w[28], w[29], selector);
|
|
w[47] = __byte_perm_S (w[27], w[28], selector);
|
|
w[46] = __byte_perm_S (w[26], w[27], selector);
|
|
w[45] = __byte_perm_S (w[25], w[26], selector);
|
|
w[44] = __byte_perm_S (w[24], w[25], selector);
|
|
w[43] = __byte_perm_S (w[23], w[24], selector);
|
|
w[42] = __byte_perm_S (w[22], w[23], selector);
|
|
w[41] = __byte_perm_S (w[21], w[22], selector);
|
|
w[40] = __byte_perm_S (w[20], w[21], selector);
|
|
w[39] = __byte_perm_S (w[19], w[20], selector);
|
|
w[38] = __byte_perm_S (w[18], w[19], selector);
|
|
w[37] = __byte_perm_S (w[17], w[18], selector);
|
|
w[36] = __byte_perm_S (w[16], w[17], selector);
|
|
w[35] = __byte_perm_S (w[15], w[16], selector);
|
|
w[34] = __byte_perm_S (w[14], w[15], selector);
|
|
w[33] = __byte_perm_S (w[13], w[14], selector);
|
|
w[32] = __byte_perm_S (w[12], w[13], selector);
|
|
w[31] = __byte_perm_S (w[11], w[12], selector);
|
|
w[30] = __byte_perm_S (w[10], w[11], selector);
|
|
w[29] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[28] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[27] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[26] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[25] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[24] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[23] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[22] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[21] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[20] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[19] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = __byte_perm_S (w[42], w[43], selector);
|
|
w[62] = __byte_perm_S (w[41], w[42], selector);
|
|
w[61] = __byte_perm_S (w[40], w[41], selector);
|
|
w[60] = __byte_perm_S (w[39], w[40], selector);
|
|
w[59] = __byte_perm_S (w[38], w[39], selector);
|
|
w[58] = __byte_perm_S (w[37], w[38], selector);
|
|
w[57] = __byte_perm_S (w[36], w[37], selector);
|
|
w[56] = __byte_perm_S (w[35], w[36], selector);
|
|
w[55] = __byte_perm_S (w[34], w[35], selector);
|
|
w[54] = __byte_perm_S (w[33], w[34], selector);
|
|
w[53] = __byte_perm_S (w[32], w[33], selector);
|
|
w[52] = __byte_perm_S (w[31], w[32], selector);
|
|
w[51] = __byte_perm_S (w[30], w[31], selector);
|
|
w[50] = __byte_perm_S (w[29], w[30], selector);
|
|
w[49] = __byte_perm_S (w[28], w[29], selector);
|
|
w[48] = __byte_perm_S (w[27], w[28], selector);
|
|
w[47] = __byte_perm_S (w[26], w[27], selector);
|
|
w[46] = __byte_perm_S (w[25], w[26], selector);
|
|
w[45] = __byte_perm_S (w[24], w[25], selector);
|
|
w[44] = __byte_perm_S (w[23], w[24], selector);
|
|
w[43] = __byte_perm_S (w[22], w[23], selector);
|
|
w[42] = __byte_perm_S (w[21], w[22], selector);
|
|
w[41] = __byte_perm_S (w[20], w[21], selector);
|
|
w[40] = __byte_perm_S (w[19], w[20], selector);
|
|
w[39] = __byte_perm_S (w[18], w[19], selector);
|
|
w[38] = __byte_perm_S (w[17], w[18], selector);
|
|
w[37] = __byte_perm_S (w[16], w[17], selector);
|
|
w[36] = __byte_perm_S (w[15], w[16], selector);
|
|
w[35] = __byte_perm_S (w[14], w[15], selector);
|
|
w[34] = __byte_perm_S (w[13], w[14], selector);
|
|
w[33] = __byte_perm_S (w[12], w[13], selector);
|
|
w[32] = __byte_perm_S (w[11], w[12], selector);
|
|
w[31] = __byte_perm_S (w[10], w[11], selector);
|
|
w[30] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[29] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[28] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[27] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[26] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[25] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[24] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[23] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[22] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[21] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[20] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = __byte_perm_S (w[41], w[42], selector);
|
|
w[62] = __byte_perm_S (w[40], w[41], selector);
|
|
w[61] = __byte_perm_S (w[39], w[40], selector);
|
|
w[60] = __byte_perm_S (w[38], w[39], selector);
|
|
w[59] = __byte_perm_S (w[37], w[38], selector);
|
|
w[58] = __byte_perm_S (w[36], w[37], selector);
|
|
w[57] = __byte_perm_S (w[35], w[36], selector);
|
|
w[56] = __byte_perm_S (w[34], w[35], selector);
|
|
w[55] = __byte_perm_S (w[33], w[34], selector);
|
|
w[54] = __byte_perm_S (w[32], w[33], selector);
|
|
w[53] = __byte_perm_S (w[31], w[32], selector);
|
|
w[52] = __byte_perm_S (w[30], w[31], selector);
|
|
w[51] = __byte_perm_S (w[29], w[30], selector);
|
|
w[50] = __byte_perm_S (w[28], w[29], selector);
|
|
w[49] = __byte_perm_S (w[27], w[28], selector);
|
|
w[48] = __byte_perm_S (w[26], w[27], selector);
|
|
w[47] = __byte_perm_S (w[25], w[26], selector);
|
|
w[46] = __byte_perm_S (w[24], w[25], selector);
|
|
w[45] = __byte_perm_S (w[23], w[24], selector);
|
|
w[44] = __byte_perm_S (w[22], w[23], selector);
|
|
w[43] = __byte_perm_S (w[21], w[22], selector);
|
|
w[42] = __byte_perm_S (w[20], w[21], selector);
|
|
w[41] = __byte_perm_S (w[19], w[20], selector);
|
|
w[40] = __byte_perm_S (w[18], w[19], selector);
|
|
w[39] = __byte_perm_S (w[17], w[18], selector);
|
|
w[38] = __byte_perm_S (w[16], w[17], selector);
|
|
w[37] = __byte_perm_S (w[15], w[16], selector);
|
|
w[36] = __byte_perm_S (w[14], w[15], selector);
|
|
w[35] = __byte_perm_S (w[13], w[14], selector);
|
|
w[34] = __byte_perm_S (w[12], w[13], selector);
|
|
w[33] = __byte_perm_S (w[11], w[12], selector);
|
|
w[32] = __byte_perm_S (w[10], w[11], selector);
|
|
w[31] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[30] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[29] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[28] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[27] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[26] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[25] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[24] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[23] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[22] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[21] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = __byte_perm_S (w[40], w[41], selector);
|
|
w[62] = __byte_perm_S (w[39], w[40], selector);
|
|
w[61] = __byte_perm_S (w[38], w[39], selector);
|
|
w[60] = __byte_perm_S (w[37], w[38], selector);
|
|
w[59] = __byte_perm_S (w[36], w[37], selector);
|
|
w[58] = __byte_perm_S (w[35], w[36], selector);
|
|
w[57] = __byte_perm_S (w[34], w[35], selector);
|
|
w[56] = __byte_perm_S (w[33], w[34], selector);
|
|
w[55] = __byte_perm_S (w[32], w[33], selector);
|
|
w[54] = __byte_perm_S (w[31], w[32], selector);
|
|
w[53] = __byte_perm_S (w[30], w[31], selector);
|
|
w[52] = __byte_perm_S (w[29], w[30], selector);
|
|
w[51] = __byte_perm_S (w[28], w[29], selector);
|
|
w[50] = __byte_perm_S (w[27], w[28], selector);
|
|
w[49] = __byte_perm_S (w[26], w[27], selector);
|
|
w[48] = __byte_perm_S (w[25], w[26], selector);
|
|
w[47] = __byte_perm_S (w[24], w[25], selector);
|
|
w[46] = __byte_perm_S (w[23], w[24], selector);
|
|
w[45] = __byte_perm_S (w[22], w[23], selector);
|
|
w[44] = __byte_perm_S (w[21], w[22], selector);
|
|
w[43] = __byte_perm_S (w[20], w[21], selector);
|
|
w[42] = __byte_perm_S (w[19], w[20], selector);
|
|
w[41] = __byte_perm_S (w[18], w[19], selector);
|
|
w[40] = __byte_perm_S (w[17], w[18], selector);
|
|
w[39] = __byte_perm_S (w[16], w[17], selector);
|
|
w[38] = __byte_perm_S (w[15], w[16], selector);
|
|
w[37] = __byte_perm_S (w[14], w[15], selector);
|
|
w[36] = __byte_perm_S (w[13], w[14], selector);
|
|
w[35] = __byte_perm_S (w[12], w[13], selector);
|
|
w[34] = __byte_perm_S (w[11], w[12], selector);
|
|
w[33] = __byte_perm_S (w[10], w[11], selector);
|
|
w[32] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[31] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[30] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[29] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[28] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[27] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[26] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[25] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[24] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[23] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[22] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = __byte_perm_S (w[39], w[40], selector);
|
|
w[62] = __byte_perm_S (w[38], w[39], selector);
|
|
w[61] = __byte_perm_S (w[37], w[38], selector);
|
|
w[60] = __byte_perm_S (w[36], w[37], selector);
|
|
w[59] = __byte_perm_S (w[35], w[36], selector);
|
|
w[58] = __byte_perm_S (w[34], w[35], selector);
|
|
w[57] = __byte_perm_S (w[33], w[34], selector);
|
|
w[56] = __byte_perm_S (w[32], w[33], selector);
|
|
w[55] = __byte_perm_S (w[31], w[32], selector);
|
|
w[54] = __byte_perm_S (w[30], w[31], selector);
|
|
w[53] = __byte_perm_S (w[29], w[30], selector);
|
|
w[52] = __byte_perm_S (w[28], w[29], selector);
|
|
w[51] = __byte_perm_S (w[27], w[28], selector);
|
|
w[50] = __byte_perm_S (w[26], w[27], selector);
|
|
w[49] = __byte_perm_S (w[25], w[26], selector);
|
|
w[48] = __byte_perm_S (w[24], w[25], selector);
|
|
w[47] = __byte_perm_S (w[23], w[24], selector);
|
|
w[46] = __byte_perm_S (w[22], w[23], selector);
|
|
w[45] = __byte_perm_S (w[21], w[22], selector);
|
|
w[44] = __byte_perm_S (w[20], w[21], selector);
|
|
w[43] = __byte_perm_S (w[19], w[20], selector);
|
|
w[42] = __byte_perm_S (w[18], w[19], selector);
|
|
w[41] = __byte_perm_S (w[17], w[18], selector);
|
|
w[40] = __byte_perm_S (w[16], w[17], selector);
|
|
w[39] = __byte_perm_S (w[15], w[16], selector);
|
|
w[38] = __byte_perm_S (w[14], w[15], selector);
|
|
w[37] = __byte_perm_S (w[13], w[14], selector);
|
|
w[36] = __byte_perm_S (w[12], w[13], selector);
|
|
w[35] = __byte_perm_S (w[11], w[12], selector);
|
|
w[34] = __byte_perm_S (w[10], w[11], selector);
|
|
w[33] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[32] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[31] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[30] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[29] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[28] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[27] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[26] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[25] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[24] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[23] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = __byte_perm_S (w[38], w[39], selector);
|
|
w[62] = __byte_perm_S (w[37], w[38], selector);
|
|
w[61] = __byte_perm_S (w[36], w[37], selector);
|
|
w[60] = __byte_perm_S (w[35], w[36], selector);
|
|
w[59] = __byte_perm_S (w[34], w[35], selector);
|
|
w[58] = __byte_perm_S (w[33], w[34], selector);
|
|
w[57] = __byte_perm_S (w[32], w[33], selector);
|
|
w[56] = __byte_perm_S (w[31], w[32], selector);
|
|
w[55] = __byte_perm_S (w[30], w[31], selector);
|
|
w[54] = __byte_perm_S (w[29], w[30], selector);
|
|
w[53] = __byte_perm_S (w[28], w[29], selector);
|
|
w[52] = __byte_perm_S (w[27], w[28], selector);
|
|
w[51] = __byte_perm_S (w[26], w[27], selector);
|
|
w[50] = __byte_perm_S (w[25], w[26], selector);
|
|
w[49] = __byte_perm_S (w[24], w[25], selector);
|
|
w[48] = __byte_perm_S (w[23], w[24], selector);
|
|
w[47] = __byte_perm_S (w[22], w[23], selector);
|
|
w[46] = __byte_perm_S (w[21], w[22], selector);
|
|
w[45] = __byte_perm_S (w[20], w[21], selector);
|
|
w[44] = __byte_perm_S (w[19], w[20], selector);
|
|
w[43] = __byte_perm_S (w[18], w[19], selector);
|
|
w[42] = __byte_perm_S (w[17], w[18], selector);
|
|
w[41] = __byte_perm_S (w[16], w[17], selector);
|
|
w[40] = __byte_perm_S (w[15], w[16], selector);
|
|
w[39] = __byte_perm_S (w[14], w[15], selector);
|
|
w[38] = __byte_perm_S (w[13], w[14], selector);
|
|
w[37] = __byte_perm_S (w[12], w[13], selector);
|
|
w[36] = __byte_perm_S (w[11], w[12], selector);
|
|
w[35] = __byte_perm_S (w[10], w[11], selector);
|
|
w[34] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[33] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[32] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[31] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[30] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[29] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[28] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[27] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[26] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[25] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[24] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = __byte_perm_S (w[37], w[38], selector);
|
|
w[62] = __byte_perm_S (w[36], w[37], selector);
|
|
w[61] = __byte_perm_S (w[35], w[36], selector);
|
|
w[60] = __byte_perm_S (w[34], w[35], selector);
|
|
w[59] = __byte_perm_S (w[33], w[34], selector);
|
|
w[58] = __byte_perm_S (w[32], w[33], selector);
|
|
w[57] = __byte_perm_S (w[31], w[32], selector);
|
|
w[56] = __byte_perm_S (w[30], w[31], selector);
|
|
w[55] = __byte_perm_S (w[29], w[30], selector);
|
|
w[54] = __byte_perm_S (w[28], w[29], selector);
|
|
w[53] = __byte_perm_S (w[27], w[28], selector);
|
|
w[52] = __byte_perm_S (w[26], w[27], selector);
|
|
w[51] = __byte_perm_S (w[25], w[26], selector);
|
|
w[50] = __byte_perm_S (w[24], w[25], selector);
|
|
w[49] = __byte_perm_S (w[23], w[24], selector);
|
|
w[48] = __byte_perm_S (w[22], w[23], selector);
|
|
w[47] = __byte_perm_S (w[21], w[22], selector);
|
|
w[46] = __byte_perm_S (w[20], w[21], selector);
|
|
w[45] = __byte_perm_S (w[19], w[20], selector);
|
|
w[44] = __byte_perm_S (w[18], w[19], selector);
|
|
w[43] = __byte_perm_S (w[17], w[18], selector);
|
|
w[42] = __byte_perm_S (w[16], w[17], selector);
|
|
w[41] = __byte_perm_S (w[15], w[16], selector);
|
|
w[40] = __byte_perm_S (w[14], w[15], selector);
|
|
w[39] = __byte_perm_S (w[13], w[14], selector);
|
|
w[38] = __byte_perm_S (w[12], w[13], selector);
|
|
w[37] = __byte_perm_S (w[11], w[12], selector);
|
|
w[36] = __byte_perm_S (w[10], w[11], selector);
|
|
w[35] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[34] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[33] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[32] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[31] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[30] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[29] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[28] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[27] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[26] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[25] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = __byte_perm_S (w[36], w[37], selector);
|
|
w[62] = __byte_perm_S (w[35], w[36], selector);
|
|
w[61] = __byte_perm_S (w[34], w[35], selector);
|
|
w[60] = __byte_perm_S (w[33], w[34], selector);
|
|
w[59] = __byte_perm_S (w[32], w[33], selector);
|
|
w[58] = __byte_perm_S (w[31], w[32], selector);
|
|
w[57] = __byte_perm_S (w[30], w[31], selector);
|
|
w[56] = __byte_perm_S (w[29], w[30], selector);
|
|
w[55] = __byte_perm_S (w[28], w[29], selector);
|
|
w[54] = __byte_perm_S (w[27], w[28], selector);
|
|
w[53] = __byte_perm_S (w[26], w[27], selector);
|
|
w[52] = __byte_perm_S (w[25], w[26], selector);
|
|
w[51] = __byte_perm_S (w[24], w[25], selector);
|
|
w[50] = __byte_perm_S (w[23], w[24], selector);
|
|
w[49] = __byte_perm_S (w[22], w[23], selector);
|
|
w[48] = __byte_perm_S (w[21], w[22], selector);
|
|
w[47] = __byte_perm_S (w[20], w[21], selector);
|
|
w[46] = __byte_perm_S (w[19], w[20], selector);
|
|
w[45] = __byte_perm_S (w[18], w[19], selector);
|
|
w[44] = __byte_perm_S (w[17], w[18], selector);
|
|
w[43] = __byte_perm_S (w[16], w[17], selector);
|
|
w[42] = __byte_perm_S (w[15], w[16], selector);
|
|
w[41] = __byte_perm_S (w[14], w[15], selector);
|
|
w[40] = __byte_perm_S (w[13], w[14], selector);
|
|
w[39] = __byte_perm_S (w[12], w[13], selector);
|
|
w[38] = __byte_perm_S (w[11], w[12], selector);
|
|
w[37] = __byte_perm_S (w[10], w[11], selector);
|
|
w[36] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[35] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[34] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[33] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[32] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[31] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[30] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[29] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[28] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[27] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[26] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = __byte_perm_S (w[35], w[36], selector);
|
|
w[62] = __byte_perm_S (w[34], w[35], selector);
|
|
w[61] = __byte_perm_S (w[33], w[34], selector);
|
|
w[60] = __byte_perm_S (w[32], w[33], selector);
|
|
w[59] = __byte_perm_S (w[31], w[32], selector);
|
|
w[58] = __byte_perm_S (w[30], w[31], selector);
|
|
w[57] = __byte_perm_S (w[29], w[30], selector);
|
|
w[56] = __byte_perm_S (w[28], w[29], selector);
|
|
w[55] = __byte_perm_S (w[27], w[28], selector);
|
|
w[54] = __byte_perm_S (w[26], w[27], selector);
|
|
w[53] = __byte_perm_S (w[25], w[26], selector);
|
|
w[52] = __byte_perm_S (w[24], w[25], selector);
|
|
w[51] = __byte_perm_S (w[23], w[24], selector);
|
|
w[50] = __byte_perm_S (w[22], w[23], selector);
|
|
w[49] = __byte_perm_S (w[21], w[22], selector);
|
|
w[48] = __byte_perm_S (w[20], w[21], selector);
|
|
w[47] = __byte_perm_S (w[19], w[20], selector);
|
|
w[46] = __byte_perm_S (w[18], w[19], selector);
|
|
w[45] = __byte_perm_S (w[17], w[18], selector);
|
|
w[44] = __byte_perm_S (w[16], w[17], selector);
|
|
w[43] = __byte_perm_S (w[15], w[16], selector);
|
|
w[42] = __byte_perm_S (w[14], w[15], selector);
|
|
w[41] = __byte_perm_S (w[13], w[14], selector);
|
|
w[40] = __byte_perm_S (w[12], w[13], selector);
|
|
w[39] = __byte_perm_S (w[11], w[12], selector);
|
|
w[38] = __byte_perm_S (w[10], w[11], selector);
|
|
w[37] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[36] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[35] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[34] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[33] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[32] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[31] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[30] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[29] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[28] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[27] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = __byte_perm_S (w[34], w[35], selector);
|
|
w[62] = __byte_perm_S (w[33], w[34], selector);
|
|
w[61] = __byte_perm_S (w[32], w[33], selector);
|
|
w[60] = __byte_perm_S (w[31], w[32], selector);
|
|
w[59] = __byte_perm_S (w[30], w[31], selector);
|
|
w[58] = __byte_perm_S (w[29], w[30], selector);
|
|
w[57] = __byte_perm_S (w[28], w[29], selector);
|
|
w[56] = __byte_perm_S (w[27], w[28], selector);
|
|
w[55] = __byte_perm_S (w[26], w[27], selector);
|
|
w[54] = __byte_perm_S (w[25], w[26], selector);
|
|
w[53] = __byte_perm_S (w[24], w[25], selector);
|
|
w[52] = __byte_perm_S (w[23], w[24], selector);
|
|
w[51] = __byte_perm_S (w[22], w[23], selector);
|
|
w[50] = __byte_perm_S (w[21], w[22], selector);
|
|
w[49] = __byte_perm_S (w[20], w[21], selector);
|
|
w[48] = __byte_perm_S (w[19], w[20], selector);
|
|
w[47] = __byte_perm_S (w[18], w[19], selector);
|
|
w[46] = __byte_perm_S (w[17], w[18], selector);
|
|
w[45] = __byte_perm_S (w[16], w[17], selector);
|
|
w[44] = __byte_perm_S (w[15], w[16], selector);
|
|
w[43] = __byte_perm_S (w[14], w[15], selector);
|
|
w[42] = __byte_perm_S (w[13], w[14], selector);
|
|
w[41] = __byte_perm_S (w[12], w[13], selector);
|
|
w[40] = __byte_perm_S (w[11], w[12], selector);
|
|
w[39] = __byte_perm_S (w[10], w[11], selector);
|
|
w[38] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[37] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[36] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[35] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[34] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[33] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[32] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[31] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[30] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[29] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[28] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = __byte_perm_S (w[33], w[34], selector);
|
|
w[62] = __byte_perm_S (w[32], w[33], selector);
|
|
w[61] = __byte_perm_S (w[31], w[32], selector);
|
|
w[60] = __byte_perm_S (w[30], w[31], selector);
|
|
w[59] = __byte_perm_S (w[29], w[30], selector);
|
|
w[58] = __byte_perm_S (w[28], w[29], selector);
|
|
w[57] = __byte_perm_S (w[27], w[28], selector);
|
|
w[56] = __byte_perm_S (w[26], w[27], selector);
|
|
w[55] = __byte_perm_S (w[25], w[26], selector);
|
|
w[54] = __byte_perm_S (w[24], w[25], selector);
|
|
w[53] = __byte_perm_S (w[23], w[24], selector);
|
|
w[52] = __byte_perm_S (w[22], w[23], selector);
|
|
w[51] = __byte_perm_S (w[21], w[22], selector);
|
|
w[50] = __byte_perm_S (w[20], w[21], selector);
|
|
w[49] = __byte_perm_S (w[19], w[20], selector);
|
|
w[48] = __byte_perm_S (w[18], w[19], selector);
|
|
w[47] = __byte_perm_S (w[17], w[18], selector);
|
|
w[46] = __byte_perm_S (w[16], w[17], selector);
|
|
w[45] = __byte_perm_S (w[15], w[16], selector);
|
|
w[44] = __byte_perm_S (w[14], w[15], selector);
|
|
w[43] = __byte_perm_S (w[13], w[14], selector);
|
|
w[42] = __byte_perm_S (w[12], w[13], selector);
|
|
w[41] = __byte_perm_S (w[11], w[12], selector);
|
|
w[40] = __byte_perm_S (w[10], w[11], selector);
|
|
w[39] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[38] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[37] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[36] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[35] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[34] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[33] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[32] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[31] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[30] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[29] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = __byte_perm_S (w[32], w[33], selector);
|
|
w[62] = __byte_perm_S (w[31], w[32], selector);
|
|
w[61] = __byte_perm_S (w[30], w[31], selector);
|
|
w[60] = __byte_perm_S (w[29], w[30], selector);
|
|
w[59] = __byte_perm_S (w[28], w[29], selector);
|
|
w[58] = __byte_perm_S (w[27], w[28], selector);
|
|
w[57] = __byte_perm_S (w[26], w[27], selector);
|
|
w[56] = __byte_perm_S (w[25], w[26], selector);
|
|
w[55] = __byte_perm_S (w[24], w[25], selector);
|
|
w[54] = __byte_perm_S (w[23], w[24], selector);
|
|
w[53] = __byte_perm_S (w[22], w[23], selector);
|
|
w[52] = __byte_perm_S (w[21], w[22], selector);
|
|
w[51] = __byte_perm_S (w[20], w[21], selector);
|
|
w[50] = __byte_perm_S (w[19], w[20], selector);
|
|
w[49] = __byte_perm_S (w[18], w[19], selector);
|
|
w[48] = __byte_perm_S (w[17], w[18], selector);
|
|
w[47] = __byte_perm_S (w[16], w[17], selector);
|
|
w[46] = __byte_perm_S (w[15], w[16], selector);
|
|
w[45] = __byte_perm_S (w[14], w[15], selector);
|
|
w[44] = __byte_perm_S (w[13], w[14], selector);
|
|
w[43] = __byte_perm_S (w[12], w[13], selector);
|
|
w[42] = __byte_perm_S (w[11], w[12], selector);
|
|
w[41] = __byte_perm_S (w[10], w[11], selector);
|
|
w[40] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[39] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[38] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[37] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[36] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[35] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[34] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[33] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[32] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[31] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[30] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = __byte_perm_S (w[31], w[32], selector);
|
|
w[62] = __byte_perm_S (w[30], w[31], selector);
|
|
w[61] = __byte_perm_S (w[29], w[30], selector);
|
|
w[60] = __byte_perm_S (w[28], w[29], selector);
|
|
w[59] = __byte_perm_S (w[27], w[28], selector);
|
|
w[58] = __byte_perm_S (w[26], w[27], selector);
|
|
w[57] = __byte_perm_S (w[25], w[26], selector);
|
|
w[56] = __byte_perm_S (w[24], w[25], selector);
|
|
w[55] = __byte_perm_S (w[23], w[24], selector);
|
|
w[54] = __byte_perm_S (w[22], w[23], selector);
|
|
w[53] = __byte_perm_S (w[21], w[22], selector);
|
|
w[52] = __byte_perm_S (w[20], w[21], selector);
|
|
w[51] = __byte_perm_S (w[19], w[20], selector);
|
|
w[50] = __byte_perm_S (w[18], w[19], selector);
|
|
w[49] = __byte_perm_S (w[17], w[18], selector);
|
|
w[48] = __byte_perm_S (w[16], w[17], selector);
|
|
w[47] = __byte_perm_S (w[15], w[16], selector);
|
|
w[46] = __byte_perm_S (w[14], w[15], selector);
|
|
w[45] = __byte_perm_S (w[13], w[14], selector);
|
|
w[44] = __byte_perm_S (w[12], w[13], selector);
|
|
w[43] = __byte_perm_S (w[11], w[12], selector);
|
|
w[42] = __byte_perm_S (w[10], w[11], selector);
|
|
w[41] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[40] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[39] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[38] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[37] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[36] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[35] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[34] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[33] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[32] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[31] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = __byte_perm_S (w[30], w[31], selector);
|
|
w[62] = __byte_perm_S (w[29], w[30], selector);
|
|
w[61] = __byte_perm_S (w[28], w[29], selector);
|
|
w[60] = __byte_perm_S (w[27], w[28], selector);
|
|
w[59] = __byte_perm_S (w[26], w[27], selector);
|
|
w[58] = __byte_perm_S (w[25], w[26], selector);
|
|
w[57] = __byte_perm_S (w[24], w[25], selector);
|
|
w[56] = __byte_perm_S (w[23], w[24], selector);
|
|
w[55] = __byte_perm_S (w[22], w[23], selector);
|
|
w[54] = __byte_perm_S (w[21], w[22], selector);
|
|
w[53] = __byte_perm_S (w[20], w[21], selector);
|
|
w[52] = __byte_perm_S (w[19], w[20], selector);
|
|
w[51] = __byte_perm_S (w[18], w[19], selector);
|
|
w[50] = __byte_perm_S (w[17], w[18], selector);
|
|
w[49] = __byte_perm_S (w[16], w[17], selector);
|
|
w[48] = __byte_perm_S (w[15], w[16], selector);
|
|
w[47] = __byte_perm_S (w[14], w[15], selector);
|
|
w[46] = __byte_perm_S (w[13], w[14], selector);
|
|
w[45] = __byte_perm_S (w[12], w[13], selector);
|
|
w[44] = __byte_perm_S (w[11], w[12], selector);
|
|
w[43] = __byte_perm_S (w[10], w[11], selector);
|
|
w[42] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[41] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[40] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[39] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[38] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[37] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[36] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[35] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[34] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[33] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[32] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = __byte_perm_S (w[29], w[30], selector);
|
|
w[62] = __byte_perm_S (w[28], w[29], selector);
|
|
w[61] = __byte_perm_S (w[27], w[28], selector);
|
|
w[60] = __byte_perm_S (w[26], w[27], selector);
|
|
w[59] = __byte_perm_S (w[25], w[26], selector);
|
|
w[58] = __byte_perm_S (w[24], w[25], selector);
|
|
w[57] = __byte_perm_S (w[23], w[24], selector);
|
|
w[56] = __byte_perm_S (w[22], w[23], selector);
|
|
w[55] = __byte_perm_S (w[21], w[22], selector);
|
|
w[54] = __byte_perm_S (w[20], w[21], selector);
|
|
w[53] = __byte_perm_S (w[19], w[20], selector);
|
|
w[52] = __byte_perm_S (w[18], w[19], selector);
|
|
w[51] = __byte_perm_S (w[17], w[18], selector);
|
|
w[50] = __byte_perm_S (w[16], w[17], selector);
|
|
w[49] = __byte_perm_S (w[15], w[16], selector);
|
|
w[48] = __byte_perm_S (w[14], w[15], selector);
|
|
w[47] = __byte_perm_S (w[13], w[14], selector);
|
|
w[46] = __byte_perm_S (w[12], w[13], selector);
|
|
w[45] = __byte_perm_S (w[11], w[12], selector);
|
|
w[44] = __byte_perm_S (w[10], w[11], selector);
|
|
w[43] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[42] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[41] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[40] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[39] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[38] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[37] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[36] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[35] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[34] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[33] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = __byte_perm_S (w[28], w[29], selector);
|
|
w[62] = __byte_perm_S (w[27], w[28], selector);
|
|
w[61] = __byte_perm_S (w[26], w[27], selector);
|
|
w[60] = __byte_perm_S (w[25], w[26], selector);
|
|
w[59] = __byte_perm_S (w[24], w[25], selector);
|
|
w[58] = __byte_perm_S (w[23], w[24], selector);
|
|
w[57] = __byte_perm_S (w[22], w[23], selector);
|
|
w[56] = __byte_perm_S (w[21], w[22], selector);
|
|
w[55] = __byte_perm_S (w[20], w[21], selector);
|
|
w[54] = __byte_perm_S (w[19], w[20], selector);
|
|
w[53] = __byte_perm_S (w[18], w[19], selector);
|
|
w[52] = __byte_perm_S (w[17], w[18], selector);
|
|
w[51] = __byte_perm_S (w[16], w[17], selector);
|
|
w[50] = __byte_perm_S (w[15], w[16], selector);
|
|
w[49] = __byte_perm_S (w[14], w[15], selector);
|
|
w[48] = __byte_perm_S (w[13], w[14], selector);
|
|
w[47] = __byte_perm_S (w[12], w[13], selector);
|
|
w[46] = __byte_perm_S (w[11], w[12], selector);
|
|
w[45] = __byte_perm_S (w[10], w[11], selector);
|
|
w[44] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[43] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[42] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[41] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[40] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[39] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[38] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[37] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[36] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[35] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[34] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = __byte_perm_S (w[27], w[28], selector);
|
|
w[62] = __byte_perm_S (w[26], w[27], selector);
|
|
w[61] = __byte_perm_S (w[25], w[26], selector);
|
|
w[60] = __byte_perm_S (w[24], w[25], selector);
|
|
w[59] = __byte_perm_S (w[23], w[24], selector);
|
|
w[58] = __byte_perm_S (w[22], w[23], selector);
|
|
w[57] = __byte_perm_S (w[21], w[22], selector);
|
|
w[56] = __byte_perm_S (w[20], w[21], selector);
|
|
w[55] = __byte_perm_S (w[19], w[20], selector);
|
|
w[54] = __byte_perm_S (w[18], w[19], selector);
|
|
w[53] = __byte_perm_S (w[17], w[18], selector);
|
|
w[52] = __byte_perm_S (w[16], w[17], selector);
|
|
w[51] = __byte_perm_S (w[15], w[16], selector);
|
|
w[50] = __byte_perm_S (w[14], w[15], selector);
|
|
w[49] = __byte_perm_S (w[13], w[14], selector);
|
|
w[48] = __byte_perm_S (w[12], w[13], selector);
|
|
w[47] = __byte_perm_S (w[11], w[12], selector);
|
|
w[46] = __byte_perm_S (w[10], w[11], selector);
|
|
w[45] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[44] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[43] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[42] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[41] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[40] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[39] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[38] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[37] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[36] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[35] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = __byte_perm_S (w[26], w[27], selector);
|
|
w[62] = __byte_perm_S (w[25], w[26], selector);
|
|
w[61] = __byte_perm_S (w[24], w[25], selector);
|
|
w[60] = __byte_perm_S (w[23], w[24], selector);
|
|
w[59] = __byte_perm_S (w[22], w[23], selector);
|
|
w[58] = __byte_perm_S (w[21], w[22], selector);
|
|
w[57] = __byte_perm_S (w[20], w[21], selector);
|
|
w[56] = __byte_perm_S (w[19], w[20], selector);
|
|
w[55] = __byte_perm_S (w[18], w[19], selector);
|
|
w[54] = __byte_perm_S (w[17], w[18], selector);
|
|
w[53] = __byte_perm_S (w[16], w[17], selector);
|
|
w[52] = __byte_perm_S (w[15], w[16], selector);
|
|
w[51] = __byte_perm_S (w[14], w[15], selector);
|
|
w[50] = __byte_perm_S (w[13], w[14], selector);
|
|
w[49] = __byte_perm_S (w[12], w[13], selector);
|
|
w[48] = __byte_perm_S (w[11], w[12], selector);
|
|
w[47] = __byte_perm_S (w[10], w[11], selector);
|
|
w[46] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[45] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[44] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[43] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[42] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[41] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[40] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[39] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[38] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[37] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[36] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = __byte_perm_S (w[25], w[26], selector);
|
|
w[62] = __byte_perm_S (w[24], w[25], selector);
|
|
w[61] = __byte_perm_S (w[23], w[24], selector);
|
|
w[60] = __byte_perm_S (w[22], w[23], selector);
|
|
w[59] = __byte_perm_S (w[21], w[22], selector);
|
|
w[58] = __byte_perm_S (w[20], w[21], selector);
|
|
w[57] = __byte_perm_S (w[19], w[20], selector);
|
|
w[56] = __byte_perm_S (w[18], w[19], selector);
|
|
w[55] = __byte_perm_S (w[17], w[18], selector);
|
|
w[54] = __byte_perm_S (w[16], w[17], selector);
|
|
w[53] = __byte_perm_S (w[15], w[16], selector);
|
|
w[52] = __byte_perm_S (w[14], w[15], selector);
|
|
w[51] = __byte_perm_S (w[13], w[14], selector);
|
|
w[50] = __byte_perm_S (w[12], w[13], selector);
|
|
w[49] = __byte_perm_S (w[11], w[12], selector);
|
|
w[48] = __byte_perm_S (w[10], w[11], selector);
|
|
w[47] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[46] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[45] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[44] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[43] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[42] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[41] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[40] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[39] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[38] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[37] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = __byte_perm_S (w[24], w[25], selector);
|
|
w[62] = __byte_perm_S (w[23], w[24], selector);
|
|
w[61] = __byte_perm_S (w[22], w[23], selector);
|
|
w[60] = __byte_perm_S (w[21], w[22], selector);
|
|
w[59] = __byte_perm_S (w[20], w[21], selector);
|
|
w[58] = __byte_perm_S (w[19], w[20], selector);
|
|
w[57] = __byte_perm_S (w[18], w[19], selector);
|
|
w[56] = __byte_perm_S (w[17], w[18], selector);
|
|
w[55] = __byte_perm_S (w[16], w[17], selector);
|
|
w[54] = __byte_perm_S (w[15], w[16], selector);
|
|
w[53] = __byte_perm_S (w[14], w[15], selector);
|
|
w[52] = __byte_perm_S (w[13], w[14], selector);
|
|
w[51] = __byte_perm_S (w[12], w[13], selector);
|
|
w[50] = __byte_perm_S (w[11], w[12], selector);
|
|
w[49] = __byte_perm_S (w[10], w[11], selector);
|
|
w[48] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[47] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[46] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[45] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[44] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[43] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[42] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[41] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[40] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[39] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[38] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = __byte_perm_S (w[23], w[24], selector);
|
|
w[62] = __byte_perm_S (w[22], w[23], selector);
|
|
w[61] = __byte_perm_S (w[21], w[22], selector);
|
|
w[60] = __byte_perm_S (w[20], w[21], selector);
|
|
w[59] = __byte_perm_S (w[19], w[20], selector);
|
|
w[58] = __byte_perm_S (w[18], w[19], selector);
|
|
w[57] = __byte_perm_S (w[17], w[18], selector);
|
|
w[56] = __byte_perm_S (w[16], w[17], selector);
|
|
w[55] = __byte_perm_S (w[15], w[16], selector);
|
|
w[54] = __byte_perm_S (w[14], w[15], selector);
|
|
w[53] = __byte_perm_S (w[13], w[14], selector);
|
|
w[52] = __byte_perm_S (w[12], w[13], selector);
|
|
w[51] = __byte_perm_S (w[11], w[12], selector);
|
|
w[50] = __byte_perm_S (w[10], w[11], selector);
|
|
w[49] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[48] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[47] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[46] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[45] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[44] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[43] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[42] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[41] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[40] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[39] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = __byte_perm_S (w[22], w[23], selector);
|
|
w[62] = __byte_perm_S (w[21], w[22], selector);
|
|
w[61] = __byte_perm_S (w[20], w[21], selector);
|
|
w[60] = __byte_perm_S (w[19], w[20], selector);
|
|
w[59] = __byte_perm_S (w[18], w[19], selector);
|
|
w[58] = __byte_perm_S (w[17], w[18], selector);
|
|
w[57] = __byte_perm_S (w[16], w[17], selector);
|
|
w[56] = __byte_perm_S (w[15], w[16], selector);
|
|
w[55] = __byte_perm_S (w[14], w[15], selector);
|
|
w[54] = __byte_perm_S (w[13], w[14], selector);
|
|
w[53] = __byte_perm_S (w[12], w[13], selector);
|
|
w[52] = __byte_perm_S (w[11], w[12], selector);
|
|
w[51] = __byte_perm_S (w[10], w[11], selector);
|
|
w[50] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[49] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[48] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[47] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[46] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[45] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[44] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[43] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[42] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[41] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[40] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = __byte_perm_S (w[21], w[22], selector);
|
|
w[62] = __byte_perm_S (w[20], w[21], selector);
|
|
w[61] = __byte_perm_S (w[19], w[20], selector);
|
|
w[60] = __byte_perm_S (w[18], w[19], selector);
|
|
w[59] = __byte_perm_S (w[17], w[18], selector);
|
|
w[58] = __byte_perm_S (w[16], w[17], selector);
|
|
w[57] = __byte_perm_S (w[15], w[16], selector);
|
|
w[56] = __byte_perm_S (w[14], w[15], selector);
|
|
w[55] = __byte_perm_S (w[13], w[14], selector);
|
|
w[54] = __byte_perm_S (w[12], w[13], selector);
|
|
w[53] = __byte_perm_S (w[11], w[12], selector);
|
|
w[52] = __byte_perm_S (w[10], w[11], selector);
|
|
w[51] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[50] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[49] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[48] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[47] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[46] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[45] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[44] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[43] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[42] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[41] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = __byte_perm_S (w[20], w[21], selector);
|
|
w[62] = __byte_perm_S (w[19], w[20], selector);
|
|
w[61] = __byte_perm_S (w[18], w[19], selector);
|
|
w[60] = __byte_perm_S (w[17], w[18], selector);
|
|
w[59] = __byte_perm_S (w[16], w[17], selector);
|
|
w[58] = __byte_perm_S (w[15], w[16], selector);
|
|
w[57] = __byte_perm_S (w[14], w[15], selector);
|
|
w[56] = __byte_perm_S (w[13], w[14], selector);
|
|
w[55] = __byte_perm_S (w[12], w[13], selector);
|
|
w[54] = __byte_perm_S (w[11], w[12], selector);
|
|
w[53] = __byte_perm_S (w[10], w[11], selector);
|
|
w[52] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[51] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[50] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[49] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[48] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[47] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[46] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[45] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[44] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[43] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[42] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = __byte_perm_S (w[19], w[20], selector);
|
|
w[62] = __byte_perm_S (w[18], w[19], selector);
|
|
w[61] = __byte_perm_S (w[17], w[18], selector);
|
|
w[60] = __byte_perm_S (w[16], w[17], selector);
|
|
w[59] = __byte_perm_S (w[15], w[16], selector);
|
|
w[58] = __byte_perm_S (w[14], w[15], selector);
|
|
w[57] = __byte_perm_S (w[13], w[14], selector);
|
|
w[56] = __byte_perm_S (w[12], w[13], selector);
|
|
w[55] = __byte_perm_S (w[11], w[12], selector);
|
|
w[54] = __byte_perm_S (w[10], w[11], selector);
|
|
w[53] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[52] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[51] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[50] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[49] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[48] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[47] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[46] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[45] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[44] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[43] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = __byte_perm_S (w[18], w[19], selector);
|
|
w[62] = __byte_perm_S (w[17], w[18], selector);
|
|
w[61] = __byte_perm_S (w[16], w[17], selector);
|
|
w[60] = __byte_perm_S (w[15], w[16], selector);
|
|
w[59] = __byte_perm_S (w[14], w[15], selector);
|
|
w[58] = __byte_perm_S (w[13], w[14], selector);
|
|
w[57] = __byte_perm_S (w[12], w[13], selector);
|
|
w[56] = __byte_perm_S (w[11], w[12], selector);
|
|
w[55] = __byte_perm_S (w[10], w[11], selector);
|
|
w[54] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[53] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[52] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[51] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[50] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[49] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[48] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[47] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[46] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[45] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[44] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = __byte_perm_S (w[17], w[18], selector);
|
|
w[62] = __byte_perm_S (w[16], w[17], selector);
|
|
w[61] = __byte_perm_S (w[15], w[16], selector);
|
|
w[60] = __byte_perm_S (w[14], w[15], selector);
|
|
w[59] = __byte_perm_S (w[13], w[14], selector);
|
|
w[58] = __byte_perm_S (w[12], w[13], selector);
|
|
w[57] = __byte_perm_S (w[11], w[12], selector);
|
|
w[56] = __byte_perm_S (w[10], w[11], selector);
|
|
w[55] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[54] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[53] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[52] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[51] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[50] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[49] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[48] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[47] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[46] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[45] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = __byte_perm_S (w[16], w[17], selector);
|
|
w[62] = __byte_perm_S (w[15], w[16], selector);
|
|
w[61] = __byte_perm_S (w[14], w[15], selector);
|
|
w[60] = __byte_perm_S (w[13], w[14], selector);
|
|
w[59] = __byte_perm_S (w[12], w[13], selector);
|
|
w[58] = __byte_perm_S (w[11], w[12], selector);
|
|
w[57] = __byte_perm_S (w[10], w[11], selector);
|
|
w[56] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[55] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[54] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[53] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[52] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[51] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[50] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[49] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[48] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[47] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[46] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = __byte_perm_S (w[15], w[16], selector);
|
|
w[62] = __byte_perm_S (w[14], w[15], selector);
|
|
w[61] = __byte_perm_S (w[13], w[14], selector);
|
|
w[60] = __byte_perm_S (w[12], w[13], selector);
|
|
w[59] = __byte_perm_S (w[11], w[12], selector);
|
|
w[58] = __byte_perm_S (w[10], w[11], selector);
|
|
w[57] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[56] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[55] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[54] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[53] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[52] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[51] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[50] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[49] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[48] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[47] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = __byte_perm_S (w[14], w[15], selector);
|
|
w[62] = __byte_perm_S (w[13], w[14], selector);
|
|
w[61] = __byte_perm_S (w[12], w[13], selector);
|
|
w[60] = __byte_perm_S (w[11], w[12], selector);
|
|
w[59] = __byte_perm_S (w[10], w[11], selector);
|
|
w[58] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[57] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[56] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[55] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[54] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[53] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[52] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[51] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[50] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[49] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[48] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = __byte_perm_S (w[13], w[14], selector);
|
|
w[62] = __byte_perm_S (w[12], w[13], selector);
|
|
w[61] = __byte_perm_S (w[11], w[12], selector);
|
|
w[60] = __byte_perm_S (w[10], w[11], selector);
|
|
w[59] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[58] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[57] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[56] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[55] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[54] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[53] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[52] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[51] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[50] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[49] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = __byte_perm_S (w[12], w[13], selector);
|
|
w[62] = __byte_perm_S (w[11], w[12], selector);
|
|
w[61] = __byte_perm_S (w[10], w[11], selector);
|
|
w[60] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[59] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[58] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[57] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[56] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[55] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[54] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[53] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[52] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[51] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[50] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = __byte_perm_S (w[11], w[12], selector);
|
|
w[62] = __byte_perm_S (w[10], w[11], selector);
|
|
w[61] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[60] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[59] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[58] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[57] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[56] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[55] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[54] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[53] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[52] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[51] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = __byte_perm_S (w[10], w[11], selector);
|
|
w[62] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[61] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[60] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[59] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[58] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[57] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[56] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[55] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[54] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[53] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[52] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = __byte_perm_S (w[ 9], w[10], selector);
|
|
w[62] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[61] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[60] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[59] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[58] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[57] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[56] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[55] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[54] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[53] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = __byte_perm_S (w[ 8], w[ 9], selector);
|
|
w[62] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[61] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[60] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[59] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[58] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[57] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[56] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[55] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[54] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = __byte_perm_S (w[ 7], w[ 8], selector);
|
|
w[62] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[61] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[60] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[59] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[58] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[57] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[56] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[55] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = __byte_perm_S (w[ 6], w[ 7], selector);
|
|
w[62] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[61] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[60] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[59] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[58] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[57] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[56] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = __byte_perm_S (w[ 5], w[ 6], selector);
|
|
w[62] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[61] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[60] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[59] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[58] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[57] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = __byte_perm_S (w[ 4], w[ 5], selector);
|
|
w[62] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[61] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[60] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[59] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[58] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = __byte_perm_S (w[ 3], w[ 4], selector);
|
|
w[62] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[61] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[60] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[59] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = __byte_perm_S (w[ 2], w[ 3], selector);
|
|
w[62] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[61] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[60] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = __byte_perm_S (w[ 1], w[ 2], selector);
|
|
w[62] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[61] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = __byte_perm_S (w[ 0], w[ 1], selector);
|
|
w[62] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = __byte_perm_S ( 0, w[ 0], selector);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void switch_buffer_by_offset_1x64_be_S (u32 w[64], const u32 offset)
|
|
{
|
|
#if defined IS_AMD_LEGACY || defined IS_GENERIC
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = amd_bytealign_S (w[62], w[63], offset);
|
|
w[62] = amd_bytealign_S (w[61], w[62], offset);
|
|
w[61] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[60] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[59] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[58] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[57] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[56] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[55] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[54] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[53] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[52] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[51] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[50] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[49] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[48] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[47] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[46] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[45] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[44] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[43] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[42] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[41] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[40] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[39] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[38] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[37] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[36] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[35] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[34] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[33] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[32] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[31] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[30] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[29] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[28] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[27] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[26] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[25] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[24] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[23] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[22] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[21] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[20] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[19] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[18] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[17] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[16] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[15] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[14] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[13] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[12] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[11] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[10] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 2] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 1] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 0] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = amd_bytealign_S (w[61], w[62], offset);
|
|
w[62] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[61] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[60] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[59] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[58] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[57] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[56] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[55] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[54] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[53] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[52] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[51] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[50] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[49] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[48] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[47] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[46] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[45] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[44] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[43] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[42] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[41] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[40] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[39] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[38] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[37] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[36] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[35] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[34] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[33] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[32] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[31] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[30] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[29] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[28] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[27] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[26] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[25] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[24] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[23] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[22] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[21] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[20] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[19] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[18] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[17] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[16] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[15] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[14] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[13] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[12] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[11] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[10] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 2] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 1] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = amd_bytealign_S (w[60], w[61], offset);
|
|
w[62] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[61] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[60] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[59] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[58] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[57] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[56] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[55] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[54] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[53] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[52] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[51] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[50] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[49] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[48] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[47] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[46] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[45] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[44] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[43] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[42] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[41] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[40] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[39] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[38] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[37] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[36] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[35] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[34] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[33] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[32] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[31] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[30] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[29] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[28] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[27] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[26] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[25] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[24] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[23] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[22] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[21] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[20] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[19] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[18] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[17] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[16] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[15] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[14] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[13] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[12] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[11] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[10] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 3] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 2] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = amd_bytealign_S (w[59], w[60], offset);
|
|
w[62] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[61] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[60] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[59] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[58] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[57] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[56] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[55] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[54] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[53] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[52] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[51] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[50] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[49] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[48] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[47] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[46] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[45] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[44] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[43] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[42] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[41] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[40] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[39] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[38] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[37] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[36] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[35] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[34] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[33] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[32] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[31] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[30] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[29] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[28] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[27] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[26] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[25] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[24] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[23] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[22] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[21] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[20] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[19] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[18] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[17] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[16] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[15] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[14] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[13] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[12] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[11] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[10] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 4] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 3] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = amd_bytealign_S (w[58], w[59], offset);
|
|
w[62] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[61] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[60] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[59] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[58] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[57] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[56] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[55] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[54] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[53] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[52] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[51] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[50] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[49] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[48] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[47] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[46] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[45] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[44] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[43] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[42] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[41] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[40] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[39] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[38] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[37] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[36] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[35] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[34] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[33] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[32] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[31] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[30] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[29] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[28] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[27] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[26] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[25] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[24] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[23] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[22] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[21] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[20] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[19] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[18] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[17] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[16] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[15] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[14] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[13] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[12] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[11] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[10] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 5] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 4] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = amd_bytealign_S (w[57], w[58], offset);
|
|
w[62] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[61] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[60] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[59] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[58] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[57] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[56] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[55] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[54] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[53] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[52] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[51] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[50] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[49] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[48] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[47] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[46] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[45] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[44] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[43] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[42] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[41] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[40] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[39] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[38] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[37] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[36] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[35] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[34] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[33] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[32] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[31] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[30] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[29] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[28] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[27] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[26] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[25] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[24] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[23] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[22] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[21] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[20] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[19] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[18] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[17] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[16] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[15] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[14] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[13] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[12] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[11] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[10] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 6] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 5] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = amd_bytealign_S (w[56], w[57], offset);
|
|
w[62] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[61] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[60] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[59] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[58] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[57] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[56] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[55] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[54] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[53] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[52] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[51] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[50] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[49] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[48] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[47] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[46] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[45] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[44] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[43] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[42] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[41] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[40] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[39] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[38] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[37] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[36] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[35] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[34] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[33] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[32] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[31] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[30] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[29] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[28] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[27] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[26] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[25] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[24] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[23] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[22] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[21] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[20] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[19] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[18] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[17] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[16] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[15] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[14] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[13] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[12] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[11] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[10] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 7] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 6] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = amd_bytealign_S (w[55], w[56], offset);
|
|
w[62] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[61] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[60] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[59] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[58] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[57] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[56] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[55] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[54] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[53] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[52] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[51] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[50] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[49] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[48] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[47] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[46] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[45] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[44] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[43] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[42] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[41] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[40] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[39] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[38] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[37] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[36] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[35] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[34] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[33] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[32] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[31] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[30] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[29] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[28] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[27] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[26] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[25] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[24] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[23] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[22] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[21] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[20] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[19] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[18] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[17] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[16] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[15] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[14] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[13] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[12] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[11] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[10] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 8] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 7] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = amd_bytealign_S (w[54], w[55], offset);
|
|
w[62] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[61] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[60] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[59] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[58] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[57] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[56] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[55] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[54] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[53] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[52] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[51] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[50] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[49] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[48] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[47] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[46] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[45] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[44] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[43] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[42] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[41] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[40] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[39] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[38] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[37] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[36] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[35] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[34] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[33] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[32] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[31] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[30] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[29] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[28] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[27] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[26] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[25] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[24] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[23] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[22] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[21] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[20] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[19] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[18] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[17] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[16] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[15] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[14] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[13] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[12] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[11] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[10] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[ 9] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 8] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = amd_bytealign_S (w[53], w[54], offset);
|
|
w[62] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[61] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[60] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[59] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[58] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[57] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[56] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[55] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[54] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[53] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[52] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[51] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[50] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[49] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[48] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[47] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[46] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[45] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[44] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[43] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[42] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[41] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[40] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[39] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[38] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[37] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[36] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[35] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[34] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[33] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[32] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[31] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[30] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[29] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[28] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[27] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[26] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[25] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[24] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[23] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[22] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[21] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[20] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[19] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[18] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[17] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[16] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[15] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[14] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[13] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[12] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[11] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[10] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[ 9] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = amd_bytealign_S (w[52], w[53], offset);
|
|
w[62] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[61] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[60] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[59] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[58] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[57] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[56] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[55] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[54] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[53] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[52] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[51] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[50] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[49] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[48] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[47] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[46] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[45] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[44] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[43] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[42] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[41] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[40] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[39] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[38] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[37] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[36] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[35] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[34] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[33] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[32] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[31] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[30] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[29] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[28] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[27] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[26] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[25] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[24] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[23] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[22] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[21] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[20] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[19] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[18] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[17] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[16] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[15] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[14] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[13] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[12] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[11] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[10] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = amd_bytealign_S (w[51], w[52], offset);
|
|
w[62] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[61] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[60] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[59] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[58] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[57] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[56] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[55] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[54] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[53] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[52] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[51] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[50] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[49] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[48] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[47] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[46] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[45] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[44] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[43] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[42] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[41] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[40] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[39] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[38] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[37] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[36] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[35] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[34] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[33] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[32] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[31] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[30] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[29] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[28] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[27] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[26] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[25] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[24] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[23] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[22] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[21] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[20] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[19] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[18] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[17] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[16] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[15] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[14] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[13] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[12] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[11] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = amd_bytealign_S (w[50], w[51], offset);
|
|
w[62] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[61] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[60] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[59] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[58] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[57] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[56] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[55] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[54] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[53] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[52] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[51] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[50] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[49] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[48] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[47] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[46] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[45] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[44] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[43] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[42] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[41] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[40] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[39] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[38] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[37] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[36] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[35] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[34] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[33] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[32] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[31] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[30] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[29] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[28] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[27] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[26] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[25] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[24] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[23] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[22] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[21] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[20] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[19] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[18] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[17] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[16] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[15] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[14] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[13] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[12] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = amd_bytealign_S (w[49], w[50], offset);
|
|
w[62] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[61] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[60] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[59] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[58] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[57] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[56] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[55] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[54] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[53] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[52] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[51] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[50] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[49] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[48] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[47] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[46] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[45] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[44] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[43] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[42] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[41] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[40] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[39] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[38] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[37] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[36] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[35] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[34] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[33] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[32] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[31] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[30] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[29] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[28] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[27] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[26] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[25] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[24] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[23] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[22] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[21] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[20] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[19] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[18] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[17] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[16] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[15] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[14] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[13] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = amd_bytealign_S (w[48], w[49], offset);
|
|
w[62] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[61] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[60] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[59] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[58] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[57] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[56] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[55] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[54] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[53] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[52] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[51] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[50] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[49] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[48] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[47] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[46] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[45] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[44] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[43] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[42] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[41] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[40] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[39] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[38] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[37] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[36] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[35] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[34] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[33] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[32] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[31] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[30] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[29] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[28] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[27] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[26] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[25] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[24] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[23] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[22] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[21] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[20] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[19] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[18] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[17] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[16] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[15] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[14] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = amd_bytealign_S (w[47], w[48], offset);
|
|
w[62] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[61] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[60] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[59] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[58] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[57] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[56] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[55] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[54] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[53] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[52] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[51] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[50] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[49] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[48] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[47] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[46] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[45] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[44] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[43] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[42] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[41] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[40] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[39] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[38] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[37] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[36] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[35] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[34] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[33] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[32] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[31] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[30] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[29] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[28] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[27] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[26] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[25] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[24] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[23] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[22] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[21] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[20] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[19] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[18] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[17] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[16] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[15] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = amd_bytealign_S (w[46], w[47], offset);
|
|
w[62] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[61] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[60] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[59] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[58] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[57] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[56] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[55] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[54] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[53] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[52] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[51] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[50] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[49] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[48] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[47] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[46] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[45] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[44] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[43] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[42] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[41] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[40] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[39] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[38] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[37] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[36] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[35] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[34] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[33] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[32] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[31] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[30] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[29] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[28] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[27] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[26] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[25] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[24] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[23] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[22] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[21] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[20] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[19] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[18] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[17] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[16] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = amd_bytealign_S (w[45], w[46], offset);
|
|
w[62] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[61] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[60] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[59] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[58] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[57] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[56] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[55] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[54] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[53] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[52] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[51] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[50] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[49] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[48] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[47] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[46] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[45] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[44] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[43] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[42] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[41] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[40] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[39] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[38] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[37] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[36] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[35] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[34] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[33] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[32] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[31] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[30] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[29] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[28] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[27] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[26] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[25] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[24] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[23] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[22] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[21] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[20] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[19] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[18] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[17] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = amd_bytealign_S (w[44], w[45], offset);
|
|
w[62] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[61] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[60] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[59] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[58] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[57] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[56] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[55] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[54] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[53] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[52] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[51] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[50] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[49] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[48] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[47] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[46] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[45] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[44] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[43] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[42] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[41] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[40] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[39] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[38] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[37] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[36] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[35] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[34] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[33] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[32] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[31] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[30] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[29] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[28] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[27] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[26] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[25] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[24] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[23] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[22] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[21] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[20] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[19] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[18] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = amd_bytealign_S (w[43], w[44], offset);
|
|
w[62] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[61] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[60] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[59] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[58] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[57] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[56] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[55] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[54] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[53] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[52] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[51] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[50] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[49] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[48] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[47] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[46] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[45] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[44] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[43] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[42] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[41] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[40] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[39] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[38] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[37] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[36] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[35] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[34] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[33] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[32] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[31] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[30] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[29] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[28] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[27] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[26] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[25] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[24] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[23] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[22] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[21] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[20] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[19] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = amd_bytealign_S (w[42], w[43], offset);
|
|
w[62] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[61] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[60] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[59] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[58] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[57] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[56] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[55] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[54] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[53] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[52] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[51] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[50] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[49] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[48] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[47] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[46] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[45] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[44] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[43] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[42] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[41] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[40] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[39] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[38] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[37] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[36] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[35] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[34] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[33] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[32] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[31] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[30] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[29] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[28] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[27] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[26] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[25] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[24] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[23] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[22] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[21] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[20] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = amd_bytealign_S (w[41], w[42], offset);
|
|
w[62] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[61] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[60] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[59] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[58] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[57] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[56] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[55] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[54] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[53] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[52] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[51] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[50] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[49] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[48] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[47] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[46] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[45] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[44] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[43] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[42] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[41] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[40] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[39] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[38] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[37] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[36] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[35] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[34] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[33] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[32] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[31] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[30] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[29] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[28] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[27] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[26] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[25] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[24] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[23] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[22] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[21] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = amd_bytealign_S (w[40], w[41], offset);
|
|
w[62] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[61] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[60] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[59] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[58] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[57] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[56] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[55] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[54] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[53] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[52] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[51] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[50] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[49] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[48] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[47] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[46] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[45] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[44] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[43] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[42] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[41] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[40] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[39] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[38] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[37] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[36] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[35] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[34] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[33] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[32] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[31] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[30] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[29] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[28] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[27] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[26] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[25] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[24] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[23] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[22] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = amd_bytealign_S (w[39], w[40], offset);
|
|
w[62] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[61] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[60] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[59] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[58] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[57] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[56] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[55] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[54] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[53] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[52] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[51] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[50] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[49] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[48] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[47] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[46] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[45] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[44] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[43] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[42] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[41] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[40] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[39] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[38] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[37] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[36] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[35] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[34] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[33] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[32] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[31] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[30] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[29] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[28] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[27] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[26] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[25] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[24] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[23] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = amd_bytealign_S (w[38], w[39], offset);
|
|
w[62] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[61] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[60] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[59] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[58] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[57] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[56] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[55] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[54] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[53] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[52] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[51] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[50] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[49] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[48] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[47] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[46] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[45] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[44] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[43] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[42] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[41] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[40] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[39] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[38] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[37] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[36] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[35] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[34] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[33] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[32] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[31] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[30] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[29] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[28] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[27] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[26] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[25] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[24] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = amd_bytealign_S (w[37], w[38], offset);
|
|
w[62] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[61] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[60] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[59] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[58] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[57] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[56] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[55] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[54] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[53] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[52] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[51] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[50] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[49] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[48] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[47] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[46] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[45] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[44] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[43] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[42] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[41] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[40] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[39] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[38] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[37] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[36] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[35] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[34] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[33] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[32] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[31] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[30] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[29] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[28] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[27] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[26] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[25] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = amd_bytealign_S (w[36], w[37], offset);
|
|
w[62] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[61] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[60] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[59] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[58] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[57] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[56] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[55] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[54] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[53] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[52] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[51] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[50] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[49] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[48] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[47] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[46] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[45] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[44] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[43] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[42] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[41] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[40] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[39] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[38] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[37] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[36] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[35] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[34] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[33] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[32] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[31] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[30] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[29] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[28] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[27] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[26] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = amd_bytealign_S (w[35], w[36], offset);
|
|
w[62] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[61] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[60] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[59] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[58] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[57] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[56] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[55] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[54] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[53] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[52] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[51] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[50] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[49] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[48] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[47] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[46] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[45] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[44] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[43] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[42] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[41] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[40] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[39] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[38] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[37] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[36] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[35] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[34] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[33] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[32] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[31] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[30] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[29] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[28] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[27] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = amd_bytealign_S (w[34], w[35], offset);
|
|
w[62] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[61] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[60] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[59] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[58] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[57] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[56] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[55] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[54] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[53] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[52] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[51] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[50] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[49] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[48] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[47] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[46] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[45] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[44] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[43] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[42] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[41] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[40] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[39] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[38] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[37] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[36] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[35] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[34] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[33] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[32] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[31] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[30] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[29] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[28] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = amd_bytealign_S (w[33], w[34], offset);
|
|
w[62] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[61] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[60] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[59] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[58] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[57] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[56] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[55] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[54] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[53] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[52] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[51] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[50] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[49] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[48] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[47] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[46] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[45] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[44] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[43] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[42] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[41] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[40] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[39] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[38] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[37] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[36] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[35] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[34] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[33] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[32] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[31] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[30] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[29] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = amd_bytealign_S (w[32], w[33], offset);
|
|
w[62] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[61] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[60] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[59] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[58] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[57] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[56] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[55] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[54] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[53] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[52] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[51] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[50] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[49] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[48] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[47] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[46] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[45] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[44] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[43] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[42] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[41] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[40] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[39] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[38] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[37] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[36] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[35] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[34] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[33] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[32] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[31] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[30] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = amd_bytealign_S (w[31], w[32], offset);
|
|
w[62] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[61] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[60] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[59] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[58] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[57] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[56] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[55] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[54] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[53] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[52] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[51] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[50] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[49] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[48] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[47] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[46] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[45] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[44] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[43] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[42] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[41] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[40] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[39] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[38] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[37] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[36] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[35] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[34] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[33] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[32] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[31] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = amd_bytealign_S (w[30], w[31], offset);
|
|
w[62] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[61] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[60] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[59] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[58] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[57] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[56] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[55] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[54] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[53] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[52] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[51] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[50] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[49] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[48] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[47] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[46] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[45] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[44] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[43] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[42] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[41] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[40] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[39] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[38] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[37] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[36] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[35] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[34] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[33] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[32] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = amd_bytealign_S (w[29], w[30], offset);
|
|
w[62] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[61] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[60] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[59] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[58] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[57] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[56] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[55] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[54] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[53] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[52] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[51] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[50] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[49] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[48] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[47] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[46] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[45] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[44] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[43] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[42] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[41] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[40] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[39] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[38] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[37] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[36] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[35] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[34] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[33] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = amd_bytealign_S (w[28], w[29], offset);
|
|
w[62] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[61] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[60] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[59] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[58] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[57] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[56] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[55] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[54] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[53] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[52] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[51] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[50] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[49] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[48] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[47] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[46] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[45] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[44] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[43] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[42] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[41] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[40] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[39] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[38] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[37] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[36] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[35] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[34] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = amd_bytealign_S (w[27], w[28], offset);
|
|
w[62] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[61] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[60] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[59] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[58] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[57] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[56] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[55] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[54] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[53] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[52] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[51] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[50] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[49] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[48] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[47] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[46] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[45] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[44] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[43] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[42] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[41] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[40] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[39] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[38] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[37] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[36] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[35] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = amd_bytealign_S (w[26], w[27], offset);
|
|
w[62] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[61] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[60] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[59] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[58] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[57] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[56] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[55] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[54] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[53] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[52] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[51] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[50] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[49] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[48] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[47] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[46] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[45] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[44] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[43] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[42] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[41] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[40] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[39] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[38] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[37] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[36] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = amd_bytealign_S (w[25], w[26], offset);
|
|
w[62] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[61] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[60] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[59] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[58] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[57] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[56] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[55] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[54] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[53] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[52] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[51] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[50] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[49] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[48] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[47] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[46] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[45] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[44] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[43] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[42] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[41] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[40] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[39] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[38] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[37] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = amd_bytealign_S (w[24], w[25], offset);
|
|
w[62] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[61] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[60] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[59] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[58] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[57] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[56] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[55] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[54] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[53] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[52] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[51] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[50] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[49] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[48] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[47] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[46] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[45] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[44] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[43] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[42] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[41] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[40] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[39] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[38] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = amd_bytealign_S (w[23], w[24], offset);
|
|
w[62] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[61] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[60] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[59] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[58] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[57] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[56] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[55] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[54] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[53] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[52] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[51] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[50] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[49] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[48] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[47] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[46] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[45] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[44] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[43] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[42] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[41] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[40] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[39] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = amd_bytealign_S (w[22], w[23], offset);
|
|
w[62] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[61] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[60] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[59] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[58] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[57] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[56] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[55] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[54] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[53] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[52] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[51] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[50] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[49] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[48] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[47] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[46] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[45] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[44] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[43] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[42] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[41] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[40] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = amd_bytealign_S (w[21], w[22], offset);
|
|
w[62] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[61] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[60] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[59] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[58] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[57] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[56] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[55] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[54] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[53] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[52] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[51] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[50] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[49] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[48] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[47] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[46] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[45] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[44] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[43] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[42] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[41] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = amd_bytealign_S (w[20], w[21], offset);
|
|
w[62] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[61] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[60] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[59] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[58] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[57] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[56] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[55] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[54] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[53] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[52] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[51] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[50] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[49] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[48] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[47] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[46] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[45] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[44] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[43] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[42] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = amd_bytealign_S (w[19], w[20], offset);
|
|
w[62] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[61] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[60] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[59] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[58] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[57] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[56] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[55] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[54] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[53] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[52] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[51] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[50] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[49] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[48] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[47] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[46] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[45] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[44] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[43] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = amd_bytealign_S (w[18], w[19], offset);
|
|
w[62] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[61] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[60] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[59] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[58] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[57] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[56] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[55] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[54] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[53] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[52] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[51] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[50] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[49] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[48] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[47] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[46] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[45] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[44] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = amd_bytealign_S (w[17], w[18], offset);
|
|
w[62] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[61] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[60] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[59] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[58] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[57] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[56] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[55] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[54] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[53] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[52] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[51] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[50] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[49] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[48] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[47] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[46] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[45] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = amd_bytealign_S (w[16], w[17], offset);
|
|
w[62] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[61] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[60] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[59] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[58] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[57] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[56] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[55] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[54] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[53] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[52] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[51] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[50] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[49] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[48] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[47] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[46] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = amd_bytealign_S (w[15], w[16], offset);
|
|
w[62] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[61] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[60] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[59] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[58] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[57] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[56] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[55] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[54] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[53] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[52] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[51] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[50] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[49] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[48] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[47] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = amd_bytealign_S (w[14], w[15], offset);
|
|
w[62] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[61] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[60] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[59] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[58] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[57] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[56] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[55] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[54] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[53] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[52] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[51] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[50] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[49] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[48] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = amd_bytealign_S (w[13], w[14], offset);
|
|
w[62] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[61] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[60] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[59] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[58] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[57] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[56] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[55] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[54] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[53] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[52] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[51] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[50] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[49] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = amd_bytealign_S (w[12], w[13], offset);
|
|
w[62] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[61] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[60] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[59] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[58] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[57] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[56] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[55] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[54] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[53] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[52] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[51] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[50] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = amd_bytealign_S (w[11], w[12], offset);
|
|
w[62] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[61] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[60] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[59] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[58] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[57] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[56] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[55] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[54] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[53] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[52] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[51] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = amd_bytealign_S (w[10], w[11], offset);
|
|
w[62] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[61] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[60] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[59] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[58] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[57] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[56] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[55] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[54] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[53] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[52] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = amd_bytealign_S (w[ 9], w[10], offset);
|
|
w[62] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[61] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[60] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[59] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[58] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[57] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[56] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[55] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[54] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[53] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = amd_bytealign_S (w[ 8], w[ 9], offset);
|
|
w[62] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[61] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[60] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[59] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[58] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[57] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[56] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[55] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[54] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = amd_bytealign_S (w[ 7], w[ 8], offset);
|
|
w[62] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[61] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[60] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[59] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[58] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[57] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[56] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[55] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = amd_bytealign_S (w[ 6], w[ 7], offset);
|
|
w[62] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[61] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[60] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[59] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[58] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[57] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[56] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = amd_bytealign_S (w[ 5], w[ 6], offset);
|
|
w[62] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[61] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[60] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[59] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[58] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[57] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = amd_bytealign_S (w[ 4], w[ 5], offset);
|
|
w[62] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[61] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[60] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[59] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[58] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = amd_bytealign_S (w[ 3], w[ 4], offset);
|
|
w[62] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[61] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[60] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[59] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = amd_bytealign_S (w[ 2], w[ 3], offset);
|
|
w[62] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[61] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[60] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = amd_bytealign_S (w[ 1], w[ 2], offset);
|
|
w[62] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[61] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = amd_bytealign_S (w[ 0], w[ 1], offset);
|
|
w[62] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = amd_bytealign_S ( 0, w[ 0], offset);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM || defined IS_NV
|
|
|
|
#if defined IS_NV
|
|
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
|
#endif
|
|
|
|
#if defined IS_AMD_ROCM
|
|
const int selector = 0x0706050403020100 >> ((offset & 3) * 8);
|
|
#endif
|
|
|
|
switch (offset / 4)
|
|
{
|
|
case 0:
|
|
w[63] = __byte_perm_S (w[63], w[62], selector);
|
|
w[62] = __byte_perm_S (w[62], w[61], selector);
|
|
w[61] = __byte_perm_S (w[61], w[60], selector);
|
|
w[60] = __byte_perm_S (w[60], w[59], selector);
|
|
w[59] = __byte_perm_S (w[59], w[58], selector);
|
|
w[58] = __byte_perm_S (w[58], w[57], selector);
|
|
w[57] = __byte_perm_S (w[57], w[56], selector);
|
|
w[56] = __byte_perm_S (w[56], w[55], selector);
|
|
w[55] = __byte_perm_S (w[55], w[54], selector);
|
|
w[54] = __byte_perm_S (w[54], w[53], selector);
|
|
w[53] = __byte_perm_S (w[53], w[52], selector);
|
|
w[52] = __byte_perm_S (w[52], w[51], selector);
|
|
w[51] = __byte_perm_S (w[51], w[50], selector);
|
|
w[50] = __byte_perm_S (w[50], w[49], selector);
|
|
w[49] = __byte_perm_S (w[49], w[48], selector);
|
|
w[48] = __byte_perm_S (w[48], w[47], selector);
|
|
w[47] = __byte_perm_S (w[47], w[46], selector);
|
|
w[46] = __byte_perm_S (w[46], w[45], selector);
|
|
w[45] = __byte_perm_S (w[45], w[44], selector);
|
|
w[44] = __byte_perm_S (w[44], w[43], selector);
|
|
w[43] = __byte_perm_S (w[43], w[42], selector);
|
|
w[42] = __byte_perm_S (w[42], w[41], selector);
|
|
w[41] = __byte_perm_S (w[41], w[40], selector);
|
|
w[40] = __byte_perm_S (w[40], w[39], selector);
|
|
w[39] = __byte_perm_S (w[39], w[38], selector);
|
|
w[38] = __byte_perm_S (w[38], w[37], selector);
|
|
w[37] = __byte_perm_S (w[37], w[36], selector);
|
|
w[36] = __byte_perm_S (w[36], w[35], selector);
|
|
w[35] = __byte_perm_S (w[35], w[34], selector);
|
|
w[34] = __byte_perm_S (w[34], w[33], selector);
|
|
w[33] = __byte_perm_S (w[33], w[32], selector);
|
|
w[32] = __byte_perm_S (w[32], w[31], selector);
|
|
w[31] = __byte_perm_S (w[31], w[30], selector);
|
|
w[30] = __byte_perm_S (w[30], w[29], selector);
|
|
w[29] = __byte_perm_S (w[29], w[28], selector);
|
|
w[28] = __byte_perm_S (w[28], w[27], selector);
|
|
w[27] = __byte_perm_S (w[27], w[26], selector);
|
|
w[26] = __byte_perm_S (w[26], w[25], selector);
|
|
w[25] = __byte_perm_S (w[25], w[24], selector);
|
|
w[24] = __byte_perm_S (w[24], w[23], selector);
|
|
w[23] = __byte_perm_S (w[23], w[22], selector);
|
|
w[22] = __byte_perm_S (w[22], w[21], selector);
|
|
w[21] = __byte_perm_S (w[21], w[20], selector);
|
|
w[20] = __byte_perm_S (w[20], w[19], selector);
|
|
w[19] = __byte_perm_S (w[19], w[18], selector);
|
|
w[18] = __byte_perm_S (w[18], w[17], selector);
|
|
w[17] = __byte_perm_S (w[17], w[16], selector);
|
|
w[16] = __byte_perm_S (w[16], w[15], selector);
|
|
w[15] = __byte_perm_S (w[15], w[14], selector);
|
|
w[14] = __byte_perm_S (w[14], w[13], selector);
|
|
w[13] = __byte_perm_S (w[13], w[12], selector);
|
|
w[12] = __byte_perm_S (w[12], w[11], selector);
|
|
w[11] = __byte_perm_S (w[11], w[10], selector);
|
|
w[10] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[ 9] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[ 8] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[ 7] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[ 6] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[ 5] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 4] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 3] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 2] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 1] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 0] = __byte_perm_S (w[ 0], 0, selector);
|
|
|
|
break;
|
|
|
|
case 1:
|
|
w[63] = __byte_perm_S (w[62], w[61], selector);
|
|
w[62] = __byte_perm_S (w[61], w[60], selector);
|
|
w[61] = __byte_perm_S (w[60], w[59], selector);
|
|
w[60] = __byte_perm_S (w[59], w[58], selector);
|
|
w[59] = __byte_perm_S (w[58], w[57], selector);
|
|
w[58] = __byte_perm_S (w[57], w[56], selector);
|
|
w[57] = __byte_perm_S (w[56], w[55], selector);
|
|
w[56] = __byte_perm_S (w[55], w[54], selector);
|
|
w[55] = __byte_perm_S (w[54], w[53], selector);
|
|
w[54] = __byte_perm_S (w[53], w[52], selector);
|
|
w[53] = __byte_perm_S (w[52], w[51], selector);
|
|
w[52] = __byte_perm_S (w[51], w[50], selector);
|
|
w[51] = __byte_perm_S (w[50], w[49], selector);
|
|
w[50] = __byte_perm_S (w[49], w[48], selector);
|
|
w[49] = __byte_perm_S (w[48], w[47], selector);
|
|
w[48] = __byte_perm_S (w[47], w[46], selector);
|
|
w[47] = __byte_perm_S (w[46], w[45], selector);
|
|
w[46] = __byte_perm_S (w[45], w[44], selector);
|
|
w[45] = __byte_perm_S (w[44], w[43], selector);
|
|
w[44] = __byte_perm_S (w[43], w[42], selector);
|
|
w[43] = __byte_perm_S (w[42], w[41], selector);
|
|
w[42] = __byte_perm_S (w[41], w[40], selector);
|
|
w[41] = __byte_perm_S (w[40], w[39], selector);
|
|
w[40] = __byte_perm_S (w[39], w[38], selector);
|
|
w[39] = __byte_perm_S (w[38], w[37], selector);
|
|
w[38] = __byte_perm_S (w[37], w[36], selector);
|
|
w[37] = __byte_perm_S (w[36], w[35], selector);
|
|
w[36] = __byte_perm_S (w[35], w[34], selector);
|
|
w[35] = __byte_perm_S (w[34], w[33], selector);
|
|
w[34] = __byte_perm_S (w[33], w[32], selector);
|
|
w[33] = __byte_perm_S (w[32], w[31], selector);
|
|
w[32] = __byte_perm_S (w[31], w[30], selector);
|
|
w[31] = __byte_perm_S (w[30], w[29], selector);
|
|
w[30] = __byte_perm_S (w[29], w[28], selector);
|
|
w[29] = __byte_perm_S (w[28], w[27], selector);
|
|
w[28] = __byte_perm_S (w[27], w[26], selector);
|
|
w[27] = __byte_perm_S (w[26], w[25], selector);
|
|
w[26] = __byte_perm_S (w[25], w[24], selector);
|
|
w[25] = __byte_perm_S (w[24], w[23], selector);
|
|
w[24] = __byte_perm_S (w[23], w[22], selector);
|
|
w[23] = __byte_perm_S (w[22], w[21], selector);
|
|
w[22] = __byte_perm_S (w[21], w[20], selector);
|
|
w[21] = __byte_perm_S (w[20], w[19], selector);
|
|
w[20] = __byte_perm_S (w[19], w[18], selector);
|
|
w[19] = __byte_perm_S (w[18], w[17], selector);
|
|
w[18] = __byte_perm_S (w[17], w[16], selector);
|
|
w[17] = __byte_perm_S (w[16], w[15], selector);
|
|
w[16] = __byte_perm_S (w[15], w[14], selector);
|
|
w[15] = __byte_perm_S (w[14], w[13], selector);
|
|
w[14] = __byte_perm_S (w[13], w[12], selector);
|
|
w[13] = __byte_perm_S (w[12], w[11], selector);
|
|
w[12] = __byte_perm_S (w[11], w[10], selector);
|
|
w[11] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[10] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[ 9] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[ 8] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[ 7] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[ 6] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 5] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 4] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 3] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 2] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 1] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
w[63] = __byte_perm_S (w[61], w[60], selector);
|
|
w[62] = __byte_perm_S (w[60], w[59], selector);
|
|
w[61] = __byte_perm_S (w[59], w[58], selector);
|
|
w[60] = __byte_perm_S (w[58], w[57], selector);
|
|
w[59] = __byte_perm_S (w[57], w[56], selector);
|
|
w[58] = __byte_perm_S (w[56], w[55], selector);
|
|
w[57] = __byte_perm_S (w[55], w[54], selector);
|
|
w[56] = __byte_perm_S (w[54], w[53], selector);
|
|
w[55] = __byte_perm_S (w[53], w[52], selector);
|
|
w[54] = __byte_perm_S (w[52], w[51], selector);
|
|
w[53] = __byte_perm_S (w[51], w[50], selector);
|
|
w[52] = __byte_perm_S (w[50], w[49], selector);
|
|
w[51] = __byte_perm_S (w[49], w[48], selector);
|
|
w[50] = __byte_perm_S (w[48], w[47], selector);
|
|
w[49] = __byte_perm_S (w[47], w[46], selector);
|
|
w[48] = __byte_perm_S (w[46], w[45], selector);
|
|
w[47] = __byte_perm_S (w[45], w[44], selector);
|
|
w[46] = __byte_perm_S (w[44], w[43], selector);
|
|
w[45] = __byte_perm_S (w[43], w[42], selector);
|
|
w[44] = __byte_perm_S (w[42], w[41], selector);
|
|
w[43] = __byte_perm_S (w[41], w[40], selector);
|
|
w[42] = __byte_perm_S (w[40], w[39], selector);
|
|
w[41] = __byte_perm_S (w[39], w[38], selector);
|
|
w[40] = __byte_perm_S (w[38], w[37], selector);
|
|
w[39] = __byte_perm_S (w[37], w[36], selector);
|
|
w[38] = __byte_perm_S (w[36], w[35], selector);
|
|
w[37] = __byte_perm_S (w[35], w[34], selector);
|
|
w[36] = __byte_perm_S (w[34], w[33], selector);
|
|
w[35] = __byte_perm_S (w[33], w[32], selector);
|
|
w[34] = __byte_perm_S (w[32], w[31], selector);
|
|
w[33] = __byte_perm_S (w[31], w[30], selector);
|
|
w[32] = __byte_perm_S (w[30], w[29], selector);
|
|
w[31] = __byte_perm_S (w[29], w[28], selector);
|
|
w[30] = __byte_perm_S (w[28], w[27], selector);
|
|
w[29] = __byte_perm_S (w[27], w[26], selector);
|
|
w[28] = __byte_perm_S (w[26], w[25], selector);
|
|
w[27] = __byte_perm_S (w[25], w[24], selector);
|
|
w[26] = __byte_perm_S (w[24], w[23], selector);
|
|
w[25] = __byte_perm_S (w[23], w[22], selector);
|
|
w[24] = __byte_perm_S (w[22], w[21], selector);
|
|
w[23] = __byte_perm_S (w[21], w[20], selector);
|
|
w[22] = __byte_perm_S (w[20], w[19], selector);
|
|
w[21] = __byte_perm_S (w[19], w[18], selector);
|
|
w[20] = __byte_perm_S (w[18], w[17], selector);
|
|
w[19] = __byte_perm_S (w[17], w[16], selector);
|
|
w[18] = __byte_perm_S (w[16], w[15], selector);
|
|
w[17] = __byte_perm_S (w[15], w[14], selector);
|
|
w[16] = __byte_perm_S (w[14], w[13], selector);
|
|
w[15] = __byte_perm_S (w[13], w[12], selector);
|
|
w[14] = __byte_perm_S (w[12], w[11], selector);
|
|
w[13] = __byte_perm_S (w[11], w[10], selector);
|
|
w[12] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[11] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[10] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[ 9] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[ 8] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[ 7] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 6] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 5] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 4] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 3] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 2] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
w[63] = __byte_perm_S (w[60], w[59], selector);
|
|
w[62] = __byte_perm_S (w[59], w[58], selector);
|
|
w[61] = __byte_perm_S (w[58], w[57], selector);
|
|
w[60] = __byte_perm_S (w[57], w[56], selector);
|
|
w[59] = __byte_perm_S (w[56], w[55], selector);
|
|
w[58] = __byte_perm_S (w[55], w[54], selector);
|
|
w[57] = __byte_perm_S (w[54], w[53], selector);
|
|
w[56] = __byte_perm_S (w[53], w[52], selector);
|
|
w[55] = __byte_perm_S (w[52], w[51], selector);
|
|
w[54] = __byte_perm_S (w[51], w[50], selector);
|
|
w[53] = __byte_perm_S (w[50], w[49], selector);
|
|
w[52] = __byte_perm_S (w[49], w[48], selector);
|
|
w[51] = __byte_perm_S (w[48], w[47], selector);
|
|
w[50] = __byte_perm_S (w[47], w[46], selector);
|
|
w[49] = __byte_perm_S (w[46], w[45], selector);
|
|
w[48] = __byte_perm_S (w[45], w[44], selector);
|
|
w[47] = __byte_perm_S (w[44], w[43], selector);
|
|
w[46] = __byte_perm_S (w[43], w[42], selector);
|
|
w[45] = __byte_perm_S (w[42], w[41], selector);
|
|
w[44] = __byte_perm_S (w[41], w[40], selector);
|
|
w[43] = __byte_perm_S (w[40], w[39], selector);
|
|
w[42] = __byte_perm_S (w[39], w[38], selector);
|
|
w[41] = __byte_perm_S (w[38], w[37], selector);
|
|
w[40] = __byte_perm_S (w[37], w[36], selector);
|
|
w[39] = __byte_perm_S (w[36], w[35], selector);
|
|
w[38] = __byte_perm_S (w[35], w[34], selector);
|
|
w[37] = __byte_perm_S (w[34], w[33], selector);
|
|
w[36] = __byte_perm_S (w[33], w[32], selector);
|
|
w[35] = __byte_perm_S (w[32], w[31], selector);
|
|
w[34] = __byte_perm_S (w[31], w[30], selector);
|
|
w[33] = __byte_perm_S (w[30], w[29], selector);
|
|
w[32] = __byte_perm_S (w[29], w[28], selector);
|
|
w[31] = __byte_perm_S (w[28], w[27], selector);
|
|
w[30] = __byte_perm_S (w[27], w[26], selector);
|
|
w[29] = __byte_perm_S (w[26], w[25], selector);
|
|
w[28] = __byte_perm_S (w[25], w[24], selector);
|
|
w[27] = __byte_perm_S (w[24], w[23], selector);
|
|
w[26] = __byte_perm_S (w[23], w[22], selector);
|
|
w[25] = __byte_perm_S (w[22], w[21], selector);
|
|
w[24] = __byte_perm_S (w[21], w[20], selector);
|
|
w[23] = __byte_perm_S (w[20], w[19], selector);
|
|
w[22] = __byte_perm_S (w[19], w[18], selector);
|
|
w[21] = __byte_perm_S (w[18], w[17], selector);
|
|
w[20] = __byte_perm_S (w[17], w[16], selector);
|
|
w[19] = __byte_perm_S (w[16], w[15], selector);
|
|
w[18] = __byte_perm_S (w[15], w[14], selector);
|
|
w[17] = __byte_perm_S (w[14], w[13], selector);
|
|
w[16] = __byte_perm_S (w[13], w[12], selector);
|
|
w[15] = __byte_perm_S (w[12], w[11], selector);
|
|
w[14] = __byte_perm_S (w[11], w[10], selector);
|
|
w[13] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[12] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[11] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[10] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[ 9] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[ 8] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 7] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 6] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 5] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 4] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 3] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
w[63] = __byte_perm_S (w[59], w[58], selector);
|
|
w[62] = __byte_perm_S (w[58], w[57], selector);
|
|
w[61] = __byte_perm_S (w[57], w[56], selector);
|
|
w[60] = __byte_perm_S (w[56], w[55], selector);
|
|
w[59] = __byte_perm_S (w[55], w[54], selector);
|
|
w[58] = __byte_perm_S (w[54], w[53], selector);
|
|
w[57] = __byte_perm_S (w[53], w[52], selector);
|
|
w[56] = __byte_perm_S (w[52], w[51], selector);
|
|
w[55] = __byte_perm_S (w[51], w[50], selector);
|
|
w[54] = __byte_perm_S (w[50], w[49], selector);
|
|
w[53] = __byte_perm_S (w[49], w[48], selector);
|
|
w[52] = __byte_perm_S (w[48], w[47], selector);
|
|
w[51] = __byte_perm_S (w[47], w[46], selector);
|
|
w[50] = __byte_perm_S (w[46], w[45], selector);
|
|
w[49] = __byte_perm_S (w[45], w[44], selector);
|
|
w[48] = __byte_perm_S (w[44], w[43], selector);
|
|
w[47] = __byte_perm_S (w[43], w[42], selector);
|
|
w[46] = __byte_perm_S (w[42], w[41], selector);
|
|
w[45] = __byte_perm_S (w[41], w[40], selector);
|
|
w[44] = __byte_perm_S (w[40], w[39], selector);
|
|
w[43] = __byte_perm_S (w[39], w[38], selector);
|
|
w[42] = __byte_perm_S (w[38], w[37], selector);
|
|
w[41] = __byte_perm_S (w[37], w[36], selector);
|
|
w[40] = __byte_perm_S (w[36], w[35], selector);
|
|
w[39] = __byte_perm_S (w[35], w[34], selector);
|
|
w[38] = __byte_perm_S (w[34], w[33], selector);
|
|
w[37] = __byte_perm_S (w[33], w[32], selector);
|
|
w[36] = __byte_perm_S (w[32], w[31], selector);
|
|
w[35] = __byte_perm_S (w[31], w[30], selector);
|
|
w[34] = __byte_perm_S (w[30], w[29], selector);
|
|
w[33] = __byte_perm_S (w[29], w[28], selector);
|
|
w[32] = __byte_perm_S (w[28], w[27], selector);
|
|
w[31] = __byte_perm_S (w[27], w[26], selector);
|
|
w[30] = __byte_perm_S (w[26], w[25], selector);
|
|
w[29] = __byte_perm_S (w[25], w[24], selector);
|
|
w[28] = __byte_perm_S (w[24], w[23], selector);
|
|
w[27] = __byte_perm_S (w[23], w[22], selector);
|
|
w[26] = __byte_perm_S (w[22], w[21], selector);
|
|
w[25] = __byte_perm_S (w[21], w[20], selector);
|
|
w[24] = __byte_perm_S (w[20], w[19], selector);
|
|
w[23] = __byte_perm_S (w[19], w[18], selector);
|
|
w[22] = __byte_perm_S (w[18], w[17], selector);
|
|
w[21] = __byte_perm_S (w[17], w[16], selector);
|
|
w[20] = __byte_perm_S (w[16], w[15], selector);
|
|
w[19] = __byte_perm_S (w[15], w[14], selector);
|
|
w[18] = __byte_perm_S (w[14], w[13], selector);
|
|
w[17] = __byte_perm_S (w[13], w[12], selector);
|
|
w[16] = __byte_perm_S (w[12], w[11], selector);
|
|
w[15] = __byte_perm_S (w[11], w[10], selector);
|
|
w[14] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[13] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[12] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[11] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[10] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[ 9] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 8] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 7] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 6] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 5] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 4] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 5:
|
|
w[63] = __byte_perm_S (w[58], w[57], selector);
|
|
w[62] = __byte_perm_S (w[57], w[56], selector);
|
|
w[61] = __byte_perm_S (w[56], w[55], selector);
|
|
w[60] = __byte_perm_S (w[55], w[54], selector);
|
|
w[59] = __byte_perm_S (w[54], w[53], selector);
|
|
w[58] = __byte_perm_S (w[53], w[52], selector);
|
|
w[57] = __byte_perm_S (w[52], w[51], selector);
|
|
w[56] = __byte_perm_S (w[51], w[50], selector);
|
|
w[55] = __byte_perm_S (w[50], w[49], selector);
|
|
w[54] = __byte_perm_S (w[49], w[48], selector);
|
|
w[53] = __byte_perm_S (w[48], w[47], selector);
|
|
w[52] = __byte_perm_S (w[47], w[46], selector);
|
|
w[51] = __byte_perm_S (w[46], w[45], selector);
|
|
w[50] = __byte_perm_S (w[45], w[44], selector);
|
|
w[49] = __byte_perm_S (w[44], w[43], selector);
|
|
w[48] = __byte_perm_S (w[43], w[42], selector);
|
|
w[47] = __byte_perm_S (w[42], w[41], selector);
|
|
w[46] = __byte_perm_S (w[41], w[40], selector);
|
|
w[45] = __byte_perm_S (w[40], w[39], selector);
|
|
w[44] = __byte_perm_S (w[39], w[38], selector);
|
|
w[43] = __byte_perm_S (w[38], w[37], selector);
|
|
w[42] = __byte_perm_S (w[37], w[36], selector);
|
|
w[41] = __byte_perm_S (w[36], w[35], selector);
|
|
w[40] = __byte_perm_S (w[35], w[34], selector);
|
|
w[39] = __byte_perm_S (w[34], w[33], selector);
|
|
w[38] = __byte_perm_S (w[33], w[32], selector);
|
|
w[37] = __byte_perm_S (w[32], w[31], selector);
|
|
w[36] = __byte_perm_S (w[31], w[30], selector);
|
|
w[35] = __byte_perm_S (w[30], w[29], selector);
|
|
w[34] = __byte_perm_S (w[29], w[28], selector);
|
|
w[33] = __byte_perm_S (w[28], w[27], selector);
|
|
w[32] = __byte_perm_S (w[27], w[26], selector);
|
|
w[31] = __byte_perm_S (w[26], w[25], selector);
|
|
w[30] = __byte_perm_S (w[25], w[24], selector);
|
|
w[29] = __byte_perm_S (w[24], w[23], selector);
|
|
w[28] = __byte_perm_S (w[23], w[22], selector);
|
|
w[27] = __byte_perm_S (w[22], w[21], selector);
|
|
w[26] = __byte_perm_S (w[21], w[20], selector);
|
|
w[25] = __byte_perm_S (w[20], w[19], selector);
|
|
w[24] = __byte_perm_S (w[19], w[18], selector);
|
|
w[23] = __byte_perm_S (w[18], w[17], selector);
|
|
w[22] = __byte_perm_S (w[17], w[16], selector);
|
|
w[21] = __byte_perm_S (w[16], w[15], selector);
|
|
w[20] = __byte_perm_S (w[15], w[14], selector);
|
|
w[19] = __byte_perm_S (w[14], w[13], selector);
|
|
w[18] = __byte_perm_S (w[13], w[12], selector);
|
|
w[17] = __byte_perm_S (w[12], w[11], selector);
|
|
w[16] = __byte_perm_S (w[11], w[10], selector);
|
|
w[15] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[14] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[13] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[12] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[11] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[10] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[ 9] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 8] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 7] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 6] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 5] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 6:
|
|
w[63] = __byte_perm_S (w[57], w[56], selector);
|
|
w[62] = __byte_perm_S (w[56], w[55], selector);
|
|
w[61] = __byte_perm_S (w[55], w[54], selector);
|
|
w[60] = __byte_perm_S (w[54], w[53], selector);
|
|
w[59] = __byte_perm_S (w[53], w[52], selector);
|
|
w[58] = __byte_perm_S (w[52], w[51], selector);
|
|
w[57] = __byte_perm_S (w[51], w[50], selector);
|
|
w[56] = __byte_perm_S (w[50], w[49], selector);
|
|
w[55] = __byte_perm_S (w[49], w[48], selector);
|
|
w[54] = __byte_perm_S (w[48], w[47], selector);
|
|
w[53] = __byte_perm_S (w[47], w[46], selector);
|
|
w[52] = __byte_perm_S (w[46], w[45], selector);
|
|
w[51] = __byte_perm_S (w[45], w[44], selector);
|
|
w[50] = __byte_perm_S (w[44], w[43], selector);
|
|
w[49] = __byte_perm_S (w[43], w[42], selector);
|
|
w[48] = __byte_perm_S (w[42], w[41], selector);
|
|
w[47] = __byte_perm_S (w[41], w[40], selector);
|
|
w[46] = __byte_perm_S (w[40], w[39], selector);
|
|
w[45] = __byte_perm_S (w[39], w[38], selector);
|
|
w[44] = __byte_perm_S (w[38], w[37], selector);
|
|
w[43] = __byte_perm_S (w[37], w[36], selector);
|
|
w[42] = __byte_perm_S (w[36], w[35], selector);
|
|
w[41] = __byte_perm_S (w[35], w[34], selector);
|
|
w[40] = __byte_perm_S (w[34], w[33], selector);
|
|
w[39] = __byte_perm_S (w[33], w[32], selector);
|
|
w[38] = __byte_perm_S (w[32], w[31], selector);
|
|
w[37] = __byte_perm_S (w[31], w[30], selector);
|
|
w[36] = __byte_perm_S (w[30], w[29], selector);
|
|
w[35] = __byte_perm_S (w[29], w[28], selector);
|
|
w[34] = __byte_perm_S (w[28], w[27], selector);
|
|
w[33] = __byte_perm_S (w[27], w[26], selector);
|
|
w[32] = __byte_perm_S (w[26], w[25], selector);
|
|
w[31] = __byte_perm_S (w[25], w[24], selector);
|
|
w[30] = __byte_perm_S (w[24], w[23], selector);
|
|
w[29] = __byte_perm_S (w[23], w[22], selector);
|
|
w[28] = __byte_perm_S (w[22], w[21], selector);
|
|
w[27] = __byte_perm_S (w[21], w[20], selector);
|
|
w[26] = __byte_perm_S (w[20], w[19], selector);
|
|
w[25] = __byte_perm_S (w[19], w[18], selector);
|
|
w[24] = __byte_perm_S (w[18], w[17], selector);
|
|
w[23] = __byte_perm_S (w[17], w[16], selector);
|
|
w[22] = __byte_perm_S (w[16], w[15], selector);
|
|
w[21] = __byte_perm_S (w[15], w[14], selector);
|
|
w[20] = __byte_perm_S (w[14], w[13], selector);
|
|
w[19] = __byte_perm_S (w[13], w[12], selector);
|
|
w[18] = __byte_perm_S (w[12], w[11], selector);
|
|
w[17] = __byte_perm_S (w[11], w[10], selector);
|
|
w[16] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[15] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[14] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[13] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[12] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[11] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[10] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[ 9] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 8] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 7] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 6] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 7:
|
|
w[63] = __byte_perm_S (w[56], w[55], selector);
|
|
w[62] = __byte_perm_S (w[55], w[54], selector);
|
|
w[61] = __byte_perm_S (w[54], w[53], selector);
|
|
w[60] = __byte_perm_S (w[53], w[52], selector);
|
|
w[59] = __byte_perm_S (w[52], w[51], selector);
|
|
w[58] = __byte_perm_S (w[51], w[50], selector);
|
|
w[57] = __byte_perm_S (w[50], w[49], selector);
|
|
w[56] = __byte_perm_S (w[49], w[48], selector);
|
|
w[55] = __byte_perm_S (w[48], w[47], selector);
|
|
w[54] = __byte_perm_S (w[47], w[46], selector);
|
|
w[53] = __byte_perm_S (w[46], w[45], selector);
|
|
w[52] = __byte_perm_S (w[45], w[44], selector);
|
|
w[51] = __byte_perm_S (w[44], w[43], selector);
|
|
w[50] = __byte_perm_S (w[43], w[42], selector);
|
|
w[49] = __byte_perm_S (w[42], w[41], selector);
|
|
w[48] = __byte_perm_S (w[41], w[40], selector);
|
|
w[47] = __byte_perm_S (w[40], w[39], selector);
|
|
w[46] = __byte_perm_S (w[39], w[38], selector);
|
|
w[45] = __byte_perm_S (w[38], w[37], selector);
|
|
w[44] = __byte_perm_S (w[37], w[36], selector);
|
|
w[43] = __byte_perm_S (w[36], w[35], selector);
|
|
w[42] = __byte_perm_S (w[35], w[34], selector);
|
|
w[41] = __byte_perm_S (w[34], w[33], selector);
|
|
w[40] = __byte_perm_S (w[33], w[32], selector);
|
|
w[39] = __byte_perm_S (w[32], w[31], selector);
|
|
w[38] = __byte_perm_S (w[31], w[30], selector);
|
|
w[37] = __byte_perm_S (w[30], w[29], selector);
|
|
w[36] = __byte_perm_S (w[29], w[28], selector);
|
|
w[35] = __byte_perm_S (w[28], w[27], selector);
|
|
w[34] = __byte_perm_S (w[27], w[26], selector);
|
|
w[33] = __byte_perm_S (w[26], w[25], selector);
|
|
w[32] = __byte_perm_S (w[25], w[24], selector);
|
|
w[31] = __byte_perm_S (w[24], w[23], selector);
|
|
w[30] = __byte_perm_S (w[23], w[22], selector);
|
|
w[29] = __byte_perm_S (w[22], w[21], selector);
|
|
w[28] = __byte_perm_S (w[21], w[20], selector);
|
|
w[27] = __byte_perm_S (w[20], w[19], selector);
|
|
w[26] = __byte_perm_S (w[19], w[18], selector);
|
|
w[25] = __byte_perm_S (w[18], w[17], selector);
|
|
w[24] = __byte_perm_S (w[17], w[16], selector);
|
|
w[23] = __byte_perm_S (w[16], w[15], selector);
|
|
w[22] = __byte_perm_S (w[15], w[14], selector);
|
|
w[21] = __byte_perm_S (w[14], w[13], selector);
|
|
w[20] = __byte_perm_S (w[13], w[12], selector);
|
|
w[19] = __byte_perm_S (w[12], w[11], selector);
|
|
w[18] = __byte_perm_S (w[11], w[10], selector);
|
|
w[17] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[16] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[15] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[14] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[13] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[12] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[11] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[10] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[ 9] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 8] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 7] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 8:
|
|
w[63] = __byte_perm_S (w[55], w[54], selector);
|
|
w[62] = __byte_perm_S (w[54], w[53], selector);
|
|
w[61] = __byte_perm_S (w[53], w[52], selector);
|
|
w[60] = __byte_perm_S (w[52], w[51], selector);
|
|
w[59] = __byte_perm_S (w[51], w[50], selector);
|
|
w[58] = __byte_perm_S (w[50], w[49], selector);
|
|
w[57] = __byte_perm_S (w[49], w[48], selector);
|
|
w[56] = __byte_perm_S (w[48], w[47], selector);
|
|
w[55] = __byte_perm_S (w[47], w[46], selector);
|
|
w[54] = __byte_perm_S (w[46], w[45], selector);
|
|
w[53] = __byte_perm_S (w[45], w[44], selector);
|
|
w[52] = __byte_perm_S (w[44], w[43], selector);
|
|
w[51] = __byte_perm_S (w[43], w[42], selector);
|
|
w[50] = __byte_perm_S (w[42], w[41], selector);
|
|
w[49] = __byte_perm_S (w[41], w[40], selector);
|
|
w[48] = __byte_perm_S (w[40], w[39], selector);
|
|
w[47] = __byte_perm_S (w[39], w[38], selector);
|
|
w[46] = __byte_perm_S (w[38], w[37], selector);
|
|
w[45] = __byte_perm_S (w[37], w[36], selector);
|
|
w[44] = __byte_perm_S (w[36], w[35], selector);
|
|
w[43] = __byte_perm_S (w[35], w[34], selector);
|
|
w[42] = __byte_perm_S (w[34], w[33], selector);
|
|
w[41] = __byte_perm_S (w[33], w[32], selector);
|
|
w[40] = __byte_perm_S (w[32], w[31], selector);
|
|
w[39] = __byte_perm_S (w[31], w[30], selector);
|
|
w[38] = __byte_perm_S (w[30], w[29], selector);
|
|
w[37] = __byte_perm_S (w[29], w[28], selector);
|
|
w[36] = __byte_perm_S (w[28], w[27], selector);
|
|
w[35] = __byte_perm_S (w[27], w[26], selector);
|
|
w[34] = __byte_perm_S (w[26], w[25], selector);
|
|
w[33] = __byte_perm_S (w[25], w[24], selector);
|
|
w[32] = __byte_perm_S (w[24], w[23], selector);
|
|
w[31] = __byte_perm_S (w[23], w[22], selector);
|
|
w[30] = __byte_perm_S (w[22], w[21], selector);
|
|
w[29] = __byte_perm_S (w[21], w[20], selector);
|
|
w[28] = __byte_perm_S (w[20], w[19], selector);
|
|
w[27] = __byte_perm_S (w[19], w[18], selector);
|
|
w[26] = __byte_perm_S (w[18], w[17], selector);
|
|
w[25] = __byte_perm_S (w[17], w[16], selector);
|
|
w[24] = __byte_perm_S (w[16], w[15], selector);
|
|
w[23] = __byte_perm_S (w[15], w[14], selector);
|
|
w[22] = __byte_perm_S (w[14], w[13], selector);
|
|
w[21] = __byte_perm_S (w[13], w[12], selector);
|
|
w[20] = __byte_perm_S (w[12], w[11], selector);
|
|
w[19] = __byte_perm_S (w[11], w[10], selector);
|
|
w[18] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[17] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[16] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[15] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[14] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[13] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[12] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[11] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[10] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[ 9] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 8] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 9:
|
|
w[63] = __byte_perm_S (w[54], w[53], selector);
|
|
w[62] = __byte_perm_S (w[53], w[52], selector);
|
|
w[61] = __byte_perm_S (w[52], w[51], selector);
|
|
w[60] = __byte_perm_S (w[51], w[50], selector);
|
|
w[59] = __byte_perm_S (w[50], w[49], selector);
|
|
w[58] = __byte_perm_S (w[49], w[48], selector);
|
|
w[57] = __byte_perm_S (w[48], w[47], selector);
|
|
w[56] = __byte_perm_S (w[47], w[46], selector);
|
|
w[55] = __byte_perm_S (w[46], w[45], selector);
|
|
w[54] = __byte_perm_S (w[45], w[44], selector);
|
|
w[53] = __byte_perm_S (w[44], w[43], selector);
|
|
w[52] = __byte_perm_S (w[43], w[42], selector);
|
|
w[51] = __byte_perm_S (w[42], w[41], selector);
|
|
w[50] = __byte_perm_S (w[41], w[40], selector);
|
|
w[49] = __byte_perm_S (w[40], w[39], selector);
|
|
w[48] = __byte_perm_S (w[39], w[38], selector);
|
|
w[47] = __byte_perm_S (w[38], w[37], selector);
|
|
w[46] = __byte_perm_S (w[37], w[36], selector);
|
|
w[45] = __byte_perm_S (w[36], w[35], selector);
|
|
w[44] = __byte_perm_S (w[35], w[34], selector);
|
|
w[43] = __byte_perm_S (w[34], w[33], selector);
|
|
w[42] = __byte_perm_S (w[33], w[32], selector);
|
|
w[41] = __byte_perm_S (w[32], w[31], selector);
|
|
w[40] = __byte_perm_S (w[31], w[30], selector);
|
|
w[39] = __byte_perm_S (w[30], w[29], selector);
|
|
w[38] = __byte_perm_S (w[29], w[28], selector);
|
|
w[37] = __byte_perm_S (w[28], w[27], selector);
|
|
w[36] = __byte_perm_S (w[27], w[26], selector);
|
|
w[35] = __byte_perm_S (w[26], w[25], selector);
|
|
w[34] = __byte_perm_S (w[25], w[24], selector);
|
|
w[33] = __byte_perm_S (w[24], w[23], selector);
|
|
w[32] = __byte_perm_S (w[23], w[22], selector);
|
|
w[31] = __byte_perm_S (w[22], w[21], selector);
|
|
w[30] = __byte_perm_S (w[21], w[20], selector);
|
|
w[29] = __byte_perm_S (w[20], w[19], selector);
|
|
w[28] = __byte_perm_S (w[19], w[18], selector);
|
|
w[27] = __byte_perm_S (w[18], w[17], selector);
|
|
w[26] = __byte_perm_S (w[17], w[16], selector);
|
|
w[25] = __byte_perm_S (w[16], w[15], selector);
|
|
w[24] = __byte_perm_S (w[15], w[14], selector);
|
|
w[23] = __byte_perm_S (w[14], w[13], selector);
|
|
w[22] = __byte_perm_S (w[13], w[12], selector);
|
|
w[21] = __byte_perm_S (w[12], w[11], selector);
|
|
w[20] = __byte_perm_S (w[11], w[10], selector);
|
|
w[19] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[18] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[17] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[16] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[15] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[14] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[13] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[12] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[11] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[10] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[ 9] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 10:
|
|
w[63] = __byte_perm_S (w[53], w[52], selector);
|
|
w[62] = __byte_perm_S (w[52], w[51], selector);
|
|
w[61] = __byte_perm_S (w[51], w[50], selector);
|
|
w[60] = __byte_perm_S (w[50], w[49], selector);
|
|
w[59] = __byte_perm_S (w[49], w[48], selector);
|
|
w[58] = __byte_perm_S (w[48], w[47], selector);
|
|
w[57] = __byte_perm_S (w[47], w[46], selector);
|
|
w[56] = __byte_perm_S (w[46], w[45], selector);
|
|
w[55] = __byte_perm_S (w[45], w[44], selector);
|
|
w[54] = __byte_perm_S (w[44], w[43], selector);
|
|
w[53] = __byte_perm_S (w[43], w[42], selector);
|
|
w[52] = __byte_perm_S (w[42], w[41], selector);
|
|
w[51] = __byte_perm_S (w[41], w[40], selector);
|
|
w[50] = __byte_perm_S (w[40], w[39], selector);
|
|
w[49] = __byte_perm_S (w[39], w[38], selector);
|
|
w[48] = __byte_perm_S (w[38], w[37], selector);
|
|
w[47] = __byte_perm_S (w[37], w[36], selector);
|
|
w[46] = __byte_perm_S (w[36], w[35], selector);
|
|
w[45] = __byte_perm_S (w[35], w[34], selector);
|
|
w[44] = __byte_perm_S (w[34], w[33], selector);
|
|
w[43] = __byte_perm_S (w[33], w[32], selector);
|
|
w[42] = __byte_perm_S (w[32], w[31], selector);
|
|
w[41] = __byte_perm_S (w[31], w[30], selector);
|
|
w[40] = __byte_perm_S (w[30], w[29], selector);
|
|
w[39] = __byte_perm_S (w[29], w[28], selector);
|
|
w[38] = __byte_perm_S (w[28], w[27], selector);
|
|
w[37] = __byte_perm_S (w[27], w[26], selector);
|
|
w[36] = __byte_perm_S (w[26], w[25], selector);
|
|
w[35] = __byte_perm_S (w[25], w[24], selector);
|
|
w[34] = __byte_perm_S (w[24], w[23], selector);
|
|
w[33] = __byte_perm_S (w[23], w[22], selector);
|
|
w[32] = __byte_perm_S (w[22], w[21], selector);
|
|
w[31] = __byte_perm_S (w[21], w[20], selector);
|
|
w[30] = __byte_perm_S (w[20], w[19], selector);
|
|
w[29] = __byte_perm_S (w[19], w[18], selector);
|
|
w[28] = __byte_perm_S (w[18], w[17], selector);
|
|
w[27] = __byte_perm_S (w[17], w[16], selector);
|
|
w[26] = __byte_perm_S (w[16], w[15], selector);
|
|
w[25] = __byte_perm_S (w[15], w[14], selector);
|
|
w[24] = __byte_perm_S (w[14], w[13], selector);
|
|
w[23] = __byte_perm_S (w[13], w[12], selector);
|
|
w[22] = __byte_perm_S (w[12], w[11], selector);
|
|
w[21] = __byte_perm_S (w[11], w[10], selector);
|
|
w[20] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[19] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[18] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[17] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[16] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[15] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[14] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[13] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[12] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[11] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[10] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 11:
|
|
w[63] = __byte_perm_S (w[52], w[51], selector);
|
|
w[62] = __byte_perm_S (w[51], w[50], selector);
|
|
w[61] = __byte_perm_S (w[50], w[49], selector);
|
|
w[60] = __byte_perm_S (w[49], w[48], selector);
|
|
w[59] = __byte_perm_S (w[48], w[47], selector);
|
|
w[58] = __byte_perm_S (w[47], w[46], selector);
|
|
w[57] = __byte_perm_S (w[46], w[45], selector);
|
|
w[56] = __byte_perm_S (w[45], w[44], selector);
|
|
w[55] = __byte_perm_S (w[44], w[43], selector);
|
|
w[54] = __byte_perm_S (w[43], w[42], selector);
|
|
w[53] = __byte_perm_S (w[42], w[41], selector);
|
|
w[52] = __byte_perm_S (w[41], w[40], selector);
|
|
w[51] = __byte_perm_S (w[40], w[39], selector);
|
|
w[50] = __byte_perm_S (w[39], w[38], selector);
|
|
w[49] = __byte_perm_S (w[38], w[37], selector);
|
|
w[48] = __byte_perm_S (w[37], w[36], selector);
|
|
w[47] = __byte_perm_S (w[36], w[35], selector);
|
|
w[46] = __byte_perm_S (w[35], w[34], selector);
|
|
w[45] = __byte_perm_S (w[34], w[33], selector);
|
|
w[44] = __byte_perm_S (w[33], w[32], selector);
|
|
w[43] = __byte_perm_S (w[32], w[31], selector);
|
|
w[42] = __byte_perm_S (w[31], w[30], selector);
|
|
w[41] = __byte_perm_S (w[30], w[29], selector);
|
|
w[40] = __byte_perm_S (w[29], w[28], selector);
|
|
w[39] = __byte_perm_S (w[28], w[27], selector);
|
|
w[38] = __byte_perm_S (w[27], w[26], selector);
|
|
w[37] = __byte_perm_S (w[26], w[25], selector);
|
|
w[36] = __byte_perm_S (w[25], w[24], selector);
|
|
w[35] = __byte_perm_S (w[24], w[23], selector);
|
|
w[34] = __byte_perm_S (w[23], w[22], selector);
|
|
w[33] = __byte_perm_S (w[22], w[21], selector);
|
|
w[32] = __byte_perm_S (w[21], w[20], selector);
|
|
w[31] = __byte_perm_S (w[20], w[19], selector);
|
|
w[30] = __byte_perm_S (w[19], w[18], selector);
|
|
w[29] = __byte_perm_S (w[18], w[17], selector);
|
|
w[28] = __byte_perm_S (w[17], w[16], selector);
|
|
w[27] = __byte_perm_S (w[16], w[15], selector);
|
|
w[26] = __byte_perm_S (w[15], w[14], selector);
|
|
w[25] = __byte_perm_S (w[14], w[13], selector);
|
|
w[24] = __byte_perm_S (w[13], w[12], selector);
|
|
w[23] = __byte_perm_S (w[12], w[11], selector);
|
|
w[22] = __byte_perm_S (w[11], w[10], selector);
|
|
w[21] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[20] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[19] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[18] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[17] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[16] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[15] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[14] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[13] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[12] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[11] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 12:
|
|
w[63] = __byte_perm_S (w[51], w[50], selector);
|
|
w[62] = __byte_perm_S (w[50], w[49], selector);
|
|
w[61] = __byte_perm_S (w[49], w[48], selector);
|
|
w[60] = __byte_perm_S (w[48], w[47], selector);
|
|
w[59] = __byte_perm_S (w[47], w[46], selector);
|
|
w[58] = __byte_perm_S (w[46], w[45], selector);
|
|
w[57] = __byte_perm_S (w[45], w[44], selector);
|
|
w[56] = __byte_perm_S (w[44], w[43], selector);
|
|
w[55] = __byte_perm_S (w[43], w[42], selector);
|
|
w[54] = __byte_perm_S (w[42], w[41], selector);
|
|
w[53] = __byte_perm_S (w[41], w[40], selector);
|
|
w[52] = __byte_perm_S (w[40], w[39], selector);
|
|
w[51] = __byte_perm_S (w[39], w[38], selector);
|
|
w[50] = __byte_perm_S (w[38], w[37], selector);
|
|
w[49] = __byte_perm_S (w[37], w[36], selector);
|
|
w[48] = __byte_perm_S (w[36], w[35], selector);
|
|
w[47] = __byte_perm_S (w[35], w[34], selector);
|
|
w[46] = __byte_perm_S (w[34], w[33], selector);
|
|
w[45] = __byte_perm_S (w[33], w[32], selector);
|
|
w[44] = __byte_perm_S (w[32], w[31], selector);
|
|
w[43] = __byte_perm_S (w[31], w[30], selector);
|
|
w[42] = __byte_perm_S (w[30], w[29], selector);
|
|
w[41] = __byte_perm_S (w[29], w[28], selector);
|
|
w[40] = __byte_perm_S (w[28], w[27], selector);
|
|
w[39] = __byte_perm_S (w[27], w[26], selector);
|
|
w[38] = __byte_perm_S (w[26], w[25], selector);
|
|
w[37] = __byte_perm_S (w[25], w[24], selector);
|
|
w[36] = __byte_perm_S (w[24], w[23], selector);
|
|
w[35] = __byte_perm_S (w[23], w[22], selector);
|
|
w[34] = __byte_perm_S (w[22], w[21], selector);
|
|
w[33] = __byte_perm_S (w[21], w[20], selector);
|
|
w[32] = __byte_perm_S (w[20], w[19], selector);
|
|
w[31] = __byte_perm_S (w[19], w[18], selector);
|
|
w[30] = __byte_perm_S (w[18], w[17], selector);
|
|
w[29] = __byte_perm_S (w[17], w[16], selector);
|
|
w[28] = __byte_perm_S (w[16], w[15], selector);
|
|
w[27] = __byte_perm_S (w[15], w[14], selector);
|
|
w[26] = __byte_perm_S (w[14], w[13], selector);
|
|
w[25] = __byte_perm_S (w[13], w[12], selector);
|
|
w[24] = __byte_perm_S (w[12], w[11], selector);
|
|
w[23] = __byte_perm_S (w[11], w[10], selector);
|
|
w[22] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[21] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[20] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[19] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[18] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[17] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[16] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[15] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[14] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[13] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[12] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 13:
|
|
w[63] = __byte_perm_S (w[50], w[49], selector);
|
|
w[62] = __byte_perm_S (w[49], w[48], selector);
|
|
w[61] = __byte_perm_S (w[48], w[47], selector);
|
|
w[60] = __byte_perm_S (w[47], w[46], selector);
|
|
w[59] = __byte_perm_S (w[46], w[45], selector);
|
|
w[58] = __byte_perm_S (w[45], w[44], selector);
|
|
w[57] = __byte_perm_S (w[44], w[43], selector);
|
|
w[56] = __byte_perm_S (w[43], w[42], selector);
|
|
w[55] = __byte_perm_S (w[42], w[41], selector);
|
|
w[54] = __byte_perm_S (w[41], w[40], selector);
|
|
w[53] = __byte_perm_S (w[40], w[39], selector);
|
|
w[52] = __byte_perm_S (w[39], w[38], selector);
|
|
w[51] = __byte_perm_S (w[38], w[37], selector);
|
|
w[50] = __byte_perm_S (w[37], w[36], selector);
|
|
w[49] = __byte_perm_S (w[36], w[35], selector);
|
|
w[48] = __byte_perm_S (w[35], w[34], selector);
|
|
w[47] = __byte_perm_S (w[34], w[33], selector);
|
|
w[46] = __byte_perm_S (w[33], w[32], selector);
|
|
w[45] = __byte_perm_S (w[32], w[31], selector);
|
|
w[44] = __byte_perm_S (w[31], w[30], selector);
|
|
w[43] = __byte_perm_S (w[30], w[29], selector);
|
|
w[42] = __byte_perm_S (w[29], w[28], selector);
|
|
w[41] = __byte_perm_S (w[28], w[27], selector);
|
|
w[40] = __byte_perm_S (w[27], w[26], selector);
|
|
w[39] = __byte_perm_S (w[26], w[25], selector);
|
|
w[38] = __byte_perm_S (w[25], w[24], selector);
|
|
w[37] = __byte_perm_S (w[24], w[23], selector);
|
|
w[36] = __byte_perm_S (w[23], w[22], selector);
|
|
w[35] = __byte_perm_S (w[22], w[21], selector);
|
|
w[34] = __byte_perm_S (w[21], w[20], selector);
|
|
w[33] = __byte_perm_S (w[20], w[19], selector);
|
|
w[32] = __byte_perm_S (w[19], w[18], selector);
|
|
w[31] = __byte_perm_S (w[18], w[17], selector);
|
|
w[30] = __byte_perm_S (w[17], w[16], selector);
|
|
w[29] = __byte_perm_S (w[16], w[15], selector);
|
|
w[28] = __byte_perm_S (w[15], w[14], selector);
|
|
w[27] = __byte_perm_S (w[14], w[13], selector);
|
|
w[26] = __byte_perm_S (w[13], w[12], selector);
|
|
w[25] = __byte_perm_S (w[12], w[11], selector);
|
|
w[24] = __byte_perm_S (w[11], w[10], selector);
|
|
w[23] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[22] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[21] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[20] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[19] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[18] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[17] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[16] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[15] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[14] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[13] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 14:
|
|
w[63] = __byte_perm_S (w[49], w[48], selector);
|
|
w[62] = __byte_perm_S (w[48], w[47], selector);
|
|
w[61] = __byte_perm_S (w[47], w[46], selector);
|
|
w[60] = __byte_perm_S (w[46], w[45], selector);
|
|
w[59] = __byte_perm_S (w[45], w[44], selector);
|
|
w[58] = __byte_perm_S (w[44], w[43], selector);
|
|
w[57] = __byte_perm_S (w[43], w[42], selector);
|
|
w[56] = __byte_perm_S (w[42], w[41], selector);
|
|
w[55] = __byte_perm_S (w[41], w[40], selector);
|
|
w[54] = __byte_perm_S (w[40], w[39], selector);
|
|
w[53] = __byte_perm_S (w[39], w[38], selector);
|
|
w[52] = __byte_perm_S (w[38], w[37], selector);
|
|
w[51] = __byte_perm_S (w[37], w[36], selector);
|
|
w[50] = __byte_perm_S (w[36], w[35], selector);
|
|
w[49] = __byte_perm_S (w[35], w[34], selector);
|
|
w[48] = __byte_perm_S (w[34], w[33], selector);
|
|
w[47] = __byte_perm_S (w[33], w[32], selector);
|
|
w[46] = __byte_perm_S (w[32], w[31], selector);
|
|
w[45] = __byte_perm_S (w[31], w[30], selector);
|
|
w[44] = __byte_perm_S (w[30], w[29], selector);
|
|
w[43] = __byte_perm_S (w[29], w[28], selector);
|
|
w[42] = __byte_perm_S (w[28], w[27], selector);
|
|
w[41] = __byte_perm_S (w[27], w[26], selector);
|
|
w[40] = __byte_perm_S (w[26], w[25], selector);
|
|
w[39] = __byte_perm_S (w[25], w[24], selector);
|
|
w[38] = __byte_perm_S (w[24], w[23], selector);
|
|
w[37] = __byte_perm_S (w[23], w[22], selector);
|
|
w[36] = __byte_perm_S (w[22], w[21], selector);
|
|
w[35] = __byte_perm_S (w[21], w[20], selector);
|
|
w[34] = __byte_perm_S (w[20], w[19], selector);
|
|
w[33] = __byte_perm_S (w[19], w[18], selector);
|
|
w[32] = __byte_perm_S (w[18], w[17], selector);
|
|
w[31] = __byte_perm_S (w[17], w[16], selector);
|
|
w[30] = __byte_perm_S (w[16], w[15], selector);
|
|
w[29] = __byte_perm_S (w[15], w[14], selector);
|
|
w[28] = __byte_perm_S (w[14], w[13], selector);
|
|
w[27] = __byte_perm_S (w[13], w[12], selector);
|
|
w[26] = __byte_perm_S (w[12], w[11], selector);
|
|
w[25] = __byte_perm_S (w[11], w[10], selector);
|
|
w[24] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[23] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[22] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[21] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[20] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[19] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[18] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[17] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[16] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[15] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[14] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 15:
|
|
w[63] = __byte_perm_S (w[48], w[47], selector);
|
|
w[62] = __byte_perm_S (w[47], w[46], selector);
|
|
w[61] = __byte_perm_S (w[46], w[45], selector);
|
|
w[60] = __byte_perm_S (w[45], w[44], selector);
|
|
w[59] = __byte_perm_S (w[44], w[43], selector);
|
|
w[58] = __byte_perm_S (w[43], w[42], selector);
|
|
w[57] = __byte_perm_S (w[42], w[41], selector);
|
|
w[56] = __byte_perm_S (w[41], w[40], selector);
|
|
w[55] = __byte_perm_S (w[40], w[39], selector);
|
|
w[54] = __byte_perm_S (w[39], w[38], selector);
|
|
w[53] = __byte_perm_S (w[38], w[37], selector);
|
|
w[52] = __byte_perm_S (w[37], w[36], selector);
|
|
w[51] = __byte_perm_S (w[36], w[35], selector);
|
|
w[50] = __byte_perm_S (w[35], w[34], selector);
|
|
w[49] = __byte_perm_S (w[34], w[33], selector);
|
|
w[48] = __byte_perm_S (w[33], w[32], selector);
|
|
w[47] = __byte_perm_S (w[32], w[31], selector);
|
|
w[46] = __byte_perm_S (w[31], w[30], selector);
|
|
w[45] = __byte_perm_S (w[30], w[29], selector);
|
|
w[44] = __byte_perm_S (w[29], w[28], selector);
|
|
w[43] = __byte_perm_S (w[28], w[27], selector);
|
|
w[42] = __byte_perm_S (w[27], w[26], selector);
|
|
w[41] = __byte_perm_S (w[26], w[25], selector);
|
|
w[40] = __byte_perm_S (w[25], w[24], selector);
|
|
w[39] = __byte_perm_S (w[24], w[23], selector);
|
|
w[38] = __byte_perm_S (w[23], w[22], selector);
|
|
w[37] = __byte_perm_S (w[22], w[21], selector);
|
|
w[36] = __byte_perm_S (w[21], w[20], selector);
|
|
w[35] = __byte_perm_S (w[20], w[19], selector);
|
|
w[34] = __byte_perm_S (w[19], w[18], selector);
|
|
w[33] = __byte_perm_S (w[18], w[17], selector);
|
|
w[32] = __byte_perm_S (w[17], w[16], selector);
|
|
w[31] = __byte_perm_S (w[16], w[15], selector);
|
|
w[30] = __byte_perm_S (w[15], w[14], selector);
|
|
w[29] = __byte_perm_S (w[14], w[13], selector);
|
|
w[28] = __byte_perm_S (w[13], w[12], selector);
|
|
w[27] = __byte_perm_S (w[12], w[11], selector);
|
|
w[26] = __byte_perm_S (w[11], w[10], selector);
|
|
w[25] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[24] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[23] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[22] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[21] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[20] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[19] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[18] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[17] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[16] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[15] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 16:
|
|
w[63] = __byte_perm_S (w[47], w[46], selector);
|
|
w[62] = __byte_perm_S (w[46], w[45], selector);
|
|
w[61] = __byte_perm_S (w[45], w[44], selector);
|
|
w[60] = __byte_perm_S (w[44], w[43], selector);
|
|
w[59] = __byte_perm_S (w[43], w[42], selector);
|
|
w[58] = __byte_perm_S (w[42], w[41], selector);
|
|
w[57] = __byte_perm_S (w[41], w[40], selector);
|
|
w[56] = __byte_perm_S (w[40], w[39], selector);
|
|
w[55] = __byte_perm_S (w[39], w[38], selector);
|
|
w[54] = __byte_perm_S (w[38], w[37], selector);
|
|
w[53] = __byte_perm_S (w[37], w[36], selector);
|
|
w[52] = __byte_perm_S (w[36], w[35], selector);
|
|
w[51] = __byte_perm_S (w[35], w[34], selector);
|
|
w[50] = __byte_perm_S (w[34], w[33], selector);
|
|
w[49] = __byte_perm_S (w[33], w[32], selector);
|
|
w[48] = __byte_perm_S (w[32], w[31], selector);
|
|
w[47] = __byte_perm_S (w[31], w[30], selector);
|
|
w[46] = __byte_perm_S (w[30], w[29], selector);
|
|
w[45] = __byte_perm_S (w[29], w[28], selector);
|
|
w[44] = __byte_perm_S (w[28], w[27], selector);
|
|
w[43] = __byte_perm_S (w[27], w[26], selector);
|
|
w[42] = __byte_perm_S (w[26], w[25], selector);
|
|
w[41] = __byte_perm_S (w[25], w[24], selector);
|
|
w[40] = __byte_perm_S (w[24], w[23], selector);
|
|
w[39] = __byte_perm_S (w[23], w[22], selector);
|
|
w[38] = __byte_perm_S (w[22], w[21], selector);
|
|
w[37] = __byte_perm_S (w[21], w[20], selector);
|
|
w[36] = __byte_perm_S (w[20], w[19], selector);
|
|
w[35] = __byte_perm_S (w[19], w[18], selector);
|
|
w[34] = __byte_perm_S (w[18], w[17], selector);
|
|
w[33] = __byte_perm_S (w[17], w[16], selector);
|
|
w[32] = __byte_perm_S (w[16], w[15], selector);
|
|
w[31] = __byte_perm_S (w[15], w[14], selector);
|
|
w[30] = __byte_perm_S (w[14], w[13], selector);
|
|
w[29] = __byte_perm_S (w[13], w[12], selector);
|
|
w[28] = __byte_perm_S (w[12], w[11], selector);
|
|
w[27] = __byte_perm_S (w[11], w[10], selector);
|
|
w[26] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[25] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[24] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[23] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[22] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[21] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[20] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[19] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[18] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[17] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[16] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 17:
|
|
w[63] = __byte_perm_S (w[46], w[45], selector);
|
|
w[62] = __byte_perm_S (w[45], w[44], selector);
|
|
w[61] = __byte_perm_S (w[44], w[43], selector);
|
|
w[60] = __byte_perm_S (w[43], w[42], selector);
|
|
w[59] = __byte_perm_S (w[42], w[41], selector);
|
|
w[58] = __byte_perm_S (w[41], w[40], selector);
|
|
w[57] = __byte_perm_S (w[40], w[39], selector);
|
|
w[56] = __byte_perm_S (w[39], w[38], selector);
|
|
w[55] = __byte_perm_S (w[38], w[37], selector);
|
|
w[54] = __byte_perm_S (w[37], w[36], selector);
|
|
w[53] = __byte_perm_S (w[36], w[35], selector);
|
|
w[52] = __byte_perm_S (w[35], w[34], selector);
|
|
w[51] = __byte_perm_S (w[34], w[33], selector);
|
|
w[50] = __byte_perm_S (w[33], w[32], selector);
|
|
w[49] = __byte_perm_S (w[32], w[31], selector);
|
|
w[48] = __byte_perm_S (w[31], w[30], selector);
|
|
w[47] = __byte_perm_S (w[30], w[29], selector);
|
|
w[46] = __byte_perm_S (w[29], w[28], selector);
|
|
w[45] = __byte_perm_S (w[28], w[27], selector);
|
|
w[44] = __byte_perm_S (w[27], w[26], selector);
|
|
w[43] = __byte_perm_S (w[26], w[25], selector);
|
|
w[42] = __byte_perm_S (w[25], w[24], selector);
|
|
w[41] = __byte_perm_S (w[24], w[23], selector);
|
|
w[40] = __byte_perm_S (w[23], w[22], selector);
|
|
w[39] = __byte_perm_S (w[22], w[21], selector);
|
|
w[38] = __byte_perm_S (w[21], w[20], selector);
|
|
w[37] = __byte_perm_S (w[20], w[19], selector);
|
|
w[36] = __byte_perm_S (w[19], w[18], selector);
|
|
w[35] = __byte_perm_S (w[18], w[17], selector);
|
|
w[34] = __byte_perm_S (w[17], w[16], selector);
|
|
w[33] = __byte_perm_S (w[16], w[15], selector);
|
|
w[32] = __byte_perm_S (w[15], w[14], selector);
|
|
w[31] = __byte_perm_S (w[14], w[13], selector);
|
|
w[30] = __byte_perm_S (w[13], w[12], selector);
|
|
w[29] = __byte_perm_S (w[12], w[11], selector);
|
|
w[28] = __byte_perm_S (w[11], w[10], selector);
|
|
w[27] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[26] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[25] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[24] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[23] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[22] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[21] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[20] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[19] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[18] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[17] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 18:
|
|
w[63] = __byte_perm_S (w[45], w[44], selector);
|
|
w[62] = __byte_perm_S (w[44], w[43], selector);
|
|
w[61] = __byte_perm_S (w[43], w[42], selector);
|
|
w[60] = __byte_perm_S (w[42], w[41], selector);
|
|
w[59] = __byte_perm_S (w[41], w[40], selector);
|
|
w[58] = __byte_perm_S (w[40], w[39], selector);
|
|
w[57] = __byte_perm_S (w[39], w[38], selector);
|
|
w[56] = __byte_perm_S (w[38], w[37], selector);
|
|
w[55] = __byte_perm_S (w[37], w[36], selector);
|
|
w[54] = __byte_perm_S (w[36], w[35], selector);
|
|
w[53] = __byte_perm_S (w[35], w[34], selector);
|
|
w[52] = __byte_perm_S (w[34], w[33], selector);
|
|
w[51] = __byte_perm_S (w[33], w[32], selector);
|
|
w[50] = __byte_perm_S (w[32], w[31], selector);
|
|
w[49] = __byte_perm_S (w[31], w[30], selector);
|
|
w[48] = __byte_perm_S (w[30], w[29], selector);
|
|
w[47] = __byte_perm_S (w[29], w[28], selector);
|
|
w[46] = __byte_perm_S (w[28], w[27], selector);
|
|
w[45] = __byte_perm_S (w[27], w[26], selector);
|
|
w[44] = __byte_perm_S (w[26], w[25], selector);
|
|
w[43] = __byte_perm_S (w[25], w[24], selector);
|
|
w[42] = __byte_perm_S (w[24], w[23], selector);
|
|
w[41] = __byte_perm_S (w[23], w[22], selector);
|
|
w[40] = __byte_perm_S (w[22], w[21], selector);
|
|
w[39] = __byte_perm_S (w[21], w[20], selector);
|
|
w[38] = __byte_perm_S (w[20], w[19], selector);
|
|
w[37] = __byte_perm_S (w[19], w[18], selector);
|
|
w[36] = __byte_perm_S (w[18], w[17], selector);
|
|
w[35] = __byte_perm_S (w[17], w[16], selector);
|
|
w[34] = __byte_perm_S (w[16], w[15], selector);
|
|
w[33] = __byte_perm_S (w[15], w[14], selector);
|
|
w[32] = __byte_perm_S (w[14], w[13], selector);
|
|
w[31] = __byte_perm_S (w[13], w[12], selector);
|
|
w[30] = __byte_perm_S (w[12], w[11], selector);
|
|
w[29] = __byte_perm_S (w[11], w[10], selector);
|
|
w[28] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[27] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[26] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[25] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[24] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[23] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[22] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[21] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[20] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[19] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[18] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 19:
|
|
w[63] = __byte_perm_S (w[44], w[43], selector);
|
|
w[62] = __byte_perm_S (w[43], w[42], selector);
|
|
w[61] = __byte_perm_S (w[42], w[41], selector);
|
|
w[60] = __byte_perm_S (w[41], w[40], selector);
|
|
w[59] = __byte_perm_S (w[40], w[39], selector);
|
|
w[58] = __byte_perm_S (w[39], w[38], selector);
|
|
w[57] = __byte_perm_S (w[38], w[37], selector);
|
|
w[56] = __byte_perm_S (w[37], w[36], selector);
|
|
w[55] = __byte_perm_S (w[36], w[35], selector);
|
|
w[54] = __byte_perm_S (w[35], w[34], selector);
|
|
w[53] = __byte_perm_S (w[34], w[33], selector);
|
|
w[52] = __byte_perm_S (w[33], w[32], selector);
|
|
w[51] = __byte_perm_S (w[32], w[31], selector);
|
|
w[50] = __byte_perm_S (w[31], w[30], selector);
|
|
w[49] = __byte_perm_S (w[30], w[29], selector);
|
|
w[48] = __byte_perm_S (w[29], w[28], selector);
|
|
w[47] = __byte_perm_S (w[28], w[27], selector);
|
|
w[46] = __byte_perm_S (w[27], w[26], selector);
|
|
w[45] = __byte_perm_S (w[26], w[25], selector);
|
|
w[44] = __byte_perm_S (w[25], w[24], selector);
|
|
w[43] = __byte_perm_S (w[24], w[23], selector);
|
|
w[42] = __byte_perm_S (w[23], w[22], selector);
|
|
w[41] = __byte_perm_S (w[22], w[21], selector);
|
|
w[40] = __byte_perm_S (w[21], w[20], selector);
|
|
w[39] = __byte_perm_S (w[20], w[19], selector);
|
|
w[38] = __byte_perm_S (w[19], w[18], selector);
|
|
w[37] = __byte_perm_S (w[18], w[17], selector);
|
|
w[36] = __byte_perm_S (w[17], w[16], selector);
|
|
w[35] = __byte_perm_S (w[16], w[15], selector);
|
|
w[34] = __byte_perm_S (w[15], w[14], selector);
|
|
w[33] = __byte_perm_S (w[14], w[13], selector);
|
|
w[32] = __byte_perm_S (w[13], w[12], selector);
|
|
w[31] = __byte_perm_S (w[12], w[11], selector);
|
|
w[30] = __byte_perm_S (w[11], w[10], selector);
|
|
w[29] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[28] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[27] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[26] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[25] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[24] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[23] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[22] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[21] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[20] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[19] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 20:
|
|
w[63] = __byte_perm_S (w[43], w[42], selector);
|
|
w[62] = __byte_perm_S (w[42], w[41], selector);
|
|
w[61] = __byte_perm_S (w[41], w[40], selector);
|
|
w[60] = __byte_perm_S (w[40], w[39], selector);
|
|
w[59] = __byte_perm_S (w[39], w[38], selector);
|
|
w[58] = __byte_perm_S (w[38], w[37], selector);
|
|
w[57] = __byte_perm_S (w[37], w[36], selector);
|
|
w[56] = __byte_perm_S (w[36], w[35], selector);
|
|
w[55] = __byte_perm_S (w[35], w[34], selector);
|
|
w[54] = __byte_perm_S (w[34], w[33], selector);
|
|
w[53] = __byte_perm_S (w[33], w[32], selector);
|
|
w[52] = __byte_perm_S (w[32], w[31], selector);
|
|
w[51] = __byte_perm_S (w[31], w[30], selector);
|
|
w[50] = __byte_perm_S (w[30], w[29], selector);
|
|
w[49] = __byte_perm_S (w[29], w[28], selector);
|
|
w[48] = __byte_perm_S (w[28], w[27], selector);
|
|
w[47] = __byte_perm_S (w[27], w[26], selector);
|
|
w[46] = __byte_perm_S (w[26], w[25], selector);
|
|
w[45] = __byte_perm_S (w[25], w[24], selector);
|
|
w[44] = __byte_perm_S (w[24], w[23], selector);
|
|
w[43] = __byte_perm_S (w[23], w[22], selector);
|
|
w[42] = __byte_perm_S (w[22], w[21], selector);
|
|
w[41] = __byte_perm_S (w[21], w[20], selector);
|
|
w[40] = __byte_perm_S (w[20], w[19], selector);
|
|
w[39] = __byte_perm_S (w[19], w[18], selector);
|
|
w[38] = __byte_perm_S (w[18], w[17], selector);
|
|
w[37] = __byte_perm_S (w[17], w[16], selector);
|
|
w[36] = __byte_perm_S (w[16], w[15], selector);
|
|
w[35] = __byte_perm_S (w[15], w[14], selector);
|
|
w[34] = __byte_perm_S (w[14], w[13], selector);
|
|
w[33] = __byte_perm_S (w[13], w[12], selector);
|
|
w[32] = __byte_perm_S (w[12], w[11], selector);
|
|
w[31] = __byte_perm_S (w[11], w[10], selector);
|
|
w[30] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[29] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[28] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[27] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[26] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[25] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[24] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[23] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[22] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[21] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[20] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 21:
|
|
w[63] = __byte_perm_S (w[42], w[41], selector);
|
|
w[62] = __byte_perm_S (w[41], w[40], selector);
|
|
w[61] = __byte_perm_S (w[40], w[39], selector);
|
|
w[60] = __byte_perm_S (w[39], w[38], selector);
|
|
w[59] = __byte_perm_S (w[38], w[37], selector);
|
|
w[58] = __byte_perm_S (w[37], w[36], selector);
|
|
w[57] = __byte_perm_S (w[36], w[35], selector);
|
|
w[56] = __byte_perm_S (w[35], w[34], selector);
|
|
w[55] = __byte_perm_S (w[34], w[33], selector);
|
|
w[54] = __byte_perm_S (w[33], w[32], selector);
|
|
w[53] = __byte_perm_S (w[32], w[31], selector);
|
|
w[52] = __byte_perm_S (w[31], w[30], selector);
|
|
w[51] = __byte_perm_S (w[30], w[29], selector);
|
|
w[50] = __byte_perm_S (w[29], w[28], selector);
|
|
w[49] = __byte_perm_S (w[28], w[27], selector);
|
|
w[48] = __byte_perm_S (w[27], w[26], selector);
|
|
w[47] = __byte_perm_S (w[26], w[25], selector);
|
|
w[46] = __byte_perm_S (w[25], w[24], selector);
|
|
w[45] = __byte_perm_S (w[24], w[23], selector);
|
|
w[44] = __byte_perm_S (w[23], w[22], selector);
|
|
w[43] = __byte_perm_S (w[22], w[21], selector);
|
|
w[42] = __byte_perm_S (w[21], w[20], selector);
|
|
w[41] = __byte_perm_S (w[20], w[19], selector);
|
|
w[40] = __byte_perm_S (w[19], w[18], selector);
|
|
w[39] = __byte_perm_S (w[18], w[17], selector);
|
|
w[38] = __byte_perm_S (w[17], w[16], selector);
|
|
w[37] = __byte_perm_S (w[16], w[15], selector);
|
|
w[36] = __byte_perm_S (w[15], w[14], selector);
|
|
w[35] = __byte_perm_S (w[14], w[13], selector);
|
|
w[34] = __byte_perm_S (w[13], w[12], selector);
|
|
w[33] = __byte_perm_S (w[12], w[11], selector);
|
|
w[32] = __byte_perm_S (w[11], w[10], selector);
|
|
w[31] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[30] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[29] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[28] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[27] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[26] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[25] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[24] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[23] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[22] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[21] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 22:
|
|
w[63] = __byte_perm_S (w[41], w[40], selector);
|
|
w[62] = __byte_perm_S (w[40], w[39], selector);
|
|
w[61] = __byte_perm_S (w[39], w[38], selector);
|
|
w[60] = __byte_perm_S (w[38], w[37], selector);
|
|
w[59] = __byte_perm_S (w[37], w[36], selector);
|
|
w[58] = __byte_perm_S (w[36], w[35], selector);
|
|
w[57] = __byte_perm_S (w[35], w[34], selector);
|
|
w[56] = __byte_perm_S (w[34], w[33], selector);
|
|
w[55] = __byte_perm_S (w[33], w[32], selector);
|
|
w[54] = __byte_perm_S (w[32], w[31], selector);
|
|
w[53] = __byte_perm_S (w[31], w[30], selector);
|
|
w[52] = __byte_perm_S (w[30], w[29], selector);
|
|
w[51] = __byte_perm_S (w[29], w[28], selector);
|
|
w[50] = __byte_perm_S (w[28], w[27], selector);
|
|
w[49] = __byte_perm_S (w[27], w[26], selector);
|
|
w[48] = __byte_perm_S (w[26], w[25], selector);
|
|
w[47] = __byte_perm_S (w[25], w[24], selector);
|
|
w[46] = __byte_perm_S (w[24], w[23], selector);
|
|
w[45] = __byte_perm_S (w[23], w[22], selector);
|
|
w[44] = __byte_perm_S (w[22], w[21], selector);
|
|
w[43] = __byte_perm_S (w[21], w[20], selector);
|
|
w[42] = __byte_perm_S (w[20], w[19], selector);
|
|
w[41] = __byte_perm_S (w[19], w[18], selector);
|
|
w[40] = __byte_perm_S (w[18], w[17], selector);
|
|
w[39] = __byte_perm_S (w[17], w[16], selector);
|
|
w[38] = __byte_perm_S (w[16], w[15], selector);
|
|
w[37] = __byte_perm_S (w[15], w[14], selector);
|
|
w[36] = __byte_perm_S (w[14], w[13], selector);
|
|
w[35] = __byte_perm_S (w[13], w[12], selector);
|
|
w[34] = __byte_perm_S (w[12], w[11], selector);
|
|
w[33] = __byte_perm_S (w[11], w[10], selector);
|
|
w[32] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[31] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[30] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[29] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[28] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[27] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[26] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[25] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[24] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[23] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[22] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 23:
|
|
w[63] = __byte_perm_S (w[40], w[39], selector);
|
|
w[62] = __byte_perm_S (w[39], w[38], selector);
|
|
w[61] = __byte_perm_S (w[38], w[37], selector);
|
|
w[60] = __byte_perm_S (w[37], w[36], selector);
|
|
w[59] = __byte_perm_S (w[36], w[35], selector);
|
|
w[58] = __byte_perm_S (w[35], w[34], selector);
|
|
w[57] = __byte_perm_S (w[34], w[33], selector);
|
|
w[56] = __byte_perm_S (w[33], w[32], selector);
|
|
w[55] = __byte_perm_S (w[32], w[31], selector);
|
|
w[54] = __byte_perm_S (w[31], w[30], selector);
|
|
w[53] = __byte_perm_S (w[30], w[29], selector);
|
|
w[52] = __byte_perm_S (w[29], w[28], selector);
|
|
w[51] = __byte_perm_S (w[28], w[27], selector);
|
|
w[50] = __byte_perm_S (w[27], w[26], selector);
|
|
w[49] = __byte_perm_S (w[26], w[25], selector);
|
|
w[48] = __byte_perm_S (w[25], w[24], selector);
|
|
w[47] = __byte_perm_S (w[24], w[23], selector);
|
|
w[46] = __byte_perm_S (w[23], w[22], selector);
|
|
w[45] = __byte_perm_S (w[22], w[21], selector);
|
|
w[44] = __byte_perm_S (w[21], w[20], selector);
|
|
w[43] = __byte_perm_S (w[20], w[19], selector);
|
|
w[42] = __byte_perm_S (w[19], w[18], selector);
|
|
w[41] = __byte_perm_S (w[18], w[17], selector);
|
|
w[40] = __byte_perm_S (w[17], w[16], selector);
|
|
w[39] = __byte_perm_S (w[16], w[15], selector);
|
|
w[38] = __byte_perm_S (w[15], w[14], selector);
|
|
w[37] = __byte_perm_S (w[14], w[13], selector);
|
|
w[36] = __byte_perm_S (w[13], w[12], selector);
|
|
w[35] = __byte_perm_S (w[12], w[11], selector);
|
|
w[34] = __byte_perm_S (w[11], w[10], selector);
|
|
w[33] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[32] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[31] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[30] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[29] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[28] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[27] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[26] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[25] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[24] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[23] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 24:
|
|
w[63] = __byte_perm_S (w[39], w[38], selector);
|
|
w[62] = __byte_perm_S (w[38], w[37], selector);
|
|
w[61] = __byte_perm_S (w[37], w[36], selector);
|
|
w[60] = __byte_perm_S (w[36], w[35], selector);
|
|
w[59] = __byte_perm_S (w[35], w[34], selector);
|
|
w[58] = __byte_perm_S (w[34], w[33], selector);
|
|
w[57] = __byte_perm_S (w[33], w[32], selector);
|
|
w[56] = __byte_perm_S (w[32], w[31], selector);
|
|
w[55] = __byte_perm_S (w[31], w[30], selector);
|
|
w[54] = __byte_perm_S (w[30], w[29], selector);
|
|
w[53] = __byte_perm_S (w[29], w[28], selector);
|
|
w[52] = __byte_perm_S (w[28], w[27], selector);
|
|
w[51] = __byte_perm_S (w[27], w[26], selector);
|
|
w[50] = __byte_perm_S (w[26], w[25], selector);
|
|
w[49] = __byte_perm_S (w[25], w[24], selector);
|
|
w[48] = __byte_perm_S (w[24], w[23], selector);
|
|
w[47] = __byte_perm_S (w[23], w[22], selector);
|
|
w[46] = __byte_perm_S (w[22], w[21], selector);
|
|
w[45] = __byte_perm_S (w[21], w[20], selector);
|
|
w[44] = __byte_perm_S (w[20], w[19], selector);
|
|
w[43] = __byte_perm_S (w[19], w[18], selector);
|
|
w[42] = __byte_perm_S (w[18], w[17], selector);
|
|
w[41] = __byte_perm_S (w[17], w[16], selector);
|
|
w[40] = __byte_perm_S (w[16], w[15], selector);
|
|
w[39] = __byte_perm_S (w[15], w[14], selector);
|
|
w[38] = __byte_perm_S (w[14], w[13], selector);
|
|
w[37] = __byte_perm_S (w[13], w[12], selector);
|
|
w[36] = __byte_perm_S (w[12], w[11], selector);
|
|
w[35] = __byte_perm_S (w[11], w[10], selector);
|
|
w[34] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[33] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[32] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[31] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[30] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[29] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[28] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[27] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[26] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[25] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[24] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 25:
|
|
w[63] = __byte_perm_S (w[38], w[37], selector);
|
|
w[62] = __byte_perm_S (w[37], w[36], selector);
|
|
w[61] = __byte_perm_S (w[36], w[35], selector);
|
|
w[60] = __byte_perm_S (w[35], w[34], selector);
|
|
w[59] = __byte_perm_S (w[34], w[33], selector);
|
|
w[58] = __byte_perm_S (w[33], w[32], selector);
|
|
w[57] = __byte_perm_S (w[32], w[31], selector);
|
|
w[56] = __byte_perm_S (w[31], w[30], selector);
|
|
w[55] = __byte_perm_S (w[30], w[29], selector);
|
|
w[54] = __byte_perm_S (w[29], w[28], selector);
|
|
w[53] = __byte_perm_S (w[28], w[27], selector);
|
|
w[52] = __byte_perm_S (w[27], w[26], selector);
|
|
w[51] = __byte_perm_S (w[26], w[25], selector);
|
|
w[50] = __byte_perm_S (w[25], w[24], selector);
|
|
w[49] = __byte_perm_S (w[24], w[23], selector);
|
|
w[48] = __byte_perm_S (w[23], w[22], selector);
|
|
w[47] = __byte_perm_S (w[22], w[21], selector);
|
|
w[46] = __byte_perm_S (w[21], w[20], selector);
|
|
w[45] = __byte_perm_S (w[20], w[19], selector);
|
|
w[44] = __byte_perm_S (w[19], w[18], selector);
|
|
w[43] = __byte_perm_S (w[18], w[17], selector);
|
|
w[42] = __byte_perm_S (w[17], w[16], selector);
|
|
w[41] = __byte_perm_S (w[16], w[15], selector);
|
|
w[40] = __byte_perm_S (w[15], w[14], selector);
|
|
w[39] = __byte_perm_S (w[14], w[13], selector);
|
|
w[38] = __byte_perm_S (w[13], w[12], selector);
|
|
w[37] = __byte_perm_S (w[12], w[11], selector);
|
|
w[36] = __byte_perm_S (w[11], w[10], selector);
|
|
w[35] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[34] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[33] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[32] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[31] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[30] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[29] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[28] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[27] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[26] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[25] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 26:
|
|
w[63] = __byte_perm_S (w[37], w[36], selector);
|
|
w[62] = __byte_perm_S (w[36], w[35], selector);
|
|
w[61] = __byte_perm_S (w[35], w[34], selector);
|
|
w[60] = __byte_perm_S (w[34], w[33], selector);
|
|
w[59] = __byte_perm_S (w[33], w[32], selector);
|
|
w[58] = __byte_perm_S (w[32], w[31], selector);
|
|
w[57] = __byte_perm_S (w[31], w[30], selector);
|
|
w[56] = __byte_perm_S (w[30], w[29], selector);
|
|
w[55] = __byte_perm_S (w[29], w[28], selector);
|
|
w[54] = __byte_perm_S (w[28], w[27], selector);
|
|
w[53] = __byte_perm_S (w[27], w[26], selector);
|
|
w[52] = __byte_perm_S (w[26], w[25], selector);
|
|
w[51] = __byte_perm_S (w[25], w[24], selector);
|
|
w[50] = __byte_perm_S (w[24], w[23], selector);
|
|
w[49] = __byte_perm_S (w[23], w[22], selector);
|
|
w[48] = __byte_perm_S (w[22], w[21], selector);
|
|
w[47] = __byte_perm_S (w[21], w[20], selector);
|
|
w[46] = __byte_perm_S (w[20], w[19], selector);
|
|
w[45] = __byte_perm_S (w[19], w[18], selector);
|
|
w[44] = __byte_perm_S (w[18], w[17], selector);
|
|
w[43] = __byte_perm_S (w[17], w[16], selector);
|
|
w[42] = __byte_perm_S (w[16], w[15], selector);
|
|
w[41] = __byte_perm_S (w[15], w[14], selector);
|
|
w[40] = __byte_perm_S (w[14], w[13], selector);
|
|
w[39] = __byte_perm_S (w[13], w[12], selector);
|
|
w[38] = __byte_perm_S (w[12], w[11], selector);
|
|
w[37] = __byte_perm_S (w[11], w[10], selector);
|
|
w[36] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[35] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[34] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[33] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[32] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[31] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[30] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[29] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[28] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[27] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[26] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 27:
|
|
w[63] = __byte_perm_S (w[36], w[35], selector);
|
|
w[62] = __byte_perm_S (w[35], w[34], selector);
|
|
w[61] = __byte_perm_S (w[34], w[33], selector);
|
|
w[60] = __byte_perm_S (w[33], w[32], selector);
|
|
w[59] = __byte_perm_S (w[32], w[31], selector);
|
|
w[58] = __byte_perm_S (w[31], w[30], selector);
|
|
w[57] = __byte_perm_S (w[30], w[29], selector);
|
|
w[56] = __byte_perm_S (w[29], w[28], selector);
|
|
w[55] = __byte_perm_S (w[28], w[27], selector);
|
|
w[54] = __byte_perm_S (w[27], w[26], selector);
|
|
w[53] = __byte_perm_S (w[26], w[25], selector);
|
|
w[52] = __byte_perm_S (w[25], w[24], selector);
|
|
w[51] = __byte_perm_S (w[24], w[23], selector);
|
|
w[50] = __byte_perm_S (w[23], w[22], selector);
|
|
w[49] = __byte_perm_S (w[22], w[21], selector);
|
|
w[48] = __byte_perm_S (w[21], w[20], selector);
|
|
w[47] = __byte_perm_S (w[20], w[19], selector);
|
|
w[46] = __byte_perm_S (w[19], w[18], selector);
|
|
w[45] = __byte_perm_S (w[18], w[17], selector);
|
|
w[44] = __byte_perm_S (w[17], w[16], selector);
|
|
w[43] = __byte_perm_S (w[16], w[15], selector);
|
|
w[42] = __byte_perm_S (w[15], w[14], selector);
|
|
w[41] = __byte_perm_S (w[14], w[13], selector);
|
|
w[40] = __byte_perm_S (w[13], w[12], selector);
|
|
w[39] = __byte_perm_S (w[12], w[11], selector);
|
|
w[38] = __byte_perm_S (w[11], w[10], selector);
|
|
w[37] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[36] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[35] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[34] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[33] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[32] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[31] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[30] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[29] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[28] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[27] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 28:
|
|
w[63] = __byte_perm_S (w[35], w[34], selector);
|
|
w[62] = __byte_perm_S (w[34], w[33], selector);
|
|
w[61] = __byte_perm_S (w[33], w[32], selector);
|
|
w[60] = __byte_perm_S (w[32], w[31], selector);
|
|
w[59] = __byte_perm_S (w[31], w[30], selector);
|
|
w[58] = __byte_perm_S (w[30], w[29], selector);
|
|
w[57] = __byte_perm_S (w[29], w[28], selector);
|
|
w[56] = __byte_perm_S (w[28], w[27], selector);
|
|
w[55] = __byte_perm_S (w[27], w[26], selector);
|
|
w[54] = __byte_perm_S (w[26], w[25], selector);
|
|
w[53] = __byte_perm_S (w[25], w[24], selector);
|
|
w[52] = __byte_perm_S (w[24], w[23], selector);
|
|
w[51] = __byte_perm_S (w[23], w[22], selector);
|
|
w[50] = __byte_perm_S (w[22], w[21], selector);
|
|
w[49] = __byte_perm_S (w[21], w[20], selector);
|
|
w[48] = __byte_perm_S (w[20], w[19], selector);
|
|
w[47] = __byte_perm_S (w[19], w[18], selector);
|
|
w[46] = __byte_perm_S (w[18], w[17], selector);
|
|
w[45] = __byte_perm_S (w[17], w[16], selector);
|
|
w[44] = __byte_perm_S (w[16], w[15], selector);
|
|
w[43] = __byte_perm_S (w[15], w[14], selector);
|
|
w[42] = __byte_perm_S (w[14], w[13], selector);
|
|
w[41] = __byte_perm_S (w[13], w[12], selector);
|
|
w[40] = __byte_perm_S (w[12], w[11], selector);
|
|
w[39] = __byte_perm_S (w[11], w[10], selector);
|
|
w[38] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[37] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[36] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[35] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[34] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[33] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[32] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[31] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[30] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[29] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[28] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 29:
|
|
w[63] = __byte_perm_S (w[34], w[33], selector);
|
|
w[62] = __byte_perm_S (w[33], w[32], selector);
|
|
w[61] = __byte_perm_S (w[32], w[31], selector);
|
|
w[60] = __byte_perm_S (w[31], w[30], selector);
|
|
w[59] = __byte_perm_S (w[30], w[29], selector);
|
|
w[58] = __byte_perm_S (w[29], w[28], selector);
|
|
w[57] = __byte_perm_S (w[28], w[27], selector);
|
|
w[56] = __byte_perm_S (w[27], w[26], selector);
|
|
w[55] = __byte_perm_S (w[26], w[25], selector);
|
|
w[54] = __byte_perm_S (w[25], w[24], selector);
|
|
w[53] = __byte_perm_S (w[24], w[23], selector);
|
|
w[52] = __byte_perm_S (w[23], w[22], selector);
|
|
w[51] = __byte_perm_S (w[22], w[21], selector);
|
|
w[50] = __byte_perm_S (w[21], w[20], selector);
|
|
w[49] = __byte_perm_S (w[20], w[19], selector);
|
|
w[48] = __byte_perm_S (w[19], w[18], selector);
|
|
w[47] = __byte_perm_S (w[18], w[17], selector);
|
|
w[46] = __byte_perm_S (w[17], w[16], selector);
|
|
w[45] = __byte_perm_S (w[16], w[15], selector);
|
|
w[44] = __byte_perm_S (w[15], w[14], selector);
|
|
w[43] = __byte_perm_S (w[14], w[13], selector);
|
|
w[42] = __byte_perm_S (w[13], w[12], selector);
|
|
w[41] = __byte_perm_S (w[12], w[11], selector);
|
|
w[40] = __byte_perm_S (w[11], w[10], selector);
|
|
w[39] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[38] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[37] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[36] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[35] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[34] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[33] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[32] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[31] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[30] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[29] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 30:
|
|
w[63] = __byte_perm_S (w[33], w[32], selector);
|
|
w[62] = __byte_perm_S (w[32], w[31], selector);
|
|
w[61] = __byte_perm_S (w[31], w[30], selector);
|
|
w[60] = __byte_perm_S (w[30], w[29], selector);
|
|
w[59] = __byte_perm_S (w[29], w[28], selector);
|
|
w[58] = __byte_perm_S (w[28], w[27], selector);
|
|
w[57] = __byte_perm_S (w[27], w[26], selector);
|
|
w[56] = __byte_perm_S (w[26], w[25], selector);
|
|
w[55] = __byte_perm_S (w[25], w[24], selector);
|
|
w[54] = __byte_perm_S (w[24], w[23], selector);
|
|
w[53] = __byte_perm_S (w[23], w[22], selector);
|
|
w[52] = __byte_perm_S (w[22], w[21], selector);
|
|
w[51] = __byte_perm_S (w[21], w[20], selector);
|
|
w[50] = __byte_perm_S (w[20], w[19], selector);
|
|
w[49] = __byte_perm_S (w[19], w[18], selector);
|
|
w[48] = __byte_perm_S (w[18], w[17], selector);
|
|
w[47] = __byte_perm_S (w[17], w[16], selector);
|
|
w[46] = __byte_perm_S (w[16], w[15], selector);
|
|
w[45] = __byte_perm_S (w[15], w[14], selector);
|
|
w[44] = __byte_perm_S (w[14], w[13], selector);
|
|
w[43] = __byte_perm_S (w[13], w[12], selector);
|
|
w[42] = __byte_perm_S (w[12], w[11], selector);
|
|
w[41] = __byte_perm_S (w[11], w[10], selector);
|
|
w[40] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[39] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[38] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[37] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[36] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[35] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[34] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[33] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[32] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[31] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[30] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 31:
|
|
w[63] = __byte_perm_S (w[32], w[31], selector);
|
|
w[62] = __byte_perm_S (w[31], w[30], selector);
|
|
w[61] = __byte_perm_S (w[30], w[29], selector);
|
|
w[60] = __byte_perm_S (w[29], w[28], selector);
|
|
w[59] = __byte_perm_S (w[28], w[27], selector);
|
|
w[58] = __byte_perm_S (w[27], w[26], selector);
|
|
w[57] = __byte_perm_S (w[26], w[25], selector);
|
|
w[56] = __byte_perm_S (w[25], w[24], selector);
|
|
w[55] = __byte_perm_S (w[24], w[23], selector);
|
|
w[54] = __byte_perm_S (w[23], w[22], selector);
|
|
w[53] = __byte_perm_S (w[22], w[21], selector);
|
|
w[52] = __byte_perm_S (w[21], w[20], selector);
|
|
w[51] = __byte_perm_S (w[20], w[19], selector);
|
|
w[50] = __byte_perm_S (w[19], w[18], selector);
|
|
w[49] = __byte_perm_S (w[18], w[17], selector);
|
|
w[48] = __byte_perm_S (w[17], w[16], selector);
|
|
w[47] = __byte_perm_S (w[16], w[15], selector);
|
|
w[46] = __byte_perm_S (w[15], w[14], selector);
|
|
w[45] = __byte_perm_S (w[14], w[13], selector);
|
|
w[44] = __byte_perm_S (w[13], w[12], selector);
|
|
w[43] = __byte_perm_S (w[12], w[11], selector);
|
|
w[42] = __byte_perm_S (w[11], w[10], selector);
|
|
w[41] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[40] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[39] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[38] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[37] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[36] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[35] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[34] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[33] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[32] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[31] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 32:
|
|
w[63] = __byte_perm_S (w[31], w[30], selector);
|
|
w[62] = __byte_perm_S (w[30], w[29], selector);
|
|
w[61] = __byte_perm_S (w[29], w[28], selector);
|
|
w[60] = __byte_perm_S (w[28], w[27], selector);
|
|
w[59] = __byte_perm_S (w[27], w[26], selector);
|
|
w[58] = __byte_perm_S (w[26], w[25], selector);
|
|
w[57] = __byte_perm_S (w[25], w[24], selector);
|
|
w[56] = __byte_perm_S (w[24], w[23], selector);
|
|
w[55] = __byte_perm_S (w[23], w[22], selector);
|
|
w[54] = __byte_perm_S (w[22], w[21], selector);
|
|
w[53] = __byte_perm_S (w[21], w[20], selector);
|
|
w[52] = __byte_perm_S (w[20], w[19], selector);
|
|
w[51] = __byte_perm_S (w[19], w[18], selector);
|
|
w[50] = __byte_perm_S (w[18], w[17], selector);
|
|
w[49] = __byte_perm_S (w[17], w[16], selector);
|
|
w[48] = __byte_perm_S (w[16], w[15], selector);
|
|
w[47] = __byte_perm_S (w[15], w[14], selector);
|
|
w[46] = __byte_perm_S (w[14], w[13], selector);
|
|
w[45] = __byte_perm_S (w[13], w[12], selector);
|
|
w[44] = __byte_perm_S (w[12], w[11], selector);
|
|
w[43] = __byte_perm_S (w[11], w[10], selector);
|
|
w[42] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[41] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[40] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[39] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[38] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[37] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[36] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[35] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[34] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[33] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[32] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 33:
|
|
w[63] = __byte_perm_S (w[30], w[29], selector);
|
|
w[62] = __byte_perm_S (w[29], w[28], selector);
|
|
w[61] = __byte_perm_S (w[28], w[27], selector);
|
|
w[60] = __byte_perm_S (w[27], w[26], selector);
|
|
w[59] = __byte_perm_S (w[26], w[25], selector);
|
|
w[58] = __byte_perm_S (w[25], w[24], selector);
|
|
w[57] = __byte_perm_S (w[24], w[23], selector);
|
|
w[56] = __byte_perm_S (w[23], w[22], selector);
|
|
w[55] = __byte_perm_S (w[22], w[21], selector);
|
|
w[54] = __byte_perm_S (w[21], w[20], selector);
|
|
w[53] = __byte_perm_S (w[20], w[19], selector);
|
|
w[52] = __byte_perm_S (w[19], w[18], selector);
|
|
w[51] = __byte_perm_S (w[18], w[17], selector);
|
|
w[50] = __byte_perm_S (w[17], w[16], selector);
|
|
w[49] = __byte_perm_S (w[16], w[15], selector);
|
|
w[48] = __byte_perm_S (w[15], w[14], selector);
|
|
w[47] = __byte_perm_S (w[14], w[13], selector);
|
|
w[46] = __byte_perm_S (w[13], w[12], selector);
|
|
w[45] = __byte_perm_S (w[12], w[11], selector);
|
|
w[44] = __byte_perm_S (w[11], w[10], selector);
|
|
w[43] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[42] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[41] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[40] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[39] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[38] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[37] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[36] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[35] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[34] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[33] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 34:
|
|
w[63] = __byte_perm_S (w[29], w[28], selector);
|
|
w[62] = __byte_perm_S (w[28], w[27], selector);
|
|
w[61] = __byte_perm_S (w[27], w[26], selector);
|
|
w[60] = __byte_perm_S (w[26], w[25], selector);
|
|
w[59] = __byte_perm_S (w[25], w[24], selector);
|
|
w[58] = __byte_perm_S (w[24], w[23], selector);
|
|
w[57] = __byte_perm_S (w[23], w[22], selector);
|
|
w[56] = __byte_perm_S (w[22], w[21], selector);
|
|
w[55] = __byte_perm_S (w[21], w[20], selector);
|
|
w[54] = __byte_perm_S (w[20], w[19], selector);
|
|
w[53] = __byte_perm_S (w[19], w[18], selector);
|
|
w[52] = __byte_perm_S (w[18], w[17], selector);
|
|
w[51] = __byte_perm_S (w[17], w[16], selector);
|
|
w[50] = __byte_perm_S (w[16], w[15], selector);
|
|
w[49] = __byte_perm_S (w[15], w[14], selector);
|
|
w[48] = __byte_perm_S (w[14], w[13], selector);
|
|
w[47] = __byte_perm_S (w[13], w[12], selector);
|
|
w[46] = __byte_perm_S (w[12], w[11], selector);
|
|
w[45] = __byte_perm_S (w[11], w[10], selector);
|
|
w[44] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[43] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[42] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[41] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[40] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[39] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[38] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[37] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[36] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[35] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[34] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 35:
|
|
w[63] = __byte_perm_S (w[28], w[27], selector);
|
|
w[62] = __byte_perm_S (w[27], w[26], selector);
|
|
w[61] = __byte_perm_S (w[26], w[25], selector);
|
|
w[60] = __byte_perm_S (w[25], w[24], selector);
|
|
w[59] = __byte_perm_S (w[24], w[23], selector);
|
|
w[58] = __byte_perm_S (w[23], w[22], selector);
|
|
w[57] = __byte_perm_S (w[22], w[21], selector);
|
|
w[56] = __byte_perm_S (w[21], w[20], selector);
|
|
w[55] = __byte_perm_S (w[20], w[19], selector);
|
|
w[54] = __byte_perm_S (w[19], w[18], selector);
|
|
w[53] = __byte_perm_S (w[18], w[17], selector);
|
|
w[52] = __byte_perm_S (w[17], w[16], selector);
|
|
w[51] = __byte_perm_S (w[16], w[15], selector);
|
|
w[50] = __byte_perm_S (w[15], w[14], selector);
|
|
w[49] = __byte_perm_S (w[14], w[13], selector);
|
|
w[48] = __byte_perm_S (w[13], w[12], selector);
|
|
w[47] = __byte_perm_S (w[12], w[11], selector);
|
|
w[46] = __byte_perm_S (w[11], w[10], selector);
|
|
w[45] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[44] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[43] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[42] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[41] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[40] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[39] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[38] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[37] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[36] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[35] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 36:
|
|
w[63] = __byte_perm_S (w[27], w[26], selector);
|
|
w[62] = __byte_perm_S (w[26], w[25], selector);
|
|
w[61] = __byte_perm_S (w[25], w[24], selector);
|
|
w[60] = __byte_perm_S (w[24], w[23], selector);
|
|
w[59] = __byte_perm_S (w[23], w[22], selector);
|
|
w[58] = __byte_perm_S (w[22], w[21], selector);
|
|
w[57] = __byte_perm_S (w[21], w[20], selector);
|
|
w[56] = __byte_perm_S (w[20], w[19], selector);
|
|
w[55] = __byte_perm_S (w[19], w[18], selector);
|
|
w[54] = __byte_perm_S (w[18], w[17], selector);
|
|
w[53] = __byte_perm_S (w[17], w[16], selector);
|
|
w[52] = __byte_perm_S (w[16], w[15], selector);
|
|
w[51] = __byte_perm_S (w[15], w[14], selector);
|
|
w[50] = __byte_perm_S (w[14], w[13], selector);
|
|
w[49] = __byte_perm_S (w[13], w[12], selector);
|
|
w[48] = __byte_perm_S (w[12], w[11], selector);
|
|
w[47] = __byte_perm_S (w[11], w[10], selector);
|
|
w[46] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[45] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[44] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[43] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[42] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[41] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[40] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[39] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[38] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[37] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[36] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 37:
|
|
w[63] = __byte_perm_S (w[26], w[25], selector);
|
|
w[62] = __byte_perm_S (w[25], w[24], selector);
|
|
w[61] = __byte_perm_S (w[24], w[23], selector);
|
|
w[60] = __byte_perm_S (w[23], w[22], selector);
|
|
w[59] = __byte_perm_S (w[22], w[21], selector);
|
|
w[58] = __byte_perm_S (w[21], w[20], selector);
|
|
w[57] = __byte_perm_S (w[20], w[19], selector);
|
|
w[56] = __byte_perm_S (w[19], w[18], selector);
|
|
w[55] = __byte_perm_S (w[18], w[17], selector);
|
|
w[54] = __byte_perm_S (w[17], w[16], selector);
|
|
w[53] = __byte_perm_S (w[16], w[15], selector);
|
|
w[52] = __byte_perm_S (w[15], w[14], selector);
|
|
w[51] = __byte_perm_S (w[14], w[13], selector);
|
|
w[50] = __byte_perm_S (w[13], w[12], selector);
|
|
w[49] = __byte_perm_S (w[12], w[11], selector);
|
|
w[48] = __byte_perm_S (w[11], w[10], selector);
|
|
w[47] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[46] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[45] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[44] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[43] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[42] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[41] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[40] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[39] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[38] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[37] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 38:
|
|
w[63] = __byte_perm_S (w[25], w[24], selector);
|
|
w[62] = __byte_perm_S (w[24], w[23], selector);
|
|
w[61] = __byte_perm_S (w[23], w[22], selector);
|
|
w[60] = __byte_perm_S (w[22], w[21], selector);
|
|
w[59] = __byte_perm_S (w[21], w[20], selector);
|
|
w[58] = __byte_perm_S (w[20], w[19], selector);
|
|
w[57] = __byte_perm_S (w[19], w[18], selector);
|
|
w[56] = __byte_perm_S (w[18], w[17], selector);
|
|
w[55] = __byte_perm_S (w[17], w[16], selector);
|
|
w[54] = __byte_perm_S (w[16], w[15], selector);
|
|
w[53] = __byte_perm_S (w[15], w[14], selector);
|
|
w[52] = __byte_perm_S (w[14], w[13], selector);
|
|
w[51] = __byte_perm_S (w[13], w[12], selector);
|
|
w[50] = __byte_perm_S (w[12], w[11], selector);
|
|
w[49] = __byte_perm_S (w[11], w[10], selector);
|
|
w[48] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[47] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[46] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[45] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[44] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[43] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[42] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[41] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[40] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[39] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[38] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 39:
|
|
w[63] = __byte_perm_S (w[24], w[23], selector);
|
|
w[62] = __byte_perm_S (w[23], w[22], selector);
|
|
w[61] = __byte_perm_S (w[22], w[21], selector);
|
|
w[60] = __byte_perm_S (w[21], w[20], selector);
|
|
w[59] = __byte_perm_S (w[20], w[19], selector);
|
|
w[58] = __byte_perm_S (w[19], w[18], selector);
|
|
w[57] = __byte_perm_S (w[18], w[17], selector);
|
|
w[56] = __byte_perm_S (w[17], w[16], selector);
|
|
w[55] = __byte_perm_S (w[16], w[15], selector);
|
|
w[54] = __byte_perm_S (w[15], w[14], selector);
|
|
w[53] = __byte_perm_S (w[14], w[13], selector);
|
|
w[52] = __byte_perm_S (w[13], w[12], selector);
|
|
w[51] = __byte_perm_S (w[12], w[11], selector);
|
|
w[50] = __byte_perm_S (w[11], w[10], selector);
|
|
w[49] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[48] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[47] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[46] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[45] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[44] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[43] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[42] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[41] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[40] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[39] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 40:
|
|
w[63] = __byte_perm_S (w[23], w[22], selector);
|
|
w[62] = __byte_perm_S (w[22], w[21], selector);
|
|
w[61] = __byte_perm_S (w[21], w[20], selector);
|
|
w[60] = __byte_perm_S (w[20], w[19], selector);
|
|
w[59] = __byte_perm_S (w[19], w[18], selector);
|
|
w[58] = __byte_perm_S (w[18], w[17], selector);
|
|
w[57] = __byte_perm_S (w[17], w[16], selector);
|
|
w[56] = __byte_perm_S (w[16], w[15], selector);
|
|
w[55] = __byte_perm_S (w[15], w[14], selector);
|
|
w[54] = __byte_perm_S (w[14], w[13], selector);
|
|
w[53] = __byte_perm_S (w[13], w[12], selector);
|
|
w[52] = __byte_perm_S (w[12], w[11], selector);
|
|
w[51] = __byte_perm_S (w[11], w[10], selector);
|
|
w[50] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[49] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[48] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[47] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[46] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[45] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[44] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[43] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[42] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[41] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[40] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 41:
|
|
w[63] = __byte_perm_S (w[22], w[21], selector);
|
|
w[62] = __byte_perm_S (w[21], w[20], selector);
|
|
w[61] = __byte_perm_S (w[20], w[19], selector);
|
|
w[60] = __byte_perm_S (w[19], w[18], selector);
|
|
w[59] = __byte_perm_S (w[18], w[17], selector);
|
|
w[58] = __byte_perm_S (w[17], w[16], selector);
|
|
w[57] = __byte_perm_S (w[16], w[15], selector);
|
|
w[56] = __byte_perm_S (w[15], w[14], selector);
|
|
w[55] = __byte_perm_S (w[14], w[13], selector);
|
|
w[54] = __byte_perm_S (w[13], w[12], selector);
|
|
w[53] = __byte_perm_S (w[12], w[11], selector);
|
|
w[52] = __byte_perm_S (w[11], w[10], selector);
|
|
w[51] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[50] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[49] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[48] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[47] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[46] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[45] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[44] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[43] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[42] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[41] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 42:
|
|
w[63] = __byte_perm_S (w[21], w[20], selector);
|
|
w[62] = __byte_perm_S (w[20], w[19], selector);
|
|
w[61] = __byte_perm_S (w[19], w[18], selector);
|
|
w[60] = __byte_perm_S (w[18], w[17], selector);
|
|
w[59] = __byte_perm_S (w[17], w[16], selector);
|
|
w[58] = __byte_perm_S (w[16], w[15], selector);
|
|
w[57] = __byte_perm_S (w[15], w[14], selector);
|
|
w[56] = __byte_perm_S (w[14], w[13], selector);
|
|
w[55] = __byte_perm_S (w[13], w[12], selector);
|
|
w[54] = __byte_perm_S (w[12], w[11], selector);
|
|
w[53] = __byte_perm_S (w[11], w[10], selector);
|
|
w[52] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[51] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[50] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[49] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[48] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[47] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[46] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[45] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[44] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[43] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[42] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 43:
|
|
w[63] = __byte_perm_S (w[20], w[19], selector);
|
|
w[62] = __byte_perm_S (w[19], w[18], selector);
|
|
w[61] = __byte_perm_S (w[18], w[17], selector);
|
|
w[60] = __byte_perm_S (w[17], w[16], selector);
|
|
w[59] = __byte_perm_S (w[16], w[15], selector);
|
|
w[58] = __byte_perm_S (w[15], w[14], selector);
|
|
w[57] = __byte_perm_S (w[14], w[13], selector);
|
|
w[56] = __byte_perm_S (w[13], w[12], selector);
|
|
w[55] = __byte_perm_S (w[12], w[11], selector);
|
|
w[54] = __byte_perm_S (w[11], w[10], selector);
|
|
w[53] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[52] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[51] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[50] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[49] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[48] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[47] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[46] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[45] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[44] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[43] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 44:
|
|
w[63] = __byte_perm_S (w[19], w[18], selector);
|
|
w[62] = __byte_perm_S (w[18], w[17], selector);
|
|
w[61] = __byte_perm_S (w[17], w[16], selector);
|
|
w[60] = __byte_perm_S (w[16], w[15], selector);
|
|
w[59] = __byte_perm_S (w[15], w[14], selector);
|
|
w[58] = __byte_perm_S (w[14], w[13], selector);
|
|
w[57] = __byte_perm_S (w[13], w[12], selector);
|
|
w[56] = __byte_perm_S (w[12], w[11], selector);
|
|
w[55] = __byte_perm_S (w[11], w[10], selector);
|
|
w[54] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[53] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[52] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[51] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[50] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[49] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[48] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[47] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[46] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[45] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[44] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 45:
|
|
w[63] = __byte_perm_S (w[18], w[17], selector);
|
|
w[62] = __byte_perm_S (w[17], w[16], selector);
|
|
w[61] = __byte_perm_S (w[16], w[15], selector);
|
|
w[60] = __byte_perm_S (w[15], w[14], selector);
|
|
w[59] = __byte_perm_S (w[14], w[13], selector);
|
|
w[58] = __byte_perm_S (w[13], w[12], selector);
|
|
w[57] = __byte_perm_S (w[12], w[11], selector);
|
|
w[56] = __byte_perm_S (w[11], w[10], selector);
|
|
w[55] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[54] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[53] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[52] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[51] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[50] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[49] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[48] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[47] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[46] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[45] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 46:
|
|
w[63] = __byte_perm_S (w[17], w[16], selector);
|
|
w[62] = __byte_perm_S (w[16], w[15], selector);
|
|
w[61] = __byte_perm_S (w[15], w[14], selector);
|
|
w[60] = __byte_perm_S (w[14], w[13], selector);
|
|
w[59] = __byte_perm_S (w[13], w[12], selector);
|
|
w[58] = __byte_perm_S (w[12], w[11], selector);
|
|
w[57] = __byte_perm_S (w[11], w[10], selector);
|
|
w[56] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[55] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[54] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[53] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[52] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[51] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[50] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[49] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[48] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[47] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[46] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 47:
|
|
w[63] = __byte_perm_S (w[16], w[15], selector);
|
|
w[62] = __byte_perm_S (w[15], w[14], selector);
|
|
w[61] = __byte_perm_S (w[14], w[13], selector);
|
|
w[60] = __byte_perm_S (w[13], w[12], selector);
|
|
w[59] = __byte_perm_S (w[12], w[11], selector);
|
|
w[58] = __byte_perm_S (w[11], w[10], selector);
|
|
w[57] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[56] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[55] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[54] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[53] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[52] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[51] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[50] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[49] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[48] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[47] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 48:
|
|
w[63] = __byte_perm_S (w[15], w[14], selector);
|
|
w[62] = __byte_perm_S (w[14], w[13], selector);
|
|
w[61] = __byte_perm_S (w[13], w[12], selector);
|
|
w[60] = __byte_perm_S (w[12], w[11], selector);
|
|
w[59] = __byte_perm_S (w[11], w[10], selector);
|
|
w[58] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[57] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[56] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[55] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[54] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[53] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[52] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[51] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[50] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[49] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[48] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 49:
|
|
w[63] = __byte_perm_S (w[14], w[13], selector);
|
|
w[62] = __byte_perm_S (w[13], w[12], selector);
|
|
w[61] = __byte_perm_S (w[12], w[11], selector);
|
|
w[60] = __byte_perm_S (w[11], w[10], selector);
|
|
w[59] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[58] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[57] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[56] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[55] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[54] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[53] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[52] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[51] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[50] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[49] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 50:
|
|
w[63] = __byte_perm_S (w[13], w[12], selector);
|
|
w[62] = __byte_perm_S (w[12], w[11], selector);
|
|
w[61] = __byte_perm_S (w[11], w[10], selector);
|
|
w[60] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[59] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[58] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[57] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[56] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[55] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[54] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[53] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[52] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[51] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[50] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 51:
|
|
w[63] = __byte_perm_S (w[12], w[11], selector);
|
|
w[62] = __byte_perm_S (w[11], w[10], selector);
|
|
w[61] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[60] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[59] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[58] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[57] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[56] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[55] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[54] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[53] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[52] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[51] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 52:
|
|
w[63] = __byte_perm_S (w[11], w[10], selector);
|
|
w[62] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[61] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[60] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[59] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[58] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[57] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[56] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[55] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[54] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[53] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[52] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 53:
|
|
w[63] = __byte_perm_S (w[10], w[ 9], selector);
|
|
w[62] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[61] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[60] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[59] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[58] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[57] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[56] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[55] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[54] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[53] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 54:
|
|
w[63] = __byte_perm_S (w[ 9], w[ 8], selector);
|
|
w[62] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[61] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[60] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[59] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[58] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[57] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[56] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[55] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[54] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 55:
|
|
w[63] = __byte_perm_S (w[ 8], w[ 7], selector);
|
|
w[62] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[61] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[60] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[59] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[58] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[57] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[56] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[55] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 56:
|
|
w[63] = __byte_perm_S (w[ 7], w[ 6], selector);
|
|
w[62] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[61] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[60] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[59] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[58] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[57] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[56] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 57:
|
|
w[63] = __byte_perm_S (w[ 6], w[ 5], selector);
|
|
w[62] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[61] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[60] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[59] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[58] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[57] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 58:
|
|
w[63] = __byte_perm_S (w[ 5], w[ 4], selector);
|
|
w[62] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[61] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[60] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[59] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[58] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 59:
|
|
w[63] = __byte_perm_S (w[ 4], w[ 3], selector);
|
|
w[62] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[61] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[60] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[59] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 60:
|
|
w[63] = __byte_perm_S (w[ 3], w[ 2], selector);
|
|
w[62] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[61] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[60] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 61:
|
|
w[63] = __byte_perm_S (w[ 2], w[ 1], selector);
|
|
w[62] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[61] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 62:
|
|
w[63] = __byte_perm_S (w[ 1], w[ 0], selector);
|
|
w[62] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
|
|
case 63:
|
|
w[63] = __byte_perm_S (w[ 0], 0, selector);
|
|
w[62] = 0;
|
|
w[61] = 0;
|
|
w[60] = 0;
|
|
w[59] = 0;
|
|
w[58] = 0;
|
|
w[57] = 0;
|
|
w[56] = 0;
|
|
w[55] = 0;
|
|
w[54] = 0;
|
|
w[53] = 0;
|
|
w[52] = 0;
|
|
w[51] = 0;
|
|
w[50] = 0;
|
|
w[49] = 0;
|
|
w[48] = 0;
|
|
w[47] = 0;
|
|
w[46] = 0;
|
|
w[45] = 0;
|
|
w[44] = 0;
|
|
w[43] = 0;
|
|
w[42] = 0;
|
|
w[41] = 0;
|
|
w[40] = 0;
|
|
w[39] = 0;
|
|
w[38] = 0;
|
|
w[37] = 0;
|
|
w[36] = 0;
|
|
w[35] = 0;
|
|
w[34] = 0;
|
|
w[33] = 0;
|
|
w[32] = 0;
|
|
w[31] = 0;
|
|
w[30] = 0;
|
|
w[29] = 0;
|
|
w[28] = 0;
|
|
w[27] = 0;
|
|
w[26] = 0;
|
|
w[25] = 0;
|
|
w[24] = 0;
|
|
w[23] = 0;
|
|
w[22] = 0;
|
|
w[21] = 0;
|
|
w[20] = 0;
|
|
w[19] = 0;
|
|
w[18] = 0;
|
|
w[17] = 0;
|
|
w[16] = 0;
|
|
w[15] = 0;
|
|
w[14] = 0;
|
|
w[13] = 0;
|
|
w[12] = 0;
|
|
w[11] = 0;
|
|
w[10] = 0;
|
|
w[ 9] = 0;
|
|
w[ 8] = 0;
|
|
w[ 7] = 0;
|
|
w[ 6] = 0;
|
|
w[ 5] = 0;
|
|
w[ 4] = 0;
|
|
w[ 3] = 0;
|
|
w[ 2] = 0;
|
|
w[ 1] = 0;
|
|
w[ 0] = 0;
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**
|
|
* vector functions on scalar types (for inner loop usage)
|
|
*/
|
|
|
|
/**
 * PACKVS2: vector -> scalar. Copies component `e` of vn[0] and vn[1] into
 * sn[0] and sn[1].
 *
 * `e` is token-pasted onto `.s` to build the component name (0 -> .s0,
 * a -> .sa), so it must be a bare component suffix, not an expression.
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKVS2(sn,vn,e)    \
  do                        \
  {                         \
    (sn)[0] = (vn)[0].s##e; \
    (sn)[1] = (vn)[1].s##e; \
  } while (0)
|
|
|
|
/**
 * PACKSV2: scalar -> vector. Stores sn[0] and sn[1] into component `e` of
 * vn[0] and vn[1]; the other components of vn are left as they were.
 *
 * `e` is token-pasted onto `.s` to build the component name, so it must be a
 * bare component suffix. Wrapped in do/while (0) so the expansion is one
 * statement; terminate the invocation with `;`.
 */
#define PACKSV2(sn,vn,e)    \
  do                        \
  {                         \
    (vn)[0].s##e = (sn)[0]; \
    (vn)[1].s##e = (sn)[1]; \
  } while (0)
|
|
|
|
/**
 * PACKVS24: vector -> scalar for two 4-element groups. Expands to PACKVS4 on
 * (s0, v0) and then on (s1, v1), both for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKVS24(s0,s1,v0,v1,e) \
  do                            \
  {                             \
    PACKVS4 (s0, v0, e);        \
    PACKVS4 (s1, v1, e);        \
  } while (0)
|
|
|
|
/**
 * PACKSV24: scalar -> vector for two 4-element groups. Expands to PACKSV4 on
 * (s0, v0) and then on (s1, v1), both for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKSV24(s0,s1,v0,v1,e) \
  do                            \
  {                             \
    PACKSV4 (s0, v0, e);        \
    PACKSV4 (s1, v1, e);        \
  } while (0)
|
|
|
|
/**
 * PACKVS4: vector -> scalar. Copies component `e` of vn[0..3] into sn[0..3].
 *
 * `e` is token-pasted onto `.s` to build the component name (0 -> .s0,
 * a -> .sa), so it must be a bare component suffix, not an expression.
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKVS4(sn,vn,e)    \
  do                        \
  {                         \
    (sn)[0] = (vn)[0].s##e; \
    (sn)[1] = (vn)[1].s##e; \
    (sn)[2] = (vn)[2].s##e; \
    (sn)[3] = (vn)[3].s##e; \
  } while (0)
|
|
|
|
/**
 * PACKSV4: scalar -> vector. Stores sn[0..3] into component `e` of vn[0..3];
 * the other components of vn are left as they were.
 *
 * `e` is token-pasted onto `.s` to build the component name, so it must be a
 * bare component suffix. Wrapped in do/while (0) so the expansion is one
 * statement; terminate the invocation with `;`.
 */
#define PACKSV4(sn,vn,e)    \
  do                        \
  {                         \
    (vn)[0].s##e = (sn)[0]; \
    (vn)[1].s##e = (sn)[1]; \
    (vn)[2].s##e = (sn)[2]; \
    (vn)[3].s##e = (sn)[3]; \
  } while (0)
|
|
|
|
/**
 * PACKVS44: vector -> scalar for four 4-element groups. Expands to PACKVS4 on
 * (s0, v0) through (s3, v3), in that order, all for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKVS44(s0,s1,s2,s3,v0,v1,v2,v3,e) \
  do                                        \
  {                                         \
    PACKVS4 (s0, v0, e);                    \
    PACKVS4 (s1, v1, e);                    \
    PACKVS4 (s2, v2, e);                    \
    PACKVS4 (s3, v3, e);                    \
  } while (0)
|
|
|
|
/**
 * PACKSV44: scalar -> vector for four 4-element groups. Expands to PACKSV4 on
 * (s0, v0) through (s3, v3), in that order, all for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKSV44(s0,s1,s2,s3,v0,v1,v2,v3,e) \
  do                                        \
  {                                         \
    PACKSV4 (s0, v0, e);                    \
    PACKSV4 (s1, v1, e);                    \
    PACKSV4 (s2, v2, e);                    \
    PACKSV4 (s3, v3, e);                    \
  } while (0)
|
|
|
|
/**
 * PACKVS84: vector -> scalar for eight 4-element groups. Expands to PACKVS4 on
 * (s0, v0) through (s7, v7), in that order, all for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKVS84(s0,s1,s2,s3,s4,s5,s6,s7,v0,v1,v2,v3,v4,v5,v6,v7,e) \
  do                                                                \
  {                                                                 \
    PACKVS4 (s0, v0, e);                                            \
    PACKVS4 (s1, v1, e);                                            \
    PACKVS4 (s2, v2, e);                                            \
    PACKVS4 (s3, v3, e);                                            \
    PACKVS4 (s4, v4, e);                                            \
    PACKVS4 (s5, v5, e);                                            \
    PACKVS4 (s6, v6, e);                                            \
    PACKVS4 (s7, v7, e);                                            \
  } while (0)
|
|
|
|
/**
 * PACKSV84: scalar -> vector for eight 4-element groups. Expands to PACKSV4 on
 * (s0, v0) through (s7, v7), in that order, all for component `e`.
 *
 * Wrapped in do/while (0) so the expansion is one statement; terminate the
 * invocation with `;`.
 */
#define PACKSV84(s0,s1,s2,s3,s4,s5,s6,s7,v0,v1,v2,v3,v4,v5,v6,v7,e) \
  do                                                                \
  {                                                                 \
    PACKSV4 (s0, v0, e);                                            \
    PACKSV4 (s1, v1, e);                                            \
    PACKSV4 (s2, v2, e);                                            \
    PACKSV4 (s3, v3, e);                                            \
    PACKSV4 (s4, v4, e);                                            \
    PACKSV4 (s5, v5, e);                                            \
    PACKSV4 (s6, v6, e);                                            \
    PACKSV4 (s7, v7, e);                                            \
  } while (0)
|
|
|
|
/**
 * Vector front-end for switch_buffer_by_offset_le_S.
 *
 * w0..w3 : four groups of four u32x words, updated in place
 * offset : forwarded to the scalar routine, one component per lane
 *
 * With VECT_SIZE == 1 the arguments are handed straight to the scalar
 * routine. For VECT_SIZE 2, 4, 8 or 16 every lane is unpacked into scalar
 * scratch arrays, processed with that lane's own offset component and packed
 * back, in ascending lane order. For any other VECT_SIZE value no lane is
 * processed and the buffers are left untouched.
 */
static void switch_buffer_by_offset_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
{
  #if VECT_SIZE == 1

  switch_buffer_by_offset_le_S (w0, w1, w2, w3, offset);

  #else

  // scalar scratch copy of a single lane
  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];

  // unpack lane e, run the scalar routine with that lane's offset, repack
  #define SWITCH_BUFFER_LE_VV_LANE(e)                            \
    PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, e);                \
    switch_buffer_by_offset_le_S (t0, t1, t2, t3, offset.s##e);  \
    PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, e)

  // lanes 0-1: present in every supported vector width
  #if VECT_SIZE == 2 || VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_BUFFER_LE_VV_LANE (0);
  SWITCH_BUFFER_LE_VV_LANE (1);

  #endif

  // lanes 2-3: widths 4 and up
  #if VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_BUFFER_LE_VV_LANE (2);
  SWITCH_BUFFER_LE_VV_LANE (3);

  #endif

  // lanes 4-7: widths 8 and up
  #if VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_BUFFER_LE_VV_LANE (4);
  SWITCH_BUFFER_LE_VV_LANE (5);
  SWITCH_BUFFER_LE_VV_LANE (6);
  SWITCH_BUFFER_LE_VV_LANE (7);

  #endif

  // lanes 8-f: width 16 only
  #if VECT_SIZE == 16

  SWITCH_BUFFER_LE_VV_LANE (8);
  SWITCH_BUFFER_LE_VV_LANE (9);
  SWITCH_BUFFER_LE_VV_LANE (a);
  SWITCH_BUFFER_LE_VV_LANE (b);
  SWITCH_BUFFER_LE_VV_LANE (c);
  SWITCH_BUFFER_LE_VV_LANE (d);
  SWITCH_BUFFER_LE_VV_LANE (e);
  SWITCH_BUFFER_LE_VV_LANE (f);

  #endif

  #undef SWITCH_BUFFER_LE_VV_LANE

  #endif
}
|
|
|
|
/**
 * Vector front-end for switch_buffer_by_offset_8x4_le_S.
 *
 * w0..w7  eight 4-word vector buffers, passed through to the scalar routine
 * offset  per-lane offset; component .s<n> is used for lane n
 *
 * With VECT_SIZE == 1 the arguments are handed to the scalar routine as-is.
 * With VECT_SIZE 2, 4, 8 or 16 each lane is handled on its own, in ascending
 * lane order: PACKVS84 is invoked for lane n, the scalar routine runs on the
 * scalar temporaries with offset.s<n>, then PACKSV84 is invoked for lane n.
 * Any other VECT_SIZE only declares the temporaries and does no work.
 *
 * NOTE(review): PACKVS84 / PACKSV84 are defined elsewhere; presumably they
 * copy one vector lane into / out of the scalar arrays - confirm.
 */
static void switch_buffer_by_offset_8x4_le_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32x offset)
{
  #if VECT_SIZE == 1

  switch_buffer_by_offset_8x4_le_S (w0, w1, w2, w3, w4, w5, w6, w7, offset);

  #else

  // scalar scratch buffers, reused for every lane
  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];
  u32 t4[4];
  u32 t5[4];
  u32 t6[4];
  u32 t7[4];

  // one lane round trip: pack -> scalar routine -> pack back
  #define SWITCH_8X4_LE_LANE(n)                                                       \
    do                                                                                \
    {                                                                                 \
      PACKVS84 (t0, t1, t2, t3, t4, t5, t6, t7, w0, w1, w2, w3, w4, w5, w6, w7, n);   \
      switch_buffer_by_offset_8x4_le_S (t0, t1, t2, t3, t4, t5, t6, t7, offset.s##n); \
      PACKSV84 (t0, t1, t2, t3, t4, t5, t6, t7, w0, w1, w2, w3, w4, w5, w6, w7, n);   \
    } while (0)

  // lanes 0-1 exist for every supported vector width
  #if VECT_SIZE == 2 || VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_8X4_LE_LANE (0);
  SWITCH_8X4_LE_LANE (1);

  #endif

  // lanes 2-3
  #if VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_8X4_LE_LANE (2);
  SWITCH_8X4_LE_LANE (3);

  #endif

  // lanes 4-7
  #if VECT_SIZE == 8 || VECT_SIZE == 16

  SWITCH_8X4_LE_LANE (4);
  SWITCH_8X4_LE_LANE (5);
  SWITCH_8X4_LE_LANE (6);
  SWITCH_8X4_LE_LANE (7);

  #endif

  // lanes 8-f
  #if VECT_SIZE == 16

  SWITCH_8X4_LE_LANE (8);
  SWITCH_8X4_LE_LANE (9);
  SWITCH_8X4_LE_LANE (a);
  SWITCH_8X4_LE_LANE (b);
  SWITCH_8X4_LE_LANE (c);
  SWITCH_8X4_LE_LANE (d);
  SWITCH_8X4_LE_LANE (e);
  SWITCH_8X4_LE_LANE (f);

  #endif

  #undef SWITCH_8X4_LE_LANE

  #endif
}
|
|
|
|
/**
 * Vector front-end for append_0x01_2x4_S.
 *
 * w0, w1  two 4-word vector buffers, passed through to the scalar routine
 * offset  per-lane offset; component .s<n> is used for lane n
 *
 * With VECT_SIZE == 1 the scalar routine is called directly. With VECT_SIZE
 * 2, 4, 8 or 16 each lane is handled separately in ascending order:
 * PACKVS24 for lane n, append_0x01_2x4_S on the scalar temporaries with
 * offset.s<n>, then PACKSV24 for lane n. Any other VECT_SIZE only declares
 * the temporaries.
 *
 * NOTE(review): PACKVS24 / PACKSV24 are defined elsewhere; presumably they
 * copy one vector lane into / out of the scalar arrays - confirm.
 */
static void append_0x01_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x01_2x4_S (w0, w1, offset);

  #else

  // scalar scratch buffers, reused for every lane
  u32 t0[4];
  u32 t1[4];

  // one lane round trip: pack -> scalar routine -> pack back
  #define APPEND_0X01_2X4_LANE(n)                 \
    do                                            \
    {                                             \
      PACKVS24 (t0, t1, w0, w1, n);               \
      append_0x01_2x4_S (t0, t1, offset.s##n);    \
      PACKSV24 (t0, t1, w0, w1, n);               \
    } while (0)

  // lanes 0-1 exist for every supported vector width
  #if VECT_SIZE == 2 || VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X01_2X4_LANE (0);
  APPEND_0X01_2X4_LANE (1);

  #endif

  // lanes 2-3
  #if VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X01_2X4_LANE (2);
  APPEND_0X01_2X4_LANE (3);

  #endif

  // lanes 4-7
  #if VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X01_2X4_LANE (4);
  APPEND_0X01_2X4_LANE (5);
  APPEND_0X01_2X4_LANE (6);
  APPEND_0X01_2X4_LANE (7);

  #endif

  // lanes 8-f
  #if VECT_SIZE == 16

  APPEND_0X01_2X4_LANE (8);
  APPEND_0X01_2X4_LANE (9);
  APPEND_0X01_2X4_LANE (a);
  APPEND_0X01_2X4_LANE (b);
  APPEND_0X01_2X4_LANE (c);
  APPEND_0X01_2X4_LANE (d);
  APPEND_0X01_2X4_LANE (e);
  APPEND_0X01_2X4_LANE (f);

  #endif

  #undef APPEND_0X01_2X4_LANE

  #endif
}
|
|
|
|
/**
 * Vector front-end for append_0x80_2x4_S.
 *
 * w0, w1  two 4-word vector buffers, passed through to the scalar routine
 * offset  per-lane offset; component .s<n> is used for lane n
 *
 * With VECT_SIZE == 1 the scalar routine is called directly. With VECT_SIZE
 * 2, 4, 8 or 16 each lane is handled separately in ascending order:
 * PACKVS24 for lane n, append_0x80_2x4_S on the scalar temporaries with
 * offset.s<n>, then PACKSV24 for lane n. Any other VECT_SIZE only declares
 * the temporaries.
 *
 * NOTE(review): PACKVS24 / PACKSV24 are defined elsewhere; presumably they
 * copy one vector lane into / out of the scalar arrays - confirm.
 */
static void append_0x80_2x4_VV (u32x w0[4], u32x w1[4], const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x80_2x4_S (w0, w1, offset);

  #else

  // scalar scratch buffers, reused for every lane
  u32 t0[4];
  u32 t1[4];

  // one lane round trip: pack -> scalar routine -> pack back
  #define APPEND_0X80_2X4_LANE(n)                 \
    do                                            \
    {                                             \
      PACKVS24 (t0, t1, w0, w1, n);               \
      append_0x80_2x4_S (t0, t1, offset.s##n);    \
      PACKSV24 (t0, t1, w0, w1, n);               \
    } while (0)

  // lanes 0-1 exist for every supported vector width
  #if VECT_SIZE == 2 || VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_2X4_LANE (0);
  APPEND_0X80_2X4_LANE (1);

  #endif

  // lanes 2-3
  #if VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_2X4_LANE (2);
  APPEND_0X80_2X4_LANE (3);

  #endif

  // lanes 4-7
  #if VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_2X4_LANE (4);
  APPEND_0X80_2X4_LANE (5);
  APPEND_0X80_2X4_LANE (6);
  APPEND_0X80_2X4_LANE (7);

  #endif

  // lanes 8-f
  #if VECT_SIZE == 16

  APPEND_0X80_2X4_LANE (8);
  APPEND_0X80_2X4_LANE (9);
  APPEND_0X80_2X4_LANE (a);
  APPEND_0X80_2X4_LANE (b);
  APPEND_0X80_2X4_LANE (c);
  APPEND_0X80_2X4_LANE (d);
  APPEND_0X80_2X4_LANE (e);
  APPEND_0X80_2X4_LANE (f);

  #endif

  #undef APPEND_0X80_2X4_LANE

  #endif
}
|
|
|
|
/**
 * Vector front-end for append_0x80_4x4_S.
 *
 * w0..w3  four 4-word vector buffers, passed through to the scalar routine
 * offset  per-lane offset; component .s<n> is used for lane n
 *
 * With VECT_SIZE == 1 the scalar routine is called directly. With VECT_SIZE
 * 2, 4, 8 or 16 each lane is handled separately in ascending order:
 * PACKVS44 for lane n, append_0x80_4x4_S on the scalar temporaries with
 * offset.s<n>, then PACKSV44 for lane n. Any other VECT_SIZE only declares
 * the temporaries.
 *
 * NOTE(review): PACKVS44 / PACKSV44 are defined elsewhere; presumably they
 * copy one vector lane into / out of the scalar arrays - confirm.
 */
static void append_0x80_4x4_VV (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x80_4x4_S (w0, w1, w2, w3, offset);

  #else

  // scalar scratch buffers, reused for every lane
  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];

  // one lane round trip: pack -> scalar routine -> pack back
  #define APPEND_0X80_4X4_LANE(n)                           \
    do                                                      \
    {                                                       \
      PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, n);         \
      append_0x80_4x4_S (t0, t1, t2, t3, offset.s##n);      \
      PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, n);         \
    } while (0)

  // lanes 0-1 exist for every supported vector width
  #if VECT_SIZE == 2 || VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_4X4_LANE (0);
  APPEND_0X80_4X4_LANE (1);

  #endif

  // lanes 2-3
  #if VECT_SIZE == 4 || VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_4X4_LANE (2);
  APPEND_0X80_4X4_LANE (3);

  #endif

  // lanes 4-7
  #if VECT_SIZE == 8 || VECT_SIZE == 16

  APPEND_0X80_4X4_LANE (4);
  APPEND_0X80_4X4_LANE (5);
  APPEND_0X80_4X4_LANE (6);
  APPEND_0X80_4X4_LANE (7);

  #endif

  // lanes 8-f
  #if VECT_SIZE == 16

  APPEND_0X80_4X4_LANE (8);
  APPEND_0X80_4X4_LANE (9);
  APPEND_0X80_4X4_LANE (a);
  APPEND_0X80_4X4_LANE (b);
  APPEND_0X80_4X4_LANE (c);
  APPEND_0X80_4X4_LANE (d);
  APPEND_0X80_4X4_LANE (e);
  APPEND_0X80_4X4_LANE (f);

  #endif

  #undef APPEND_0X80_4X4_LANE

  #endif
}
|
|
|
|
/**
 * Kernel: each work-item stores one uint4 element of buf.
 *
 * buf      destination buffer, indexed by the work-item's global id (dim 0)
 * value    32-bit value converted to uint4 and written to buf[id]
 * gid_max  exclusive upper bound; work-items with id >= gid_max write nothing
 */
__kernel void gpu_memset (__global uint4 *buf, const u32 value, const u64 gid_max)
{
  const u64 idx = get_global_id (0);

  // only work-items inside the requested range touch memory
  if (idx < gid_max)
  {
    buf[idx] = (uint4) (value);
  }
}
|