1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-23 08:38:09 +00:00
hashcat/OpenCL/amp_a1.cl

11105 lines
357 KiB
Common Lisp

/**
* Author......: See docs/credits.txt
* License.....: MIT
*/
#include "inc_hash_constants.h"
#include "inc_vendor.cl"
#include "inc_types.cl"
inline void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
switch (offset / 4)
{
case 0:
w[63] = amd_bytealign_S (w[63], w[62], offset_minus_4);
w[62] = amd_bytealign_S (w[62], w[61], offset_minus_4);
w[61] = amd_bytealign_S (w[61], w[60], offset_minus_4);
w[60] = amd_bytealign_S (w[60], w[59], offset_minus_4);
w[59] = amd_bytealign_S (w[59], w[58], offset_minus_4);
w[58] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[57] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[56] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[55] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[54] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[53] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[52] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[51] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[50] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[49] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[48] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[47] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[46] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[45] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[44] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[43] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[42] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[41] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[40] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[39] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[38] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[37] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[36] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[35] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[34] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[33] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[32] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[31] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[30] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[29] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[28] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[27] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[26] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[25] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[24] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[23] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[22] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[21] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[20] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[19] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[18] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[17] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[16] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[15] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[14] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[13] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[12] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[11] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[10] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 4] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 3] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 2] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 1] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 0] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
if (offset_mod_4 == 0)
{
w[ 0] = w[ 1];
w[ 1] = w[ 2];
w[ 2] = w[ 3];
w[ 3] = w[ 4];
w[ 4] = w[ 5];
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 1:
w[63] = amd_bytealign_S (w[62], w[61], offset_minus_4);
w[62] = amd_bytealign_S (w[61], w[60], offset_minus_4);
w[61] = amd_bytealign_S (w[60], w[59], offset_minus_4);
w[60] = amd_bytealign_S (w[59], w[58], offset_minus_4);
w[59] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[58] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[57] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[56] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[55] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[54] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[53] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[52] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[51] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[50] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[49] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[48] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[47] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[46] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[45] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[44] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[43] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[42] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[41] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[40] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[39] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[38] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[37] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[36] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[35] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[34] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[33] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[32] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[31] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[30] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[29] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[28] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[27] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[26] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[25] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[24] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[23] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[22] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[21] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[20] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[19] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[18] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[17] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[16] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[15] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[14] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[13] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[12] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[11] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[10] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 4] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 3] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 2] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 1] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 1] = w[ 2];
w[ 2] = w[ 3];
w[ 3] = w[ 4];
w[ 4] = w[ 5];
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 2:
w[63] = amd_bytealign_S (w[61], w[60], offset_minus_4);
w[62] = amd_bytealign_S (w[60], w[59], offset_minus_4);
w[61] = amd_bytealign_S (w[59], w[58], offset_minus_4);
w[60] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[59] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[58] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[57] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[56] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[55] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[54] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[53] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[52] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[51] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[50] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[49] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[48] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[47] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[46] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[45] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[44] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[43] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[42] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[41] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[40] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[39] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[38] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[37] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[36] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[35] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[34] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[33] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[32] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[31] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[30] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[29] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[28] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[27] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[26] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[25] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[24] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[23] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[22] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[21] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[20] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[19] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[18] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[17] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[16] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[15] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[14] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[13] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[12] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[11] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[10] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 4] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 3] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 2] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 2] = w[ 3];
w[ 3] = w[ 4];
w[ 4] = w[ 5];
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 3:
w[63] = amd_bytealign_S (w[60], w[59], offset_minus_4);
w[62] = amd_bytealign_S (w[59], w[58], offset_minus_4);
w[61] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[60] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[59] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[58] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[57] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[56] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[55] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[54] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[53] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[52] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[51] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[50] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[49] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[48] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[47] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[46] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[45] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[44] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[43] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[42] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[41] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[40] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[39] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[38] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[37] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[36] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[35] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[34] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[33] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[32] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[31] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[30] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[29] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[28] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[27] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[26] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[25] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[24] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[23] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[22] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[21] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[20] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[19] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[18] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[17] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[16] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[15] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[14] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[13] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[12] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[11] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[10] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 4] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 3] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 3] = w[ 4];
w[ 4] = w[ 5];
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 4:
w[63] = amd_bytealign_S (w[59], w[58], offset_minus_4);
w[62] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[61] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[60] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[59] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[58] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[57] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[56] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[55] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[54] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[53] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[52] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[51] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[50] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[49] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[48] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[47] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[46] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[45] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[44] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[43] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[42] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[41] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[40] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[39] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[38] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[37] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[36] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[35] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[34] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[33] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[32] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[31] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[30] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[29] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[28] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[27] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[26] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[25] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[24] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[23] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[22] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[21] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[20] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[19] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[18] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[17] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[16] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[15] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[14] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[13] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[12] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[11] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[10] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 4] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 4] = w[ 5];
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 5:
w[63] = amd_bytealign_S (w[58], w[57], offset_minus_4);
w[62] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[61] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[60] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[59] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[58] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[57] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[56] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[55] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[54] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[53] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[52] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[51] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[50] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[49] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[48] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[47] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[46] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[45] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[44] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[43] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[42] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[41] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[40] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[39] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[38] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[37] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[36] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[35] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[34] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[33] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[32] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[31] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[30] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[29] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[28] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[27] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[26] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[25] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[24] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[23] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[22] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[21] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[20] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[19] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[18] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[17] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[16] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[15] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[14] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[13] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[12] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[11] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[10] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 5] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 5] = w[ 6];
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 6:
w[63] = amd_bytealign_S (w[57], w[56], offset_minus_4);
w[62] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[61] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[60] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[59] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[58] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[57] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[56] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[55] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[54] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[53] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[52] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[51] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[50] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[49] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[48] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[47] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[46] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[45] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[44] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[43] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[42] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[41] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[40] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[39] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[38] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[37] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[36] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[35] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[34] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[33] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[32] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[31] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[30] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[29] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[28] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[27] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[26] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[25] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[24] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[23] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[22] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[21] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[20] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[19] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[18] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[17] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[16] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[15] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[14] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[13] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[12] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[11] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[10] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 6] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 6] = w[ 7];
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 7:
w[63] = amd_bytealign_S (w[56], w[55], offset_minus_4);
w[62] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[61] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[60] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[59] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[58] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[57] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[56] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[55] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[54] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[53] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[52] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[51] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[50] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[49] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[48] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[47] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[46] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[45] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[44] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[43] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[42] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[41] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[40] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[39] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[38] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[37] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[36] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[35] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[34] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[33] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[32] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[31] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[30] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[29] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[28] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[27] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[26] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[25] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[24] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[23] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[22] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[21] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[20] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[19] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[18] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[17] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[16] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[15] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[14] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[13] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[12] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[11] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[10] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 7] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 7] = w[ 8];
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 8:
w[63] = amd_bytealign_S (w[55], w[54], offset_minus_4);
w[62] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[61] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[60] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[59] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[58] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[57] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[56] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[55] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[54] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[53] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[52] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[51] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[50] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[49] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[48] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[47] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[46] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[45] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[44] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[43] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[42] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[41] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[40] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[39] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[38] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[37] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[36] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[35] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[34] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[33] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[32] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[31] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[30] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[29] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[28] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[27] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[26] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[25] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[24] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[23] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[22] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[21] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[20] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[19] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[18] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[17] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[16] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[15] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[14] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[13] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[12] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[11] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[10] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 8] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 8] = w[ 9];
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 9:
w[63] = amd_bytealign_S (w[54], w[53], offset_minus_4);
w[62] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[61] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[60] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[59] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[58] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[57] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[56] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[55] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[54] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[53] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[52] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[51] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[50] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[49] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[48] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[47] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[46] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[45] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[44] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[43] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[42] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[41] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[40] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[39] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[38] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[37] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[36] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[35] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[34] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[33] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[32] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[31] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[30] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[29] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[28] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[27] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[26] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[25] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[24] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[23] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[22] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[21] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[20] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[19] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[18] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[17] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[16] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[15] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[14] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[13] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[12] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[11] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[10] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[ 9] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[ 9] = w[10];
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 10:
w[63] = amd_bytealign_S (w[53], w[52], offset_minus_4);
w[62] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[61] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[60] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[59] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[58] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[57] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[56] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[55] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[54] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[53] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[52] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[51] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[50] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[49] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[48] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[47] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[46] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[45] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[44] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[43] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[42] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[41] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[40] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[39] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[38] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[37] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[36] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[35] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[34] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[33] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[32] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[31] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[30] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[29] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[28] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[27] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[26] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[25] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[24] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[23] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[22] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[21] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[20] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[19] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[18] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[17] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[16] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[15] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[14] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[13] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[12] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[11] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[10] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[10] = w[11];
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 11:
w[63] = amd_bytealign_S (w[52], w[51], offset_minus_4);
w[62] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[61] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[60] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[59] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[58] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[57] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[56] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[55] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[54] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[53] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[52] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[51] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[50] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[49] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[48] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[47] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[46] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[45] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[44] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[43] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[42] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[41] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[40] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[39] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[38] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[37] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[36] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[35] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[34] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[33] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[32] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[31] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[30] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[29] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[28] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[27] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[26] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[25] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[24] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[23] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[22] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[21] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[20] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[19] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[18] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[17] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[16] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[15] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[14] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[13] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[12] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[11] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[11] = w[12];
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 12:
w[63] = amd_bytealign_S (w[51], w[50], offset_minus_4);
w[62] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[61] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[60] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[59] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[58] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[57] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[56] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[55] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[54] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[53] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[52] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[51] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[50] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[49] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[48] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[47] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[46] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[45] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[44] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[43] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[42] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[41] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[40] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[39] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[38] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[37] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[36] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[35] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[34] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[33] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[32] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[31] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[30] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[29] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[28] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[27] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[26] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[25] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[24] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[23] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[22] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[21] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[20] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[19] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[18] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[17] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[16] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[15] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[14] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[13] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[12] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[12] = w[13];
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 13:
w[63] = amd_bytealign_S (w[50], w[49], offset_minus_4);
w[62] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[61] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[60] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[59] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[58] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[57] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[56] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[55] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[54] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[53] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[52] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[51] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[50] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[49] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[48] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[47] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[46] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[45] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[44] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[43] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[42] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[41] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[40] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[39] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[38] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[37] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[36] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[35] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[34] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[33] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[32] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[31] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[30] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[29] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[28] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[27] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[26] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[25] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[24] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[23] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[22] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[21] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[20] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[19] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[18] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[17] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[16] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[15] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[14] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[13] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[13] = w[14];
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 14:
w[63] = amd_bytealign_S (w[49], w[48], offset_minus_4);
w[62] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[61] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[60] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[59] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[58] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[57] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[56] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[55] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[54] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[53] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[52] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[51] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[50] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[49] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[48] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[47] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[46] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[45] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[44] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[43] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[42] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[41] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[40] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[39] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[38] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[37] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[36] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[35] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[34] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[33] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[32] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[31] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[30] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[29] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[28] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[27] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[26] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[25] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[24] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[23] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[22] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[21] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[20] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[19] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[18] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[17] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[16] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[15] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[14] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[14] = w[15];
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 15:
w[63] = amd_bytealign_S (w[48], w[47], offset_minus_4);
w[62] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[61] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[60] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[59] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[58] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[57] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[56] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[55] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[54] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[53] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[52] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[51] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[50] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[49] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[48] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[47] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[46] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[45] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[44] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[43] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[42] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[41] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[40] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[39] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[38] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[37] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[36] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[35] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[34] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[33] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[32] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[31] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[30] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[29] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[28] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[27] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[26] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[25] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[24] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[23] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[22] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[21] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[20] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[19] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[18] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[17] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[16] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[15] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[15] = w[16];
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 16:
w[63] = amd_bytealign_S (w[47], w[46], offset_minus_4);
w[62] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[61] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[60] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[59] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[58] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[57] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[56] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[55] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[54] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[53] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[52] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[51] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[50] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[49] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[48] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[47] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[46] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[45] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[44] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[43] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[42] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[41] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[40] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[39] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[38] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[37] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[36] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[35] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[34] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[33] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[32] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[31] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[30] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[29] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[28] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[27] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[26] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[25] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[24] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[23] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[22] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[21] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[20] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[19] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[18] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[17] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[16] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[16] = w[17];
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 17:
w[63] = amd_bytealign_S (w[46], w[45], offset_minus_4);
w[62] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[61] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[60] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[59] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[58] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[57] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[56] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[55] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[54] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[53] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[52] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[51] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[50] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[49] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[48] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[47] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[46] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[45] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[44] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[43] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[42] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[41] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[40] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[39] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[38] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[37] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[36] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[35] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[34] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[33] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[32] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[31] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[30] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[29] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[28] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[27] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[26] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[25] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[24] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[23] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[22] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[21] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[20] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[19] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[18] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[17] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[17] = w[18];
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 18:
w[63] = amd_bytealign_S (w[45], w[44], offset_minus_4);
w[62] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[61] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[60] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[59] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[58] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[57] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[56] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[55] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[54] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[53] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[52] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[51] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[50] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[49] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[48] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[47] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[46] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[45] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[44] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[43] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[42] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[41] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[40] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[39] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[38] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[37] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[36] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[35] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[34] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[33] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[32] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[31] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[30] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[29] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[28] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[27] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[26] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[25] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[24] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[23] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[22] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[21] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[20] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[19] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[18] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[18] = w[19];
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 19:
w[63] = amd_bytealign_S (w[44], w[43], offset_minus_4);
w[62] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[61] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[60] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[59] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[58] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[57] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[56] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[55] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[54] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[53] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[52] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[51] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[50] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[49] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[48] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[47] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[46] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[45] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[44] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[43] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[42] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[41] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[40] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[39] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[38] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[37] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[36] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[35] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[34] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[33] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[32] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[31] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[30] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[29] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[28] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[27] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[26] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[25] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[24] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[23] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[22] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[21] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[20] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[19] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[19] = w[20];
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 20:
w[63] = amd_bytealign_S (w[43], w[42], offset_minus_4);
w[62] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[61] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[60] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[59] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[58] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[57] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[56] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[55] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[54] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[53] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[52] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[51] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[50] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[49] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[48] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[47] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[46] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[45] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[44] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[43] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[42] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[41] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[40] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[39] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[38] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[37] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[36] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[35] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[34] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[33] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[32] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[31] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[30] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[29] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[28] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[27] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[26] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[25] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[24] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[23] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[22] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[21] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[20] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[20] = w[21];
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 21:
w[63] = amd_bytealign_S (w[42], w[41], offset_minus_4);
w[62] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[61] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[60] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[59] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[58] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[57] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[56] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[55] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[54] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[53] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[52] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[51] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[50] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[49] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[48] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[47] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[46] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[45] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[44] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[43] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[42] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[41] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[40] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[39] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[38] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[37] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[36] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[35] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[34] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[33] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[32] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[31] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[30] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[29] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[28] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[27] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[26] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[25] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[24] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[23] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[22] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[21] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[21] = w[22];
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 22:
w[63] = amd_bytealign_S (w[41], w[40], offset_minus_4);
w[62] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[61] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[60] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[59] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[58] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[57] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[56] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[55] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[54] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[53] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[52] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[51] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[50] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[49] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[48] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[47] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[46] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[45] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[44] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[43] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[42] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[41] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[40] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[39] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[38] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[37] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[36] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[35] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[34] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[33] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[32] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[31] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[30] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[29] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[28] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[27] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[26] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[25] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[24] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[23] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[22] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[22] = w[23];
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 23:
w[63] = amd_bytealign_S (w[40], w[39], offset_minus_4);
w[62] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[61] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[60] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[59] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[58] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[57] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[56] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[55] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[54] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[53] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[52] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[51] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[50] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[49] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[48] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[47] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[46] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[45] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[44] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[43] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[42] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[41] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[40] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[39] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[38] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[37] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[36] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[35] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[34] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[33] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[32] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[31] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[30] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[29] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[28] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[27] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[26] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[25] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[24] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[23] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[23] = w[24];
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 24:
w[63] = amd_bytealign_S (w[39], w[38], offset_minus_4);
w[62] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[61] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[60] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[59] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[58] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[57] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[56] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[55] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[54] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[53] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[52] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[51] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[50] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[49] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[48] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[47] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[46] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[45] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[44] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[43] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[42] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[41] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[40] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[39] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[38] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[37] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[36] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[35] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[34] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[33] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[32] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[31] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[30] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[29] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[28] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[27] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[26] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[25] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[24] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[24] = w[25];
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 25:
w[63] = amd_bytealign_S (w[38], w[37], offset_minus_4);
w[62] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[61] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[60] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[59] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[58] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[57] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[56] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[55] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[54] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[53] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[52] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[51] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[50] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[49] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[48] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[47] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[46] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[45] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[44] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[43] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[42] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[41] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[40] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[39] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[38] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[37] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[36] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[35] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[34] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[33] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[32] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[31] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[30] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[29] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[28] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[27] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[26] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[25] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[25] = w[26];
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 26:
w[63] = amd_bytealign_S (w[37], w[36], offset_minus_4);
w[62] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[61] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[60] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[59] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[58] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[57] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[56] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[55] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[54] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[53] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[52] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[51] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[50] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[49] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[48] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[47] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[46] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[45] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[44] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[43] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[42] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[41] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[40] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[39] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[38] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[37] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[36] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[35] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[34] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[33] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[32] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[31] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[30] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[29] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[28] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[27] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[26] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[26] = w[27];
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 27:
w[63] = amd_bytealign_S (w[36], w[35], offset_minus_4);
w[62] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[61] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[60] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[59] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[58] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[57] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[56] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[55] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[54] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[53] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[52] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[51] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[50] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[49] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[48] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[47] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[46] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[45] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[44] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[43] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[42] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[41] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[40] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[39] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[38] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[37] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[36] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[35] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[34] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[33] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[32] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[31] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[30] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[29] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[28] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[27] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[27] = w[28];
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 28:
w[63] = amd_bytealign_S (w[35], w[34], offset_minus_4);
w[62] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[61] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[60] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[59] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[58] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[57] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[56] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[55] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[54] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[53] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[52] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[51] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[50] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[49] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[48] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[47] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[46] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[45] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[44] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[43] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[42] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[41] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[40] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[39] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[38] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[37] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[36] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[35] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[34] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[33] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[32] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[31] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[30] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[29] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[28] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[28] = w[29];
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 29:
w[63] = amd_bytealign_S (w[34], w[33], offset_minus_4);
w[62] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[61] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[60] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[59] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[58] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[57] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[56] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[55] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[54] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[53] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[52] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[51] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[50] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[49] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[48] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[47] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[46] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[45] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[44] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[43] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[42] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[41] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[40] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[39] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[38] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[37] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[36] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[35] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[34] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[33] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[32] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[31] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[30] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[29] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[29] = w[30];
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 30:
w[63] = amd_bytealign_S (w[33], w[32], offset_minus_4);
w[62] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[61] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[60] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[59] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[58] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[57] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[56] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[55] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[54] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[53] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[52] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[51] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[50] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[49] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[48] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[47] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[46] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[45] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[44] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[43] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[42] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[41] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[40] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[39] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[38] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[37] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[36] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[35] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[34] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[33] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[32] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[31] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[30] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[30] = w[31];
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 31:
w[63] = amd_bytealign_S (w[32], w[31], offset_minus_4);
w[62] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[61] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[60] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[59] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[58] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[57] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[56] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[55] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[54] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[53] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[52] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[51] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[50] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[49] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[48] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[47] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[46] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[45] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[44] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[43] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[42] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[41] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[40] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[39] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[38] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[37] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[36] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[35] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[34] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[33] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[32] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[31] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[31] = w[32];
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 32:
w[63] = amd_bytealign_S (w[31], w[30], offset_minus_4);
w[62] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[61] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[60] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[59] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[58] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[57] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[56] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[55] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[54] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[53] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[52] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[51] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[50] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[49] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[48] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[47] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[46] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[45] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[44] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[43] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[42] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[41] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[40] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[39] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[38] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[37] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[36] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[35] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[34] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[33] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[32] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[32] = w[33];
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 33:
w[63] = amd_bytealign_S (w[30], w[29], offset_minus_4);
w[62] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[61] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[60] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[59] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[58] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[57] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[56] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[55] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[54] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[53] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[52] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[51] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[50] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[49] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[48] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[47] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[46] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[45] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[44] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[43] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[42] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[41] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[40] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[39] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[38] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[37] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[36] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[35] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[34] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[33] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[33] = w[34];
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 34:
w[63] = amd_bytealign_S (w[29], w[28], offset_minus_4);
w[62] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[61] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[60] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[59] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[58] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[57] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[56] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[55] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[54] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[53] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[52] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[51] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[50] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[49] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[48] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[47] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[46] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[45] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[44] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[43] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[42] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[41] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[40] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[39] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[38] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[37] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[36] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[35] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[34] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[34] = w[35];
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 35:
w[63] = amd_bytealign_S (w[28], w[27], offset_minus_4);
w[62] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[61] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[60] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[59] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[58] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[57] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[56] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[55] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[54] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[53] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[52] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[51] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[50] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[49] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[48] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[47] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[46] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[45] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[44] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[43] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[42] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[41] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[40] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[39] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[38] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[37] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[36] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[35] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[35] = w[36];
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 36:
w[63] = amd_bytealign_S (w[27], w[26], offset_minus_4);
w[62] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[61] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[60] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[59] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[58] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[57] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[56] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[55] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[54] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[53] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[52] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[51] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[50] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[49] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[48] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[47] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[46] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[45] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[44] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[43] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[42] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[41] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[40] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[39] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[38] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[37] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[36] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[36] = w[37];
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 37:
w[63] = amd_bytealign_S (w[26], w[25], offset_minus_4);
w[62] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[61] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[60] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[59] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[58] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[57] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[56] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[55] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[54] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[53] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[52] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[51] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[50] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[49] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[48] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[47] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[46] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[45] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[44] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[43] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[42] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[41] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[40] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[39] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[38] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[37] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[37] = w[38];
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 38:
w[63] = amd_bytealign_S (w[25], w[24], offset_minus_4);
w[62] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[61] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[60] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[59] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[58] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[57] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[56] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[55] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[54] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[53] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[52] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[51] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[50] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[49] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[48] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[47] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[46] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[45] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[44] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[43] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[42] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[41] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[40] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[39] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[38] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[38] = w[39];
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 39:
w[63] = amd_bytealign_S (w[24], w[23], offset_minus_4);
w[62] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[61] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[60] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[59] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[58] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[57] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[56] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[55] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[54] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[53] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[52] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[51] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[50] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[49] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[48] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[47] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[46] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[45] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[44] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[43] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[42] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[41] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[40] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[39] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[39] = w[40];
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 40:
w[63] = amd_bytealign_S (w[23], w[22], offset_minus_4);
w[62] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[61] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[60] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[59] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[58] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[57] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[56] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[55] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[54] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[53] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[52] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[51] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[50] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[49] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[48] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[47] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[46] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[45] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[44] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[43] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[42] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[41] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[40] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[40] = w[41];
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 41:
w[63] = amd_bytealign_S (w[22], w[21], offset_minus_4);
w[62] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[61] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[60] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[59] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[58] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[57] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[56] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[55] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[54] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[53] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[52] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[51] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[50] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[49] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[48] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[47] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[46] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[45] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[44] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[43] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[42] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[41] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[41] = w[42];
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 42:
w[63] = amd_bytealign_S (w[21], w[20], offset_minus_4);
w[62] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[61] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[60] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[59] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[58] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[57] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[56] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[55] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[54] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[53] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[52] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[51] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[50] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[49] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[48] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[47] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[46] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[45] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[44] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[43] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[42] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[42] = w[43];
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 43:
w[63] = amd_bytealign_S (w[20], w[19], offset_minus_4);
w[62] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[61] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[60] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[59] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[58] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[57] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[56] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[55] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[54] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[53] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[52] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[51] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[50] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[49] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[48] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[47] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[46] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[45] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[44] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[43] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[43] = w[44];
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 44:
w[63] = amd_bytealign_S (w[19], w[18], offset_minus_4);
w[62] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[61] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[60] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[59] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[58] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[57] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[56] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[55] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[54] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[53] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[52] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[51] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[50] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[49] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[48] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[47] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[46] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[45] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[44] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[44] = w[45];
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 45:
w[63] = amd_bytealign_S (w[18], w[17], offset_minus_4);
w[62] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[61] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[60] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[59] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[58] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[57] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[56] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[55] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[54] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[53] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[52] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[51] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[50] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[49] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[48] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[47] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[46] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[45] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[45] = w[46];
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 46:
w[63] = amd_bytealign_S (w[17], w[16], offset_minus_4);
w[62] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[61] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[60] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[59] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[58] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[57] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[56] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[55] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[54] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[53] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[52] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[51] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[50] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[49] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[48] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[47] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[46] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[46] = w[47];
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 47:
w[63] = amd_bytealign_S (w[16], w[15], offset_minus_4);
w[62] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[61] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[60] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[59] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[58] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[57] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[56] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[55] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[54] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[53] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[52] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[51] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[50] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[49] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[48] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[47] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[47] = w[48];
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 48:
w[63] = amd_bytealign_S (w[15], w[14], offset_minus_4);
w[62] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[61] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[60] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[59] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[58] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[57] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[56] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[55] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[54] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[53] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[52] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[51] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[50] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[49] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[48] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[48] = w[49];
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 49:
w[63] = amd_bytealign_S (w[14], w[13], offset_minus_4);
w[62] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[61] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[60] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[59] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[58] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[57] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[56] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[55] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[54] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[53] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[52] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[51] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[50] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[49] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[49] = w[50];
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 50:
w[63] = amd_bytealign_S (w[13], w[12], offset_minus_4);
w[62] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[61] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[60] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[59] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[58] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[57] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[56] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[55] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[54] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[53] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[52] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[51] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[50] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[50] = w[51];
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 51:
w[63] = amd_bytealign_S (w[12], w[11], offset_minus_4);
w[62] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[61] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[60] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[59] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[58] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[57] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[56] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[55] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[54] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[53] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[52] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[51] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[51] = w[52];
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 52:
w[63] = amd_bytealign_S (w[11], w[10], offset_minus_4);
w[62] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[61] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[60] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[59] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[58] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[57] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[56] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[55] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[54] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[53] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[52] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[52] = w[53];
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 53:
w[63] = amd_bytealign_S (w[10], w[ 9], offset_minus_4);
w[62] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[61] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[60] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[59] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[58] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[57] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[56] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[55] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[54] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[53] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[53] = w[54];
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 54:
w[63] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4);
w[62] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[61] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[60] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[59] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[58] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[57] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[56] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[55] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[54] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[54] = w[55];
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 55:
w[63] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4);
w[62] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[61] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[60] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[59] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[58] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[57] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[56] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[55] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[55] = w[56];
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 56:
w[63] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4);
w[62] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[61] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[60] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[59] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[58] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[57] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[56] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[56] = w[57];
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 57:
w[63] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4);
w[62] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[61] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[60] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[59] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[58] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[57] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[57] = w[58];
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 58:
w[63] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4);
w[62] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[61] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[60] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[59] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[58] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[58] = w[59];
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 59:
w[63] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4);
w[62] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[61] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[60] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[59] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[59] = w[60];
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 60:
w[63] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4);
w[62] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[61] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[60] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[60] = w[61];
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 61:
w[63] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4);
w[62] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[61] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[61] = w[62];
w[62] = w[63];
w[63] = 0;
}
break;
case 62:
w[63] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4);
w[62] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[61] = 0;
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[62] = w[63];
w[63] = 0;
}
break;
case 63:
w[63] = amd_bytealign_S (w[ 0], 0, offset_minus_4);
w[62] = 0;
w[61] = 0;
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
if (offset_mod_4 == 0)
{
w[63] = 0;
}
break;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)
{
case 0:
w[63] = __byte_perm_S (w[62], w[63], selector);
w[62] = __byte_perm_S (w[61], w[62], selector);
w[61] = __byte_perm_S (w[60], w[61], selector);
w[60] = __byte_perm_S (w[59], w[60], selector);
w[59] = __byte_perm_S (w[58], w[59], selector);
w[58] = __byte_perm_S (w[57], w[58], selector);
w[57] = __byte_perm_S (w[56], w[57], selector);
w[56] = __byte_perm_S (w[55], w[56], selector);
w[55] = __byte_perm_S (w[54], w[55], selector);
w[54] = __byte_perm_S (w[53], w[54], selector);
w[53] = __byte_perm_S (w[52], w[53], selector);
w[52] = __byte_perm_S (w[51], w[52], selector);
w[51] = __byte_perm_S (w[50], w[51], selector);
w[50] = __byte_perm_S (w[49], w[50], selector);
w[49] = __byte_perm_S (w[48], w[49], selector);
w[48] = __byte_perm_S (w[47], w[48], selector);
w[47] = __byte_perm_S (w[46], w[47], selector);
w[46] = __byte_perm_S (w[45], w[46], selector);
w[45] = __byte_perm_S (w[44], w[45], selector);
w[44] = __byte_perm_S (w[43], w[44], selector);
w[43] = __byte_perm_S (w[42], w[43], selector);
w[42] = __byte_perm_S (w[41], w[42], selector);
w[41] = __byte_perm_S (w[40], w[41], selector);
w[40] = __byte_perm_S (w[39], w[40], selector);
w[39] = __byte_perm_S (w[38], w[39], selector);
w[38] = __byte_perm_S (w[37], w[38], selector);
w[37] = __byte_perm_S (w[36], w[37], selector);
w[36] = __byte_perm_S (w[35], w[36], selector);
w[35] = __byte_perm_S (w[34], w[35], selector);
w[34] = __byte_perm_S (w[33], w[34], selector);
w[33] = __byte_perm_S (w[32], w[33], selector);
w[32] = __byte_perm_S (w[31], w[32], selector);
w[31] = __byte_perm_S (w[30], w[31], selector);
w[30] = __byte_perm_S (w[29], w[30], selector);
w[29] = __byte_perm_S (w[28], w[29], selector);
w[28] = __byte_perm_S (w[27], w[28], selector);
w[27] = __byte_perm_S (w[26], w[27], selector);
w[26] = __byte_perm_S (w[25], w[26], selector);
w[25] = __byte_perm_S (w[24], w[25], selector);
w[24] = __byte_perm_S (w[23], w[24], selector);
w[23] = __byte_perm_S (w[22], w[23], selector);
w[22] = __byte_perm_S (w[21], w[22], selector);
w[21] = __byte_perm_S (w[20], w[21], selector);
w[20] = __byte_perm_S (w[19], w[20], selector);
w[19] = __byte_perm_S (w[18], w[19], selector);
w[18] = __byte_perm_S (w[17], w[18], selector);
w[17] = __byte_perm_S (w[16], w[17], selector);
w[16] = __byte_perm_S (w[15], w[16], selector);
w[15] = __byte_perm_S (w[14], w[15], selector);
w[14] = __byte_perm_S (w[13], w[14], selector);
w[13] = __byte_perm_S (w[12], w[13], selector);
w[12] = __byte_perm_S (w[11], w[12], selector);
w[11] = __byte_perm_S (w[10], w[11], selector);
w[10] = __byte_perm_S (w[ 9], w[10], selector);
w[ 9] = __byte_perm_S (w[ 8], w[ 9], selector);
w[ 8] = __byte_perm_S (w[ 7], w[ 8], selector);
w[ 7] = __byte_perm_S (w[ 6], w[ 7], selector);
w[ 6] = __byte_perm_S (w[ 5], w[ 6], selector);
w[ 5] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 4] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 3] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 2] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 1] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 0] = __byte_perm_S ( 0, w[ 0], selector);
break;
case 1:
w[63] = __byte_perm_S (w[61], w[62], selector);
w[62] = __byte_perm_S (w[60], w[61], selector);
w[61] = __byte_perm_S (w[59], w[60], selector);
w[60] = __byte_perm_S (w[58], w[59], selector);
w[59] = __byte_perm_S (w[57], w[58], selector);
w[58] = __byte_perm_S (w[56], w[57], selector);
w[57] = __byte_perm_S (w[55], w[56], selector);
w[56] = __byte_perm_S (w[54], w[55], selector);
w[55] = __byte_perm_S (w[53], w[54], selector);
w[54] = __byte_perm_S (w[52], w[53], selector);
w[53] = __byte_perm_S (w[51], w[52], selector);
w[52] = __byte_perm_S (w[50], w[51], selector);
w[51] = __byte_perm_S (w[49], w[50], selector);
w[50] = __byte_perm_S (w[48], w[49], selector);
w[49] = __byte_perm_S (w[47], w[48], selector);
w[48] = __byte_perm_S (w[46], w[47], selector);
w[47] = __byte_perm_S (w[45], w[46], selector);
w[46] = __byte_perm_S (w[44], w[45], selector);
w[45] = __byte_perm_S (w[43], w[44], selector);
w[44] = __byte_perm_S (w[42], w[43], selector);
w[43] = __byte_perm_S (w[41], w[42], selector);
w[42] = __byte_perm_S (w[40], w[41], selector);
w[41] = __byte_perm_S (w[39], w[40], selector);
w[40] = __byte_perm_S (w[38], w[39], selector);
w[39] = __byte_perm_S (w[37], w[38], selector);
w[38] = __byte_perm_S (w[36], w[37], selector);
w[37] = __byte_perm_S (w[35], w[36], selector);
w[36] = __byte_perm_S (w[34], w[35], selector);
w[35] = __byte_perm_S (w[33], w[34], selector);
w[34] = __byte_perm_S (w[32], w[33], selector);
w[33] = __byte_perm_S (w[31], w[32], selector);
w[32] = __byte_perm_S (w[30], w[31], selector);
w[31] = __byte_perm_S (w[29], w[30], selector);
w[30] = __byte_perm_S (w[28], w[29], selector);
w[29] = __byte_perm_S (w[27], w[28], selector);
w[28] = __byte_perm_S (w[26], w[27], selector);
w[27] = __byte_perm_S (w[25], w[26], selector);
w[26] = __byte_perm_S (w[24], w[25], selector);
w[25] = __byte_perm_S (w[23], w[24], selector);
w[24] = __byte_perm_S (w[22], w[23], selector);
w[23] = __byte_perm_S (w[21], w[22], selector);
w[22] = __byte_perm_S (w[20], w[21], selector);
w[21] = __byte_perm_S (w[19], w[20], selector);
w[20] = __byte_perm_S (w[18], w[19], selector);
w[19] = __byte_perm_S (w[17], w[18], selector);
w[18] = __byte_perm_S (w[16], w[17], selector);
w[17] = __byte_perm_S (w[15], w[16], selector);
w[16] = __byte_perm_S (w[14], w[15], selector);
w[15] = __byte_perm_S (w[13], w[14], selector);
w[14] = __byte_perm_S (w[12], w[13], selector);
w[13] = __byte_perm_S (w[11], w[12], selector);
w[12] = __byte_perm_S (w[10], w[11], selector);
w[11] = __byte_perm_S (w[ 9], w[10], selector);
w[10] = __byte_perm_S (w[ 8], w[ 9], selector);
w[ 9] = __byte_perm_S (w[ 7], w[ 8], selector);
w[ 8] = __byte_perm_S (w[ 6], w[ 7], selector);
w[ 7] = __byte_perm_S (w[ 5], w[ 6], selector);
w[ 6] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 5] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 4] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 3] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 2] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 1] = __byte_perm_S ( 0, w[ 0], selector);
w[ 0] = 0;
break;
case 2:
w[63] = __byte_perm_S (w[60], w[61], selector);
w[62] = __byte_perm_S (w[59], w[60], selector);
w[61] = __byte_perm_S (w[58], w[59], selector);
w[60] = __byte_perm_S (w[57], w[58], selector);
w[59] = __byte_perm_S (w[56], w[57], selector);
w[58] = __byte_perm_S (w[55], w[56], selector);
w[57] = __byte_perm_S (w[54], w[55], selector);
w[56] = __byte_perm_S (w[53], w[54], selector);
w[55] = __byte_perm_S (w[52], w[53], selector);
w[54] = __byte_perm_S (w[51], w[52], selector);
w[53] = __byte_perm_S (w[50], w[51], selector);
w[52] = __byte_perm_S (w[49], w[50], selector);
w[51] = __byte_perm_S (w[48], w[49], selector);
w[50] = __byte_perm_S (w[47], w[48], selector);
w[49] = __byte_perm_S (w[46], w[47], selector);
w[48] = __byte_perm_S (w[45], w[46], selector);
w[47] = __byte_perm_S (w[44], w[45], selector);
w[46] = __byte_perm_S (w[43], w[44], selector);
w[45] = __byte_perm_S (w[42], w[43], selector);
w[44] = __byte_perm_S (w[41], w[42], selector);
w[43] = __byte_perm_S (w[40], w[41], selector);
w[42] = __byte_perm_S (w[39], w[40], selector);
w[41] = __byte_perm_S (w[38], w[39], selector);
w[40] = __byte_perm_S (w[37], w[38], selector);
w[39] = __byte_perm_S (w[36], w[37], selector);
w[38] = __byte_perm_S (w[35], w[36], selector);
w[37] = __byte_perm_S (w[34], w[35], selector);
w[36] = __byte_perm_S (w[33], w[34], selector);
w[35] = __byte_perm_S (w[32], w[33], selector);
w[34] = __byte_perm_S (w[31], w[32], selector);
w[33] = __byte_perm_S (w[30], w[31], selector);
w[32] = __byte_perm_S (w[29], w[30], selector);
w[31] = __byte_perm_S (w[28], w[29], selector);
w[30] = __byte_perm_S (w[27], w[28], selector);
w[29] = __byte_perm_S (w[26], w[27], selector);
w[28] = __byte_perm_S (w[25], w[26], selector);
w[27] = __byte_perm_S (w[24], w[25], selector);
w[26] = __byte_perm_S (w[23], w[24], selector);
w[25] = __byte_perm_S (w[22], w[23], selector);
w[24] = __byte_perm_S (w[21], w[22], selector);
w[23] = __byte_perm_S (w[20], w[21], selector);
w[22] = __byte_perm_S (w[19], w[20], selector);
w[21] = __byte_perm_S (w[18], w[19], selector);
w[20] = __byte_perm_S (w[17], w[18], selector);
w[19] = __byte_perm_S (w[16], w[17], selector);
w[18] = __byte_perm_S (w[15], w[16], selector);
w[17] = __byte_perm_S (w[14], w[15], selector);
w[16] = __byte_perm_S (w[13], w[14], selector);
w[15] = __byte_perm_S (w[12], w[13], selector);
w[14] = __byte_perm_S (w[11], w[12], selector);
w[13] = __byte_perm_S (w[10], w[11], selector);
w[12] = __byte_perm_S (w[ 9], w[10], selector);
w[11] = __byte_perm_S (w[ 8], w[ 9], selector);
w[10] = __byte_perm_S (w[ 7], w[ 8], selector);
w[ 9] = __byte_perm_S (w[ 6], w[ 7], selector);
w[ 8] = __byte_perm_S (w[ 5], w[ 6], selector);
w[ 7] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 6] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 5] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 4] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 3] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 2] = __byte_perm_S ( 0, w[ 0], selector);
w[ 1] = 0;
w[ 0] = 0;
break;
case 3:
w[63] = __byte_perm_S (w[59], w[60], selector);
w[62] = __byte_perm_S (w[58], w[59], selector);
w[61] = __byte_perm_S (w[57], w[58], selector);
w[60] = __byte_perm_S (w[56], w[57], selector);
w[59] = __byte_perm_S (w[55], w[56], selector);
w[58] = __byte_perm_S (w[54], w[55], selector);
w[57] = __byte_perm_S (w[53], w[54], selector);
w[56] = __byte_perm_S (w[52], w[53], selector);
w[55] = __byte_perm_S (w[51], w[52], selector);
w[54] = __byte_perm_S (w[50], w[51], selector);
w[53] = __byte_perm_S (w[49], w[50], selector);
w[52] = __byte_perm_S (w[48], w[49], selector);
w[51] = __byte_perm_S (w[47], w[48], selector);
w[50] = __byte_perm_S (w[46], w[47], selector);
w[49] = __byte_perm_S (w[45], w[46], selector);
w[48] = __byte_perm_S (w[44], w[45], selector);
w[47] = __byte_perm_S (w[43], w[44], selector);
w[46] = __byte_perm_S (w[42], w[43], selector);
w[45] = __byte_perm_S (w[41], w[42], selector);
w[44] = __byte_perm_S (w[40], w[41], selector);
w[43] = __byte_perm_S (w[39], w[40], selector);
w[42] = __byte_perm_S (w[38], w[39], selector);
w[41] = __byte_perm_S (w[37], w[38], selector);
w[40] = __byte_perm_S (w[36], w[37], selector);
w[39] = __byte_perm_S (w[35], w[36], selector);
w[38] = __byte_perm_S (w[34], w[35], selector);
w[37] = __byte_perm_S (w[33], w[34], selector);
w[36] = __byte_perm_S (w[32], w[33], selector);
w[35] = __byte_perm_S (w[31], w[32], selector);
w[34] = __byte_perm_S (w[30], w[31], selector);
w[33] = __byte_perm_S (w[29], w[30], selector);
w[32] = __byte_perm_S (w[28], w[29], selector);
w[31] = __byte_perm_S (w[27], w[28], selector);
w[30] = __byte_perm_S (w[26], w[27], selector);
w[29] = __byte_perm_S (w[25], w[26], selector);
w[28] = __byte_perm_S (w[24], w[25], selector);
w[27] = __byte_perm_S (w[23], w[24], selector);
w[26] = __byte_perm_S (w[22], w[23], selector);
w[25] = __byte_perm_S (w[21], w[22], selector);
w[24] = __byte_perm_S (w[20], w[21], selector);
w[23] = __byte_perm_S (w[19], w[20], selector);
w[22] = __byte_perm_S (w[18], w[19], selector);
w[21] = __byte_perm_S (w[17], w[18], selector);
w[20] = __byte_perm_S (w[16], w[17], selector);
w[19] = __byte_perm_S (w[15], w[16], selector);
w[18] = __byte_perm_S (w[14], w[15], selector);
w[17] = __byte_perm_S (w[13], w[14], selector);
w[16] = __byte_perm_S (w[12], w[13], selector);
w[15] = __byte_perm_S (w[11], w[12], selector);
w[14] = __byte_perm_S (w[10], w[11], selector);
w[13] = __byte_perm_S (w[ 9], w[10], selector);
w[12] = __byte_perm_S (w[ 8], w[ 9], selector);
w[11] = __byte_perm_S (w[ 7], w[ 8], selector);
w[10] = __byte_perm_S (w[ 6], w[ 7], selector);
w[ 9] = __byte_perm_S (w[ 5], w[ 6], selector);
w[ 8] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 7] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 6] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 5] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 4] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 3] = __byte_perm_S ( 0, w[ 0], selector);
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 4:
w[63] = __byte_perm_S (w[58], w[59], selector);
w[62] = __byte_perm_S (w[57], w[58], selector);
w[61] = __byte_perm_S (w[56], w[57], selector);
w[60] = __byte_perm_S (w[55], w[56], selector);
w[59] = __byte_perm_S (w[54], w[55], selector);
w[58] = __byte_perm_S (w[53], w[54], selector);
w[57] = __byte_perm_S (w[52], w[53], selector);
w[56] = __byte_perm_S (w[51], w[52], selector);
w[55] = __byte_perm_S (w[50], w[51], selector);
w[54] = __byte_perm_S (w[49], w[50], selector);
w[53] = __byte_perm_S (w[48], w[49], selector);
w[52] = __byte_perm_S (w[47], w[48], selector);
w[51] = __byte_perm_S (w[46], w[47], selector);
w[50] = __byte_perm_S (w[45], w[46], selector);
w[49] = __byte_perm_S (w[44], w[45], selector);
w[48] = __byte_perm_S (w[43], w[44], selector);
w[47] = __byte_perm_S (w[42], w[43], selector);
w[46] = __byte_perm_S (w[41], w[42], selector);
w[45] = __byte_perm_S (w[40], w[41], selector);
w[44] = __byte_perm_S (w[39], w[40], selector);
w[43] = __byte_perm_S (w[38], w[39], selector);
w[42] = __byte_perm_S (w[37], w[38], selector);
w[41] = __byte_perm_S (w[36], w[37], selector);
w[40] = __byte_perm_S (w[35], w[36], selector);
w[39] = __byte_perm_S (w[34], w[35], selector);
w[38] = __byte_perm_S (w[33], w[34], selector);
w[37] = __byte_perm_S (w[32], w[33], selector);
w[36] = __byte_perm_S (w[31], w[32], selector);
w[35] = __byte_perm_S (w[30], w[31], selector);
w[34] = __byte_perm_S (w[29], w[30], selector);
w[33] = __byte_perm_S (w[28], w[29], selector);
w[32] = __byte_perm_S (w[27], w[28], selector);
w[31] = __byte_perm_S (w[26], w[27], selector);
w[30] = __byte_perm_S (w[25], w[26], selector);
w[29] = __byte_perm_S (w[24], w[25], selector);
w[28] = __byte_perm_S (w[23], w[24], selector);
w[27] = __byte_perm_S (w[22], w[23], selector);
w[26] = __byte_perm_S (w[21], w[22], selector);
w[25] = __byte_perm_S (w[20], w[21], selector);
w[24] = __byte_perm_S (w[19], w[20], selector);
w[23] = __byte_perm_S (w[18], w[19], selector);
w[22] = __byte_perm_S (w[17], w[18], selector);
w[21] = __byte_perm_S (w[16], w[17], selector);
w[20] = __byte_perm_S (w[15], w[16], selector);
w[19] = __byte_perm_S (w[14], w[15], selector);
w[18] = __byte_perm_S (w[13], w[14], selector);
w[17] = __byte_perm_S (w[12], w[13], selector);
w[16] = __byte_perm_S (w[11], w[12], selector);
w[15] = __byte_perm_S (w[10], w[11], selector);
w[14] = __byte_perm_S (w[ 9], w[10], selector);
w[13] = __byte_perm_S (w[ 8], w[ 9], selector);
w[12] = __byte_perm_S (w[ 7], w[ 8], selector);
w[11] = __byte_perm_S (w[ 6], w[ 7], selector);
w[10] = __byte_perm_S (w[ 5], w[ 6], selector);
w[ 9] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 8] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 7] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 6] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 5] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 4] = __byte_perm_S ( 0, w[ 0], selector);
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 5:
w[63] = __byte_perm_S (w[57], w[58], selector);
w[62] = __byte_perm_S (w[56], w[57], selector);
w[61] = __byte_perm_S (w[55], w[56], selector);
w[60] = __byte_perm_S (w[54], w[55], selector);
w[59] = __byte_perm_S (w[53], w[54], selector);
w[58] = __byte_perm_S (w[52], w[53], selector);
w[57] = __byte_perm_S (w[51], w[52], selector);
w[56] = __byte_perm_S (w[50], w[51], selector);
w[55] = __byte_perm_S (w[49], w[50], selector);
w[54] = __byte_perm_S (w[48], w[49], selector);
w[53] = __byte_perm_S (w[47], w[48], selector);
w[52] = __byte_perm_S (w[46], w[47], selector);
w[51] = __byte_perm_S (w[45], w[46], selector);
w[50] = __byte_perm_S (w[44], w[45], selector);
w[49] = __byte_perm_S (w[43], w[44], selector);
w[48] = __byte_perm_S (w[42], w[43], selector);
w[47] = __byte_perm_S (w[41], w[42], selector);
w[46] = __byte_perm_S (w[40], w[41], selector);
w[45] = __byte_perm_S (w[39], w[40], selector);
w[44] = __byte_perm_S (w[38], w[39], selector);
w[43] = __byte_perm_S (w[37], w[38], selector);
w[42] = __byte_perm_S (w[36], w[37], selector);
w[41] = __byte_perm_S (w[35], w[36], selector);
w[40] = __byte_perm_S (w[34], w[35], selector);
w[39] = __byte_perm_S (w[33], w[34], selector);
w[38] = __byte_perm_S (w[32], w[33], selector);
w[37] = __byte_perm_S (w[31], w[32], selector);
w[36] = __byte_perm_S (w[30], w[31], selector);
w[35] = __byte_perm_S (w[29], w[30], selector);
w[34] = __byte_perm_S (w[28], w[29], selector);
w[33] = __byte_perm_S (w[27], w[28], selector);
w[32] = __byte_perm_S (w[26], w[27], selector);
w[31] = __byte_perm_S (w[25], w[26], selector);
w[30] = __byte_perm_S (w[24], w[25], selector);
w[29] = __byte_perm_S (w[23], w[24], selector);
w[28] = __byte_perm_S (w[22], w[23], selector);
w[27] = __byte_perm_S (w[21], w[22], selector);
w[26] = __byte_perm_S (w[20], w[21], selector);
w[25] = __byte_perm_S (w[19], w[20], selector);
w[24] = __byte_perm_S (w[18], w[19], selector);
w[23] = __byte_perm_S (w[17], w[18], selector);
w[22] = __byte_perm_S (w[16], w[17], selector);
w[21] = __byte_perm_S (w[15], w[16], selector);
w[20] = __byte_perm_S (w[14], w[15], selector);
w[19] = __byte_perm_S (w[13], w[14], selector);
w[18] = __byte_perm_S (w[12], w[13], selector);
w[17] = __byte_perm_S (w[11], w[12], selector);
w[16] = __byte_perm_S (w[10], w[11], selector);
w[15] = __byte_perm_S (w[ 9], w[10], selector);
w[14] = __byte_perm_S (w[ 8], w[ 9], selector);
w[13] = __byte_perm_S (w[ 7], w[ 8], selector);
w[12] = __byte_perm_S (w[ 6], w[ 7], selector);
w[11] = __byte_perm_S (w[ 5], w[ 6], selector);
w[10] = __byte_perm_S (w[ 4], w[ 5], selector);
w[ 9] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 8] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 7] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 6] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 5] = __byte_perm_S ( 0, w[ 0], selector);
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 6:
w[63] = __byte_perm_S (w[56], w[57], selector);
w[62] = __byte_perm_S (w[55], w[56], selector);
w[61] = __byte_perm_S (w[54], w[55], selector);
w[60] = __byte_perm_S (w[53], w[54], selector);
w[59] = __byte_perm_S (w[52], w[53], selector);
w[58] = __byte_perm_S (w[51], w[52], selector);
w[57] = __byte_perm_S (w[50], w[51], selector);
w[56] = __byte_perm_S (w[49], w[50], selector);
w[55] = __byte_perm_S (w[48], w[49], selector);
w[54] = __byte_perm_S (w[47], w[48], selector);
w[53] = __byte_perm_S (w[46], w[47], selector);
w[52] = __byte_perm_S (w[45], w[46], selector);
w[51] = __byte_perm_S (w[44], w[45], selector);
w[50] = __byte_perm_S (w[43], w[44], selector);
w[49] = __byte_perm_S (w[42], w[43], selector);
w[48] = __byte_perm_S (w[41], w[42], selector);
w[47] = __byte_perm_S (w[40], w[41], selector);
w[46] = __byte_perm_S (w[39], w[40], selector);
w[45] = __byte_perm_S (w[38], w[39], selector);
w[44] = __byte_perm_S (w[37], w[38], selector);
w[43] = __byte_perm_S (w[36], w[37], selector);
w[42] = __byte_perm_S (w[35], w[36], selector);
w[41] = __byte_perm_S (w[34], w[35], selector);
w[40] = __byte_perm_S (w[33], w[34], selector);
w[39] = __byte_perm_S (w[32], w[33], selector);
w[38] = __byte_perm_S (w[31], w[32], selector);
w[37] = __byte_perm_S (w[30], w[31], selector);
w[36] = __byte_perm_S (w[29], w[30], selector);
w[35] = __byte_perm_S (w[28], w[29], selector);
w[34] = __byte_perm_S (w[27], w[28], selector);
w[33] = __byte_perm_S (w[26], w[27], selector);
w[32] = __byte_perm_S (w[25], w[26], selector);
w[31] = __byte_perm_S (w[24], w[25], selector);
w[30] = __byte_perm_S (w[23], w[24], selector);
w[29] = __byte_perm_S (w[22], w[23], selector);
w[28] = __byte_perm_S (w[21], w[22], selector);
w[27] = __byte_perm_S (w[20], w[21], selector);
w[26] = __byte_perm_S (w[19], w[20], selector);
w[25] = __byte_perm_S (w[18], w[19], selector);
w[24] = __byte_perm_S (w[17], w[18], selector);
w[23] = __byte_perm_S (w[16], w[17], selector);
w[22] = __byte_perm_S (w[15], w[16], selector);
w[21] = __byte_perm_S (w[14], w[15], selector);
w[20] = __byte_perm_S (w[13], w[14], selector);
w[19] = __byte_perm_S (w[12], w[13], selector);
w[18] = __byte_perm_S (w[11], w[12], selector);
w[17] = __byte_perm_S (w[10], w[11], selector);
w[16] = __byte_perm_S (w[ 9], w[10], selector);
w[15] = __byte_perm_S (w[ 8], w[ 9], selector);
w[14] = __byte_perm_S (w[ 7], w[ 8], selector);
w[13] = __byte_perm_S (w[ 6], w[ 7], selector);
w[12] = __byte_perm_S (w[ 5], w[ 6], selector);
w[11] = __byte_perm_S (w[ 4], w[ 5], selector);
w[10] = __byte_perm_S (w[ 3], w[ 4], selector);
w[ 9] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 8] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 7] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 6] = __byte_perm_S ( 0, w[ 0], selector);
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 7:
w[63] = __byte_perm_S (w[55], w[56], selector);
w[62] = __byte_perm_S (w[54], w[55], selector);
w[61] = __byte_perm_S (w[53], w[54], selector);
w[60] = __byte_perm_S (w[52], w[53], selector);
w[59] = __byte_perm_S (w[51], w[52], selector);
w[58] = __byte_perm_S (w[50], w[51], selector);
w[57] = __byte_perm_S (w[49], w[50], selector);
w[56] = __byte_perm_S (w[48], w[49], selector);
w[55] = __byte_perm_S (w[47], w[48], selector);
w[54] = __byte_perm_S (w[46], w[47], selector);
w[53] = __byte_perm_S (w[45], w[46], selector);
w[52] = __byte_perm_S (w[44], w[45], selector);
w[51] = __byte_perm_S (w[43], w[44], selector);
w[50] = __byte_perm_S (w[42], w[43], selector);
w[49] = __byte_perm_S (w[41], w[42], selector);
w[48] = __byte_perm_S (w[40], w[41], selector);
w[47] = __byte_perm_S (w[39], w[40], selector);
w[46] = __byte_perm_S (w[38], w[39], selector);
w[45] = __byte_perm_S (w[37], w[38], selector);
w[44] = __byte_perm_S (w[36], w[37], selector);
w[43] = __byte_perm_S (w[35], w[36], selector);
w[42] = __byte_perm_S (w[34], w[35], selector);
w[41] = __byte_perm_S (w[33], w[34], selector);
w[40] = __byte_perm_S (w[32], w[33], selector);
w[39] = __byte_perm_S (w[31], w[32], selector);
w[38] = __byte_perm_S (w[30], w[31], selector);
w[37] = __byte_perm_S (w[29], w[30], selector);
w[36] = __byte_perm_S (w[28], w[29], selector);
w[35] = __byte_perm_S (w[27], w[28], selector);
w[34] = __byte_perm_S (w[26], w[27], selector);
w[33] = __byte_perm_S (w[25], w[26], selector);
w[32] = __byte_perm_S (w[24], w[25], selector);
w[31] = __byte_perm_S (w[23], w[24], selector);
w[30] = __byte_perm_S (w[22], w[23], selector);
w[29] = __byte_perm_S (w[21], w[22], selector);
w[28] = __byte_perm_S (w[20], w[21], selector);
w[27] = __byte_perm_S (w[19], w[20], selector);
w[26] = __byte_perm_S (w[18], w[19], selector);
w[25] = __byte_perm_S (w[17], w[18], selector);
w[24] = __byte_perm_S (w[16], w[17], selector);
w[23] = __byte_perm_S (w[15], w[16], selector);
w[22] = __byte_perm_S (w[14], w[15], selector);
w[21] = __byte_perm_S (w[13], w[14], selector);
w[20] = __byte_perm_S (w[12], w[13], selector);
w[19] = __byte_perm_S (w[11], w[12], selector);
w[18] = __byte_perm_S (w[10], w[11], selector);
w[17] = __byte_perm_S (w[ 9], w[10], selector);
w[16] = __byte_perm_S (w[ 8], w[ 9], selector);
w[15] = __byte_perm_S (w[ 7], w[ 8], selector);
w[14] = __byte_perm_S (w[ 6], w[ 7], selector);
w[13] = __byte_perm_S (w[ 5], w[ 6], selector);
w[12] = __byte_perm_S (w[ 4], w[ 5], selector);
w[11] = __byte_perm_S (w[ 3], w[ 4], selector);
w[10] = __byte_perm_S (w[ 2], w[ 3], selector);
w[ 9] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 8] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 7] = __byte_perm_S ( 0, w[ 0], selector);
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 8:
w[63] = __byte_perm_S (w[54], w[55], selector);
w[62] = __byte_perm_S (w[53], w[54], selector);
w[61] = __byte_perm_S (w[52], w[53], selector);
w[60] = __byte_perm_S (w[51], w[52], selector);
w[59] = __byte_perm_S (w[50], w[51], selector);
w[58] = __byte_perm_S (w[49], w[50], selector);
w[57] = __byte_perm_S (w[48], w[49], selector);
w[56] = __byte_perm_S (w[47], w[48], selector);
w[55] = __byte_perm_S (w[46], w[47], selector);
w[54] = __byte_perm_S (w[45], w[46], selector);
w[53] = __byte_perm_S (w[44], w[45], selector);
w[52] = __byte_perm_S (w[43], w[44], selector);
w[51] = __byte_perm_S (w[42], w[43], selector);
w[50] = __byte_perm_S (w[41], w[42], selector);
w[49] = __byte_perm_S (w[40], w[41], selector);
w[48] = __byte_perm_S (w[39], w[40], selector);
w[47] = __byte_perm_S (w[38], w[39], selector);
w[46] = __byte_perm_S (w[37], w[38], selector);
w[45] = __byte_perm_S (w[36], w[37], selector);
w[44] = __byte_perm_S (w[35], w[36], selector);
w[43] = __byte_perm_S (w[34], w[35], selector);
w[42] = __byte_perm_S (w[33], w[34], selector);
w[41] = __byte_perm_S (w[32], w[33], selector);
w[40] = __byte_perm_S (w[31], w[32], selector);
w[39] = __byte_perm_S (w[30], w[31], selector);
w[38] = __byte_perm_S (w[29], w[30], selector);
w[37] = __byte_perm_S (w[28], w[29], selector);
w[36] = __byte_perm_S (w[27], w[28], selector);
w[35] = __byte_perm_S (w[26], w[27], selector);
w[34] = __byte_perm_S (w[25], w[26], selector);
w[33] = __byte_perm_S (w[24], w[25], selector);
w[32] = __byte_perm_S (w[23], w[24], selector);
w[31] = __byte_perm_S (w[22], w[23], selector);
w[30] = __byte_perm_S (w[21], w[22], selector);
w[29] = __byte_perm_S (w[20], w[21], selector);
w[28] = __byte_perm_S (w[19], w[20], selector);
w[27] = __byte_perm_S (w[18], w[19], selector);
w[26] = __byte_perm_S (w[17], w[18], selector);
w[25] = __byte_perm_S (w[16], w[17], selector);
w[24] = __byte_perm_S (w[15], w[16], selector);
w[23] = __byte_perm_S (w[14], w[15], selector);
w[22] = __byte_perm_S (w[13], w[14], selector);
w[21] = __byte_perm_S (w[12], w[13], selector);
w[20] = __byte_perm_S (w[11], w[12], selector);
w[19] = __byte_perm_S (w[10], w[11], selector);
w[18] = __byte_perm_S (w[ 9], w[10], selector);
w[17] = __byte_perm_S (w[ 8], w[ 9], selector);
w[16] = __byte_perm_S (w[ 7], w[ 8], selector);
w[15] = __byte_perm_S (w[ 6], w[ 7], selector);
w[14] = __byte_perm_S (w[ 5], w[ 6], selector);
w[13] = __byte_perm_S (w[ 4], w[ 5], selector);
w[12] = __byte_perm_S (w[ 3], w[ 4], selector);
w[11] = __byte_perm_S (w[ 2], w[ 3], selector);
w[10] = __byte_perm_S (w[ 1], w[ 2], selector);
w[ 9] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 8] = __byte_perm_S ( 0, w[ 0], selector);
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 9:
w[63] = __byte_perm_S (w[53], w[54], selector);
w[62] = __byte_perm_S (w[52], w[53], selector);
w[61] = __byte_perm_S (w[51], w[52], selector);
w[60] = __byte_perm_S (w[50], w[51], selector);
w[59] = __byte_perm_S (w[49], w[50], selector);
w[58] = __byte_perm_S (w[48], w[49], selector);
w[57] = __byte_perm_S (w[47], w[48], selector);
w[56] = __byte_perm_S (w[46], w[47], selector);
w[55] = __byte_perm_S (w[45], w[46], selector);
w[54] = __byte_perm_S (w[44], w[45], selector);
w[53] = __byte_perm_S (w[43], w[44], selector);
w[52] = __byte_perm_S (w[42], w[43], selector);
w[51] = __byte_perm_S (w[41], w[42], selector);
w[50] = __byte_perm_S (w[40], w[41], selector);
w[49] = __byte_perm_S (w[39], w[40], selector);
w[48] = __byte_perm_S (w[38], w[39], selector);
w[47] = __byte_perm_S (w[37], w[38], selector);
w[46] = __byte_perm_S (w[36], w[37], selector);
w[45] = __byte_perm_S (w[35], w[36], selector);
w[44] = __byte_perm_S (w[34], w[35], selector);
w[43] = __byte_perm_S (w[33], w[34], selector);
w[42] = __byte_perm_S (w[32], w[33], selector);
w[41] = __byte_perm_S (w[31], w[32], selector);
w[40] = __byte_perm_S (w[30], w[31], selector);
w[39] = __byte_perm_S (w[29], w[30], selector);
w[38] = __byte_perm_S (w[28], w[29], selector);
w[37] = __byte_perm_S (w[27], w[28], selector);
w[36] = __byte_perm_S (w[26], w[27], selector);
w[35] = __byte_perm_S (w[25], w[26], selector);
w[34] = __byte_perm_S (w[24], w[25], selector);
w[33] = __byte_perm_S (w[23], w[24], selector);
w[32] = __byte_perm_S (w[22], w[23], selector);
w[31] = __byte_perm_S (w[21], w[22], selector);
w[30] = __byte_perm_S (w[20], w[21], selector);
w[29] = __byte_perm_S (w[19], w[20], selector);
w[28] = __byte_perm_S (w[18], w[19], selector);
w[27] = __byte_perm_S (w[17], w[18], selector);
w[26] = __byte_perm_S (w[16], w[17], selector);
w[25] = __byte_perm_S (w[15], w[16], selector);
w[24] = __byte_perm_S (w[14], w[15], selector);
w[23] = __byte_perm_S (w[13], w[14], selector);
w[22] = __byte_perm_S (w[12], w[13], selector);
w[21] = __byte_perm_S (w[11], w[12], selector);
w[20] = __byte_perm_S (w[10], w[11], selector);
w[19] = __byte_perm_S (w[ 9], w[10], selector);
w[18] = __byte_perm_S (w[ 8], w[ 9], selector);
w[17] = __byte_perm_S (w[ 7], w[ 8], selector);
w[16] = __byte_perm_S (w[ 6], w[ 7], selector);
w[15] = __byte_perm_S (w[ 5], w[ 6], selector);
w[14] = __byte_perm_S (w[ 4], w[ 5], selector);
w[13] = __byte_perm_S (w[ 3], w[ 4], selector);
w[12] = __byte_perm_S (w[ 2], w[ 3], selector);
w[11] = __byte_perm_S (w[ 1], w[ 2], selector);
w[10] = __byte_perm_S (w[ 0], w[ 1], selector);
w[ 9] = __byte_perm_S ( 0, w[ 0], selector);
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 10:
w[63] = __byte_perm_S (w[52], w[53], selector);
w[62] = __byte_perm_S (w[51], w[52], selector);
w[61] = __byte_perm_S (w[50], w[51], selector);
w[60] = __byte_perm_S (w[49], w[50], selector);
w[59] = __byte_perm_S (w[48], w[49], selector);
w[58] = __byte_perm_S (w[47], w[48], selector);
w[57] = __byte_perm_S (w[46], w[47], selector);
w[56] = __byte_perm_S (w[45], w[46], selector);
w[55] = __byte_perm_S (w[44], w[45], selector);
w[54] = __byte_perm_S (w[43], w[44], selector);
w[53] = __byte_perm_S (w[42], w[43], selector);
w[52] = __byte_perm_S (w[41], w[42], selector);
w[51] = __byte_perm_S (w[40], w[41], selector);
w[50] = __byte_perm_S (w[39], w[40], selector);
w[49] = __byte_perm_S (w[38], w[39], selector);
w[48] = __byte_perm_S (w[37], w[38], selector);
w[47] = __byte_perm_S (w[36], w[37], selector);
w[46] = __byte_perm_S (w[35], w[36], selector);
w[45] = __byte_perm_S (w[34], w[35], selector);
w[44] = __byte_perm_S (w[33], w[34], selector);
w[43] = __byte_perm_S (w[32], w[33], selector);
w[42] = __byte_perm_S (w[31], w[32], selector);
w[41] = __byte_perm_S (w[30], w[31], selector);
w[40] = __byte_perm_S (w[29], w[30], selector);
w[39] = __byte_perm_S (w[28], w[29], selector);
w[38] = __byte_perm_S (w[27], w[28], selector);
w[37] = __byte_perm_S (w[26], w[27], selector);
w[36] = __byte_perm_S (w[25], w[26], selector);
w[35] = __byte_perm_S (w[24], w[25], selector);
w[34] = __byte_perm_S (w[23], w[24], selector);
w[33] = __byte_perm_S (w[22], w[23], selector);
w[32] = __byte_perm_S (w[21], w[22], selector);
w[31] = __byte_perm_S (w[20], w[21], selector);
w[30] = __byte_perm_S (w[19], w[20], selector);
w[29] = __byte_perm_S (w[18], w[19], selector);
w[28] = __byte_perm_S (w[17], w[18], selector);
w[27] = __byte_perm_S (w[16], w[17], selector);
w[26] = __byte_perm_S (w[15], w[16], selector);
w[25] = __byte_perm_S (w[14], w[15], selector);
w[24] = __byte_perm_S (w[13], w[14], selector);
w[23] = __byte_perm_S (w[12], w[13], selector);
w[22] = __byte_perm_S (w[11], w[12], selector);
w[21] = __byte_perm_S (w[10], w[11], selector);
w[20] = __byte_perm_S (w[ 9], w[10], selector);
w[19] = __byte_perm_S (w[ 8], w[ 9], selector);
w[18] = __byte_perm_S (w[ 7], w[ 8], selector);
w[17] = __byte_perm_S (w[ 6], w[ 7], selector);
w[16] = __byte_perm_S (w[ 5], w[ 6], selector);
w[15] = __byte_perm_S (w[ 4], w[ 5], selector);
w[14] = __byte_perm_S (w[ 3], w[ 4], selector);
w[13] = __byte_perm_S (w[ 2], w[ 3], selector);
w[12] = __byte_perm_S (w[ 1], w[ 2], selector);
w[11] = __byte_perm_S (w[ 0], w[ 1], selector);
w[10] = __byte_perm_S ( 0, w[ 0], selector);
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 11:
w[63] = __byte_perm_S (w[51], w[52], selector);
w[62] = __byte_perm_S (w[50], w[51], selector);
w[61] = __byte_perm_S (w[49], w[50], selector);
w[60] = __byte_perm_S (w[48], w[49], selector);
w[59] = __byte_perm_S (w[47], w[48], selector);
w[58] = __byte_perm_S (w[46], w[47], selector);
w[57] = __byte_perm_S (w[45], w[46], selector);
w[56] = __byte_perm_S (w[44], w[45], selector);
w[55] = __byte_perm_S (w[43], w[44], selector);
w[54] = __byte_perm_S (w[42], w[43], selector);
w[53] = __byte_perm_S (w[41], w[42], selector);
w[52] = __byte_perm_S (w[40], w[41], selector);
w[51] = __byte_perm_S (w[39], w[40], selector);
w[50] = __byte_perm_S (w[38], w[39], selector);
w[49] = __byte_perm_S (w[37], w[38], selector);
w[48] = __byte_perm_S (w[36], w[37], selector);
w[47] = __byte_perm_S (w[35], w[36], selector);
w[46] = __byte_perm_S (w[34], w[35], selector);
w[45] = __byte_perm_S (w[33], w[34], selector);
w[44] = __byte_perm_S (w[32], w[33], selector);
w[43] = __byte_perm_S (w[31], w[32], selector);
w[42] = __byte_perm_S (w[30], w[31], selector);
w[41] = __byte_perm_S (w[29], w[30], selector);
w[40] = __byte_perm_S (w[28], w[29], selector);
w[39] = __byte_perm_S (w[27], w[28], selector);
w[38] = __byte_perm_S (w[26], w[27], selector);
w[37] = __byte_perm_S (w[25], w[26], selector);
w[36] = __byte_perm_S (w[24], w[25], selector);
w[35] = __byte_perm_S (w[23], w[24], selector);
w[34] = __byte_perm_S (w[22], w[23], selector);
w[33] = __byte_perm_S (w[21], w[22], selector);
w[32] = __byte_perm_S (w[20], w[21], selector);
w[31] = __byte_perm_S (w[19], w[20], selector);
w[30] = __byte_perm_S (w[18], w[19], selector);
w[29] = __byte_perm_S (w[17], w[18], selector);
w[28] = __byte_perm_S (w[16], w[17], selector);
w[27] = __byte_perm_S (w[15], w[16], selector);
w[26] = __byte_perm_S (w[14], w[15], selector);
w[25] = __byte_perm_S (w[13], w[14], selector);
w[24] = __byte_perm_S (w[12], w[13], selector);
w[23] = __byte_perm_S (w[11], w[12], selector);
w[22] = __byte_perm_S (w[10], w[11], selector);
w[21] = __byte_perm_S (w[ 9], w[10], selector);
w[20] = __byte_perm_S (w[ 8], w[ 9], selector);
w[19] = __byte_perm_S (w[ 7], w[ 8], selector);
w[18] = __byte_perm_S (w[ 6], w[ 7], selector);
w[17] = __byte_perm_S (w[ 5], w[ 6], selector);
w[16] = __byte_perm_S (w[ 4], w[ 5], selector);
w[15] = __byte_perm_S (w[ 3], w[ 4], selector);
w[14] = __byte_perm_S (w[ 2], w[ 3], selector);
w[13] = __byte_perm_S (w[ 1], w[ 2], selector);
w[12] = __byte_perm_S (w[ 0], w[ 1], selector);
w[11] = __byte_perm_S ( 0, w[ 0], selector);
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 12:
w[63] = __byte_perm_S (w[50], w[51], selector);
w[62] = __byte_perm_S (w[49], w[50], selector);
w[61] = __byte_perm_S (w[48], w[49], selector);
w[60] = __byte_perm_S (w[47], w[48], selector);
w[59] = __byte_perm_S (w[46], w[47], selector);
w[58] = __byte_perm_S (w[45], w[46], selector);
w[57] = __byte_perm_S (w[44], w[45], selector);
w[56] = __byte_perm_S (w[43], w[44], selector);
w[55] = __byte_perm_S (w[42], w[43], selector);
w[54] = __byte_perm_S (w[41], w[42], selector);
w[53] = __byte_perm_S (w[40], w[41], selector);
w[52] = __byte_perm_S (w[39], w[40], selector);
w[51] = __byte_perm_S (w[38], w[39], selector);
w[50] = __byte_perm_S (w[37], w[38], selector);
w[49] = __byte_perm_S (w[36], w[37], selector);
w[48] = __byte_perm_S (w[35], w[36], selector);
w[47] = __byte_perm_S (w[34], w[35], selector);
w[46] = __byte_perm_S (w[33], w[34], selector);
w[45] = __byte_perm_S (w[32], w[33], selector);
w[44] = __byte_perm_S (w[31], w[32], selector);
w[43] = __byte_perm_S (w[30], w[31], selector);
w[42] = __byte_perm_S (w[29], w[30], selector);
w[41] = __byte_perm_S (w[28], w[29], selector);
w[40] = __byte_perm_S (w[27], w[28], selector);
w[39] = __byte_perm_S (w[26], w[27], selector);
w[38] = __byte_perm_S (w[25], w[26], selector);
w[37] = __byte_perm_S (w[24], w[25], selector);
w[36] = __byte_perm_S (w[23], w[24], selector);
w[35] = __byte_perm_S (w[22], w[23], selector);
w[34] = __byte_perm_S (w[21], w[22], selector);
w[33] = __byte_perm_S (w[20], w[21], selector);
w[32] = __byte_perm_S (w[19], w[20], selector);
w[31] = __byte_perm_S (w[18], w[19], selector);
w[30] = __byte_perm_S (w[17], w[18], selector);
w[29] = __byte_perm_S (w[16], w[17], selector);
w[28] = __byte_perm_S (w[15], w[16], selector);
w[27] = __byte_perm_S (w[14], w[15], selector);
w[26] = __byte_perm_S (w[13], w[14], selector);
w[25] = __byte_perm_S (w[12], w[13], selector);
w[24] = __byte_perm_S (w[11], w[12], selector);
w[23] = __byte_perm_S (w[10], w[11], selector);
w[22] = __byte_perm_S (w[ 9], w[10], selector);
w[21] = __byte_perm_S (w[ 8], w[ 9], selector);
w[20] = __byte_perm_S (w[ 7], w[ 8], selector);
w[19] = __byte_perm_S (w[ 6], w[ 7], selector);
w[18] = __byte_perm_S (w[ 5], w[ 6], selector);
w[17] = __byte_perm_S (w[ 4], w[ 5], selector);
w[16] = __byte_perm_S (w[ 3], w[ 4], selector);
w[15] = __byte_perm_S (w[ 2], w[ 3], selector);
w[14] = __byte_perm_S (w[ 1], w[ 2], selector);
w[13] = __byte_perm_S (w[ 0], w[ 1], selector);
w[12] = __byte_perm_S ( 0, w[ 0], selector);
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 13:
w[63] = __byte_perm_S (w[49], w[50], selector);
w[62] = __byte_perm_S (w[48], w[49], selector);
w[61] = __byte_perm_S (w[47], w[48], selector);
w[60] = __byte_perm_S (w[46], w[47], selector);
w[59] = __byte_perm_S (w[45], w[46], selector);
w[58] = __byte_perm_S (w[44], w[45], selector);
w[57] = __byte_perm_S (w[43], w[44], selector);
w[56] = __byte_perm_S (w[42], w[43], selector);
w[55] = __byte_perm_S (w[41], w[42], selector);
w[54] = __byte_perm_S (w[40], w[41], selector);
w[53] = __byte_perm_S (w[39], w[40], selector);
w[52] = __byte_perm_S (w[38], w[39], selector);
w[51] = __byte_perm_S (w[37], w[38], selector);
w[50] = __byte_perm_S (w[36], w[37], selector);
w[49] = __byte_perm_S (w[35], w[36], selector);
w[48] = __byte_perm_S (w[34], w[35], selector);
w[47] = __byte_perm_S (w[33], w[34], selector);
w[46] = __byte_perm_S (w[32], w[33], selector);
w[45] = __byte_perm_S (w[31], w[32], selector);
w[44] = __byte_perm_S (w[30], w[31], selector);
w[43] = __byte_perm_S (w[29], w[30], selector);
w[42] = __byte_perm_S (w[28], w[29], selector);
w[41] = __byte_perm_S (w[27], w[28], selector);
w[40] = __byte_perm_S (w[26], w[27], selector);
w[39] = __byte_perm_S (w[25], w[26], selector);
w[38] = __byte_perm_S (w[24], w[25], selector);
w[37] = __byte_perm_S (w[23], w[24], selector);
w[36] = __byte_perm_S (w[22], w[23], selector);
w[35] = __byte_perm_S (w[21], w[22], selector);
w[34] = __byte_perm_S (w[20], w[21], selector);
w[33] = __byte_perm_S (w[19], w[20], selector);
w[32] = __byte_perm_S (w[18], w[19], selector);
w[31] = __byte_perm_S (w[17], w[18], selector);
w[30] = __byte_perm_S (w[16], w[17], selector);
w[29] = __byte_perm_S (w[15], w[16], selector);
w[28] = __byte_perm_S (w[14], w[15], selector);
w[27] = __byte_perm_S (w[13], w[14], selector);
w[26] = __byte_perm_S (w[12], w[13], selector);
w[25] = __byte_perm_S (w[11], w[12], selector);
w[24] = __byte_perm_S (w[10], w[11], selector);
w[23] = __byte_perm_S (w[ 9], w[10], selector);
w[22] = __byte_perm_S (w[ 8], w[ 9], selector);
w[21] = __byte_perm_S (w[ 7], w[ 8], selector);
w[20] = __byte_perm_S (w[ 6], w[ 7], selector);
w[19] = __byte_perm_S (w[ 5], w[ 6], selector);
w[18] = __byte_perm_S (w[ 4], w[ 5], selector);
w[17] = __byte_perm_S (w[ 3], w[ 4], selector);
w[16] = __byte_perm_S (w[ 2], w[ 3], selector);
w[15] = __byte_perm_S (w[ 1], w[ 2], selector);
w[14] = __byte_perm_S (w[ 0], w[ 1], selector);
w[13] = __byte_perm_S ( 0, w[ 0], selector);
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 14:
w[63] = __byte_perm_S (w[48], w[49], selector);
w[62] = __byte_perm_S (w[47], w[48], selector);
w[61] = __byte_perm_S (w[46], w[47], selector);
w[60] = __byte_perm_S (w[45], w[46], selector);
w[59] = __byte_perm_S (w[44], w[45], selector);
w[58] = __byte_perm_S (w[43], w[44], selector);
w[57] = __byte_perm_S (w[42], w[43], selector);
w[56] = __byte_perm_S (w[41], w[42], selector);
w[55] = __byte_perm_S (w[40], w[41], selector);
w[54] = __byte_perm_S (w[39], w[40], selector);
w[53] = __byte_perm_S (w[38], w[39], selector);
w[52] = __byte_perm_S (w[37], w[38], selector);
w[51] = __byte_perm_S (w[36], w[37], selector);
w[50] = __byte_perm_S (w[35], w[36], selector);
w[49] = __byte_perm_S (w[34], w[35], selector);
w[48] = __byte_perm_S (w[33], w[34], selector);
w[47] = __byte_perm_S (w[32], w[33], selector);
w[46] = __byte_perm_S (w[31], w[32], selector);
w[45] = __byte_perm_S (w[30], w[31], selector);
w[44] = __byte_perm_S (w[29], w[30], selector);
w[43] = __byte_perm_S (w[28], w[29], selector);
w[42] = __byte_perm_S (w[27], w[28], selector);
w[41] = __byte_perm_S (w[26], w[27], selector);
w[40] = __byte_perm_S (w[25], w[26], selector);
w[39] = __byte_perm_S (w[24], w[25], selector);
w[38] = __byte_perm_S (w[23], w[24], selector);
w[37] = __byte_perm_S (w[22], w[23], selector);
w[36] = __byte_perm_S (w[21], w[22], selector);
w[35] = __byte_perm_S (w[20], w[21], selector);
w[34] = __byte_perm_S (w[19], w[20], selector);
w[33] = __byte_perm_S (w[18], w[19], selector);
w[32] = __byte_perm_S (w[17], w[18], selector);
w[31] = __byte_perm_S (w[16], w[17], selector);
w[30] = __byte_perm_S (w[15], w[16], selector);
w[29] = __byte_perm_S (w[14], w[15], selector);
w[28] = __byte_perm_S (w[13], w[14], selector);
w[27] = __byte_perm_S (w[12], w[13], selector);
w[26] = __byte_perm_S (w[11], w[12], selector);
w[25] = __byte_perm_S (w[10], w[11], selector);
w[24] = __byte_perm_S (w[ 9], w[10], selector);
w[23] = __byte_perm_S (w[ 8], w[ 9], selector);
w[22] = __byte_perm_S (w[ 7], w[ 8], selector);
w[21] = __byte_perm_S (w[ 6], w[ 7], selector);
w[20] = __byte_perm_S (w[ 5], w[ 6], selector);
w[19] = __byte_perm_S (w[ 4], w[ 5], selector);
w[18] = __byte_perm_S (w[ 3], w[ 4], selector);
w[17] = __byte_perm_S (w[ 2], w[ 3], selector);
w[16] = __byte_perm_S (w[ 1], w[ 2], selector);
w[15] = __byte_perm_S (w[ 0], w[ 1], selector);
w[14] = __byte_perm_S ( 0, w[ 0], selector);
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 15:
w[63] = __byte_perm_S (w[47], w[48], selector);
w[62] = __byte_perm_S (w[46], w[47], selector);
w[61] = __byte_perm_S (w[45], w[46], selector);
w[60] = __byte_perm_S (w[44], w[45], selector);
w[59] = __byte_perm_S (w[43], w[44], selector);
w[58] = __byte_perm_S (w[42], w[43], selector);
w[57] = __byte_perm_S (w[41], w[42], selector);
w[56] = __byte_perm_S (w[40], w[41], selector);
w[55] = __byte_perm_S (w[39], w[40], selector);
w[54] = __byte_perm_S (w[38], w[39], selector);
w[53] = __byte_perm_S (w[37], w[38], selector);
w[52] = __byte_perm_S (w[36], w[37], selector);
w[51] = __byte_perm_S (w[35], w[36], selector);
w[50] = __byte_perm_S (w[34], w[35], selector);
w[49] = __byte_perm_S (w[33], w[34], selector);
w[48] = __byte_perm_S (w[32], w[33], selector);
w[47] = __byte_perm_S (w[31], w[32], selector);
w[46] = __byte_perm_S (w[30], w[31], selector);
w[45] = __byte_perm_S (w[29], w[30], selector);
w[44] = __byte_perm_S (w[28], w[29], selector);
w[43] = __byte_perm_S (w[27], w[28], selector);
w[42] = __byte_perm_S (w[26], w[27], selector);
w[41] = __byte_perm_S (w[25], w[26], selector);
w[40] = __byte_perm_S (w[24], w[25], selector);
w[39] = __byte_perm_S (w[23], w[24], selector);
w[38] = __byte_perm_S (w[22], w[23], selector);
w[37] = __byte_perm_S (w[21], w[22], selector);
w[36] = __byte_perm_S (w[20], w[21], selector);
w[35] = __byte_perm_S (w[19], w[20], selector);
w[34] = __byte_perm_S (w[18], w[19], selector);
w[33] = __byte_perm_S (w[17], w[18], selector);
w[32] = __byte_perm_S (w[16], w[17], selector);
w[31] = __byte_perm_S (w[15], w[16], selector);
w[30] = __byte_perm_S (w[14], w[15], selector);
w[29] = __byte_perm_S (w[13], w[14], selector);
w[28] = __byte_perm_S (w[12], w[13], selector);
w[27] = __byte_perm_S (w[11], w[12], selector);
w[26] = __byte_perm_S (w[10], w[11], selector);
w[25] = __byte_perm_S (w[ 9], w[10], selector);
w[24] = __byte_perm_S (w[ 8], w[ 9], selector);
w[23] = __byte_perm_S (w[ 7], w[ 8], selector);
w[22] = __byte_perm_S (w[ 6], w[ 7], selector);
w[21] = __byte_perm_S (w[ 5], w[ 6], selector);
w[20] = __byte_perm_S (w[ 4], w[ 5], selector);
w[19] = __byte_perm_S (w[ 3], w[ 4], selector);
w[18] = __byte_perm_S (w[ 2], w[ 3], selector);
w[17] = __byte_perm_S (w[ 1], w[ 2], selector);
w[16] = __byte_perm_S (w[ 0], w[ 1], selector);
w[15] = __byte_perm_S ( 0, w[ 0], selector);
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 16:
w[63] = __byte_perm_S (w[46], w[47], selector);
w[62] = __byte_perm_S (w[45], w[46], selector);
w[61] = __byte_perm_S (w[44], w[45], selector);
w[60] = __byte_perm_S (w[43], w[44], selector);
w[59] = __byte_perm_S (w[42], w[43], selector);
w[58] = __byte_perm_S (w[41], w[42], selector);
w[57] = __byte_perm_S (w[40], w[41], selector);
w[56] = __byte_perm_S (w[39], w[40], selector);
w[55] = __byte_perm_S (w[38], w[39], selector);
w[54] = __byte_perm_S (w[37], w[38], selector);
w[53] = __byte_perm_S (w[36], w[37], selector);
w[52] = __byte_perm_S (w[35], w[36], selector);
w[51] = __byte_perm_S (w[34], w[35], selector);
w[50] = __byte_perm_S (w[33], w[34], selector);
w[49] = __byte_perm_S (w[32], w[33], selector);
w[48] = __byte_perm_S (w[31], w[32], selector);
w[47] = __byte_perm_S (w[30], w[31], selector);
w[46] = __byte_perm_S (w[29], w[30], selector);
w[45] = __byte_perm_S (w[28], w[29], selector);
w[44] = __byte_perm_S (w[27], w[28], selector);
w[43] = __byte_perm_S (w[26], w[27], selector);
w[42] = __byte_perm_S (w[25], w[26], selector);
w[41] = __byte_perm_S (w[24], w[25], selector);
w[40] = __byte_perm_S (w[23], w[24], selector);
w[39] = __byte_perm_S (w[22], w[23], selector);
w[38] = __byte_perm_S (w[21], w[22], selector);
w[37] = __byte_perm_S (w[20], w[21], selector);
w[36] = __byte_perm_S (w[19], w[20], selector);
w[35] = __byte_perm_S (w[18], w[19], selector);
w[34] = __byte_perm_S (w[17], w[18], selector);
w[33] = __byte_perm_S (w[16], w[17], selector);
w[32] = __byte_perm_S (w[15], w[16], selector);
w[31] = __byte_perm_S (w[14], w[15], selector);
w[30] = __byte_perm_S (w[13], w[14], selector);
w[29] = __byte_perm_S (w[12], w[13], selector);
w[28] = __byte_perm_S (w[11], w[12], selector);
w[27] = __byte_perm_S (w[10], w[11], selector);
w[26] = __byte_perm_S (w[ 9], w[10], selector);
w[25] = __byte_perm_S (w[ 8], w[ 9], selector);
w[24] = __byte_perm_S (w[ 7], w[ 8], selector);
w[23] = __byte_perm_S (w[ 6], w[ 7], selector);
w[22] = __byte_perm_S (w[ 5], w[ 6], selector);
w[21] = __byte_perm_S (w[ 4], w[ 5], selector);
w[20] = __byte_perm_S (w[ 3], w[ 4], selector);
w[19] = __byte_perm_S (w[ 2], w[ 3], selector);
w[18] = __byte_perm_S (w[ 1], w[ 2], selector);
w[17] = __byte_perm_S (w[ 0], w[ 1], selector);
w[16] = __byte_perm_S ( 0, w[ 0], selector);
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 17:
w[63] = __byte_perm_S (w[45], w[46], selector);
w[62] = __byte_perm_S (w[44], w[45], selector);
w[61] = __byte_perm_S (w[43], w[44], selector);
w[60] = __byte_perm_S (w[42], w[43], selector);
w[59] = __byte_perm_S (w[41], w[42], selector);
w[58] = __byte_perm_S (w[40], w[41], selector);
w[57] = __byte_perm_S (w[39], w[40], selector);
w[56] = __byte_perm_S (w[38], w[39], selector);
w[55] = __byte_perm_S (w[37], w[38], selector);
w[54] = __byte_perm_S (w[36], w[37], selector);
w[53] = __byte_perm_S (w[35], w[36], selector);
w[52] = __byte_perm_S (w[34], w[35], selector);
w[51] = __byte_perm_S (w[33], w[34], selector);
w[50] = __byte_perm_S (w[32], w[33], selector);
w[49] = __byte_perm_S (w[31], w[32], selector);
w[48] = __byte_perm_S (w[30], w[31], selector);
w[47] = __byte_perm_S (w[29], w[30], selector);
w[46] = __byte_perm_S (w[28], w[29], selector);
w[45] = __byte_perm_S (w[27], w[28], selector);
w[44] = __byte_perm_S (w[26], w[27], selector);
w[43] = __byte_perm_S (w[25], w[26], selector);
w[42] = __byte_perm_S (w[24], w[25], selector);
w[41] = __byte_perm_S (w[23], w[24], selector);
w[40] = __byte_perm_S (w[22], w[23], selector);
w[39] = __byte_perm_S (w[21], w[22], selector);
w[38] = __byte_perm_S (w[20], w[21], selector);
w[37] = __byte_perm_S (w[19], w[20], selector);
w[36] = __byte_perm_S (w[18], w[19], selector);
w[35] = __byte_perm_S (w[17], w[18], selector);
w[34] = __byte_perm_S (w[16], w[17], selector);
w[33] = __byte_perm_S (w[15], w[16], selector);
w[32] = __byte_perm_S (w[14], w[15], selector);
w[31] = __byte_perm_S (w[13], w[14], selector);
w[30] = __byte_perm_S (w[12], w[13], selector);
w[29] = __byte_perm_S (w[11], w[12], selector);
w[28] = __byte_perm_S (w[10], w[11], selector);
w[27] = __byte_perm_S (w[ 9], w[10], selector);
w[26] = __byte_perm_S (w[ 8], w[ 9], selector);
w[25] = __byte_perm_S (w[ 7], w[ 8], selector);
w[24] = __byte_perm_S (w[ 6], w[ 7], selector);
w[23] = __byte_perm_S (w[ 5], w[ 6], selector);
w[22] = __byte_perm_S (w[ 4], w[ 5], selector);
w[21] = __byte_perm_S (w[ 3], w[ 4], selector);
w[20] = __byte_perm_S (w[ 2], w[ 3], selector);
w[19] = __byte_perm_S (w[ 1], w[ 2], selector);
w[18] = __byte_perm_S (w[ 0], w[ 1], selector);
w[17] = __byte_perm_S ( 0, w[ 0], selector);
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 18:
w[63] = __byte_perm_S (w[44], w[45], selector);
w[62] = __byte_perm_S (w[43], w[44], selector);
w[61] = __byte_perm_S (w[42], w[43], selector);
w[60] = __byte_perm_S (w[41], w[42], selector);
w[59] = __byte_perm_S (w[40], w[41], selector);
w[58] = __byte_perm_S (w[39], w[40], selector);
w[57] = __byte_perm_S (w[38], w[39], selector);
w[56] = __byte_perm_S (w[37], w[38], selector);
w[55] = __byte_perm_S (w[36], w[37], selector);
w[54] = __byte_perm_S (w[35], w[36], selector);
w[53] = __byte_perm_S (w[34], w[35], selector);
w[52] = __byte_perm_S (w[33], w[34], selector);
w[51] = __byte_perm_S (w[32], w[33], selector);
w[50] = __byte_perm_S (w[31], w[32], selector);
w[49] = __byte_perm_S (w[30], w[31], selector);
w[48] = __byte_perm_S (w[29], w[30], selector);
w[47] = __byte_perm_S (w[28], w[29], selector);
w[46] = __byte_perm_S (w[27], w[28], selector);
w[45] = __byte_perm_S (w[26], w[27], selector);
w[44] = __byte_perm_S (w[25], w[26], selector);
w[43] = __byte_perm_S (w[24], w[25], selector);
w[42] = __byte_perm_S (w[23], w[24], selector);
w[41] = __byte_perm_S (w[22], w[23], selector);
w[40] = __byte_perm_S (w[21], w[22], selector);
w[39] = __byte_perm_S (w[20], w[21], selector);
w[38] = __byte_perm_S (w[19], w[20], selector);
w[37] = __byte_perm_S (w[18], w[19], selector);
w[36] = __byte_perm_S (w[17], w[18], selector);
w[35] = __byte_perm_S (w[16], w[17], selector);
w[34] = __byte_perm_S (w[15], w[16], selector);
w[33] = __byte_perm_S (w[14], w[15], selector);
w[32] = __byte_perm_S (w[13], w[14], selector);
w[31] = __byte_perm_S (w[12], w[13], selector);
w[30] = __byte_perm_S (w[11], w[12], selector);
w[29] = __byte_perm_S (w[10], w[11], selector);
w[28] = __byte_perm_S (w[ 9], w[10], selector);
w[27] = __byte_perm_S (w[ 8], w[ 9], selector);
w[26] = __byte_perm_S (w[ 7], w[ 8], selector);
w[25] = __byte_perm_S (w[ 6], w[ 7], selector);
w[24] = __byte_perm_S (w[ 5], w[ 6], selector);
w[23] = __byte_perm_S (w[ 4], w[ 5], selector);
w[22] = __byte_perm_S (w[ 3], w[ 4], selector);
w[21] = __byte_perm_S (w[ 2], w[ 3], selector);
w[20] = __byte_perm_S (w[ 1], w[ 2], selector);
w[19] = __byte_perm_S (w[ 0], w[ 1], selector);
w[18] = __byte_perm_S ( 0, w[ 0], selector);
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 19:
w[63] = __byte_perm_S (w[43], w[44], selector);
w[62] = __byte_perm_S (w[42], w[43], selector);
w[61] = __byte_perm_S (w[41], w[42], selector);
w[60] = __byte_perm_S (w[40], w[41], selector);
w[59] = __byte_perm_S (w[39], w[40], selector);
w[58] = __byte_perm_S (w[38], w[39], selector);
w[57] = __byte_perm_S (w[37], w[38], selector);
w[56] = __byte_perm_S (w[36], w[37], selector);
w[55] = __byte_perm_S (w[35], w[36], selector);
w[54] = __byte_perm_S (w[34], w[35], selector);
w[53] = __byte_perm_S (w[33], w[34], selector);
w[52] = __byte_perm_S (w[32], w[33], selector);
w[51] = __byte_perm_S (w[31], w[32], selector);
w[50] = __byte_perm_S (w[30], w[31], selector);
w[49] = __byte_perm_S (w[29], w[30], selector);
w[48] = __byte_perm_S (w[28], w[29], selector);
w[47] = __byte_perm_S (w[27], w[28], selector);
w[46] = __byte_perm_S (w[26], w[27], selector);
w[45] = __byte_perm_S (w[25], w[26], selector);
w[44] = __byte_perm_S (w[24], w[25], selector);
w[43] = __byte_perm_S (w[23], w[24], selector);
w[42] = __byte_perm_S (w[22], w[23], selector);
w[41] = __byte_perm_S (w[21], w[22], selector);
w[40] = __byte_perm_S (w[20], w[21], selector);
w[39] = __byte_perm_S (w[19], w[20], selector);
w[38] = __byte_perm_S (w[18], w[19], selector);
w[37] = __byte_perm_S (w[17], w[18], selector);
w[36] = __byte_perm_S (w[16], w[17], selector);
w[35] = __byte_perm_S (w[15], w[16], selector);
w[34] = __byte_perm_S (w[14], w[15], selector);
w[33] = __byte_perm_S (w[13], w[14], selector);
w[32] = __byte_perm_S (w[12], w[13], selector);
w[31] = __byte_perm_S (w[11], w[12], selector);
w[30] = __byte_perm_S (w[10], w[11], selector);
w[29] = __byte_perm_S (w[ 9], w[10], selector);
w[28] = __byte_perm_S (w[ 8], w[ 9], selector);
w[27] = __byte_perm_S (w[ 7], w[ 8], selector);
w[26] = __byte_perm_S (w[ 6], w[ 7], selector);
w[25] = __byte_perm_S (w[ 5], w[ 6], selector);
w[24] = __byte_perm_S (w[ 4], w[ 5], selector);
w[23] = __byte_perm_S (w[ 3], w[ 4], selector);
w[22] = __byte_perm_S (w[ 2], w[ 3], selector);
w[21] = __byte_perm_S (w[ 1], w[ 2], selector);
w[20] = __byte_perm_S (w[ 0], w[ 1], selector);
w[19] = __byte_perm_S ( 0, w[ 0], selector);
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 20:
w[63] = __byte_perm_S (w[42], w[43], selector);
w[62] = __byte_perm_S (w[41], w[42], selector);
w[61] = __byte_perm_S (w[40], w[41], selector);
w[60] = __byte_perm_S (w[39], w[40], selector);
w[59] = __byte_perm_S (w[38], w[39], selector);
w[58] = __byte_perm_S (w[37], w[38], selector);
w[57] = __byte_perm_S (w[36], w[37], selector);
w[56] = __byte_perm_S (w[35], w[36], selector);
w[55] = __byte_perm_S (w[34], w[35], selector);
w[54] = __byte_perm_S (w[33], w[34], selector);
w[53] = __byte_perm_S (w[32], w[33], selector);
w[52] = __byte_perm_S (w[31], w[32], selector);
w[51] = __byte_perm_S (w[30], w[31], selector);
w[50] = __byte_perm_S (w[29], w[30], selector);
w[49] = __byte_perm_S (w[28], w[29], selector);
w[48] = __byte_perm_S (w[27], w[28], selector);
w[47] = __byte_perm_S (w[26], w[27], selector);
w[46] = __byte_perm_S (w[25], w[26], selector);
w[45] = __byte_perm_S (w[24], w[25], selector);
w[44] = __byte_perm_S (w[23], w[24], selector);
w[43] = __byte_perm_S (w[22], w[23], selector);
w[42] = __byte_perm_S (w[21], w[22], selector);
w[41] = __byte_perm_S (w[20], w[21], selector);
w[40] = __byte_perm_S (w[19], w[20], selector);
w[39] = __byte_perm_S (w[18], w[19], selector);
w[38] = __byte_perm_S (w[17], w[18], selector);
w[37] = __byte_perm_S (w[16], w[17], selector);
w[36] = __byte_perm_S (w[15], w[16], selector);
w[35] = __byte_perm_S (w[14], w[15], selector);
w[34] = __byte_perm_S (w[13], w[14], selector);
w[33] = __byte_perm_S (w[12], w[13], selector);
w[32] = __byte_perm_S (w[11], w[12], selector);
w[31] = __byte_perm_S (w[10], w[11], selector);
w[30] = __byte_perm_S (w[ 9], w[10], selector);
w[29] = __byte_perm_S (w[ 8], w[ 9], selector);
w[28] = __byte_perm_S (w[ 7], w[ 8], selector);
w[27] = __byte_perm_S (w[ 6], w[ 7], selector);
w[26] = __byte_perm_S (w[ 5], w[ 6], selector);
w[25] = __byte_perm_S (w[ 4], w[ 5], selector);
w[24] = __byte_perm_S (w[ 3], w[ 4], selector);
w[23] = __byte_perm_S (w[ 2], w[ 3], selector);
w[22] = __byte_perm_S (w[ 1], w[ 2], selector);
w[21] = __byte_perm_S (w[ 0], w[ 1], selector);
w[20] = __byte_perm_S ( 0, w[ 0], selector);
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 21:
w[63] = __byte_perm_S (w[41], w[42], selector);
w[62] = __byte_perm_S (w[40], w[41], selector);
w[61] = __byte_perm_S (w[39], w[40], selector);
w[60] = __byte_perm_S (w[38], w[39], selector);
w[59] = __byte_perm_S (w[37], w[38], selector);
w[58] = __byte_perm_S (w[36], w[37], selector);
w[57] = __byte_perm_S (w[35], w[36], selector);
w[56] = __byte_perm_S (w[34], w[35], selector);
w[55] = __byte_perm_S (w[33], w[34], selector);
w[54] = __byte_perm_S (w[32], w[33], selector);
w[53] = __byte_perm_S (w[31], w[32], selector);
w[52] = __byte_perm_S (w[30], w[31], selector);
w[51] = __byte_perm_S (w[29], w[30], selector);
w[50] = __byte_perm_S (w[28], w[29], selector);
w[49] = __byte_perm_S (w[27], w[28], selector);
w[48] = __byte_perm_S (w[26], w[27], selector);
w[47] = __byte_perm_S (w[25], w[26], selector);
w[46] = __byte_perm_S (w[24], w[25], selector);
w[45] = __byte_perm_S (w[23], w[24], selector);
w[44] = __byte_perm_S (w[22], w[23], selector);
w[43] = __byte_perm_S (w[21], w[22], selector);
w[42] = __byte_perm_S (w[20], w[21], selector);
w[41] = __byte_perm_S (w[19], w[20], selector);
w[40] = __byte_perm_S (w[18], w[19], selector);
w[39] = __byte_perm_S (w[17], w[18], selector);
w[38] = __byte_perm_S (w[16], w[17], selector);
w[37] = __byte_perm_S (w[15], w[16], selector);
w[36] = __byte_perm_S (w[14], w[15], selector);
w[35] = __byte_perm_S (w[13], w[14], selector);
w[34] = __byte_perm_S (w[12], w[13], selector);
w[33] = __byte_perm_S (w[11], w[12], selector);
w[32] = __byte_perm_S (w[10], w[11], selector);
w[31] = __byte_perm_S (w[ 9], w[10], selector);
w[30] = __byte_perm_S (w[ 8], w[ 9], selector);
w[29] = __byte_perm_S (w[ 7], w[ 8], selector);
w[28] = __byte_perm_S (w[ 6], w[ 7], selector);
w[27] = __byte_perm_S (w[ 5], w[ 6], selector);
w[26] = __byte_perm_S (w[ 4], w[ 5], selector);
w[25] = __byte_perm_S (w[ 3], w[ 4], selector);
w[24] = __byte_perm_S (w[ 2], w[ 3], selector);
w[23] = __byte_perm_S (w[ 1], w[ 2], selector);
w[22] = __byte_perm_S (w[ 0], w[ 1], selector);
w[21] = __byte_perm_S ( 0, w[ 0], selector);
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 22:
w[63] = __byte_perm_S (w[40], w[41], selector);
w[62] = __byte_perm_S (w[39], w[40], selector);
w[61] = __byte_perm_S (w[38], w[39], selector);
w[60] = __byte_perm_S (w[37], w[38], selector);
w[59] = __byte_perm_S (w[36], w[37], selector);
w[58] = __byte_perm_S (w[35], w[36], selector);
w[57] = __byte_perm_S (w[34], w[35], selector);
w[56] = __byte_perm_S (w[33], w[34], selector);
w[55] = __byte_perm_S (w[32], w[33], selector);
w[54] = __byte_perm_S (w[31], w[32], selector);
w[53] = __byte_perm_S (w[30], w[31], selector);
w[52] = __byte_perm_S (w[29], w[30], selector);
w[51] = __byte_perm_S (w[28], w[29], selector);
w[50] = __byte_perm_S (w[27], w[28], selector);
w[49] = __byte_perm_S (w[26], w[27], selector);
w[48] = __byte_perm_S (w[25], w[26], selector);
w[47] = __byte_perm_S (w[24], w[25], selector);
w[46] = __byte_perm_S (w[23], w[24], selector);
w[45] = __byte_perm_S (w[22], w[23], selector);
w[44] = __byte_perm_S (w[21], w[22], selector);
w[43] = __byte_perm_S (w[20], w[21], selector);
w[42] = __byte_perm_S (w[19], w[20], selector);
w[41] = __byte_perm_S (w[18], w[19], selector);
w[40] = __byte_perm_S (w[17], w[18], selector);
w[39] = __byte_perm_S (w[16], w[17], selector);
w[38] = __byte_perm_S (w[15], w[16], selector);
w[37] = __byte_perm_S (w[14], w[15], selector);
w[36] = __byte_perm_S (w[13], w[14], selector);
w[35] = __byte_perm_S (w[12], w[13], selector);
w[34] = __byte_perm_S (w[11], w[12], selector);
w[33] = __byte_perm_S (w[10], w[11], selector);
w[32] = __byte_perm_S (w[ 9], w[10], selector);
w[31] = __byte_perm_S (w[ 8], w[ 9], selector);
w[30] = __byte_perm_S (w[ 7], w[ 8], selector);
w[29] = __byte_perm_S (w[ 6], w[ 7], selector);
w[28] = __byte_perm_S (w[ 5], w[ 6], selector);
w[27] = __byte_perm_S (w[ 4], w[ 5], selector);
w[26] = __byte_perm_S (w[ 3], w[ 4], selector);
w[25] = __byte_perm_S (w[ 2], w[ 3], selector);
w[24] = __byte_perm_S (w[ 1], w[ 2], selector);
w[23] = __byte_perm_S (w[ 0], w[ 1], selector);
w[22] = __byte_perm_S ( 0, w[ 0], selector);
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 23:
w[63] = __byte_perm_S (w[39], w[40], selector);
w[62] = __byte_perm_S (w[38], w[39], selector);
w[61] = __byte_perm_S (w[37], w[38], selector);
w[60] = __byte_perm_S (w[36], w[37], selector);
w[59] = __byte_perm_S (w[35], w[36], selector);
w[58] = __byte_perm_S (w[34], w[35], selector);
w[57] = __byte_perm_S (w[33], w[34], selector);
w[56] = __byte_perm_S (w[32], w[33], selector);
w[55] = __byte_perm_S (w[31], w[32], selector);
w[54] = __byte_perm_S (w[30], w[31], selector);
w[53] = __byte_perm_S (w[29], w[30], selector);
w[52] = __byte_perm_S (w[28], w[29], selector);
w[51] = __byte_perm_S (w[27], w[28], selector);
w[50] = __byte_perm_S (w[26], w[27], selector);
w[49] = __byte_perm_S (w[25], w[26], selector);
w[48] = __byte_perm_S (w[24], w[25], selector);
w[47] = __byte_perm_S (w[23], w[24], selector);
w[46] = __byte_perm_S (w[22], w[23], selector);
w[45] = __byte_perm_S (w[21], w[22], selector);
w[44] = __byte_perm_S (w[20], w[21], selector);
w[43] = __byte_perm_S (w[19], w[20], selector);
w[42] = __byte_perm_S (w[18], w[19], selector);
w[41] = __byte_perm_S (w[17], w[18], selector);
w[40] = __byte_perm_S (w[16], w[17], selector);
w[39] = __byte_perm_S (w[15], w[16], selector);
w[38] = __byte_perm_S (w[14], w[15], selector);
w[37] = __byte_perm_S (w[13], w[14], selector);
w[36] = __byte_perm_S (w[12], w[13], selector);
w[35] = __byte_perm_S (w[11], w[12], selector);
w[34] = __byte_perm_S (w[10], w[11], selector);
w[33] = __byte_perm_S (w[ 9], w[10], selector);
w[32] = __byte_perm_S (w[ 8], w[ 9], selector);
w[31] = __byte_perm_S (w[ 7], w[ 8], selector);
w[30] = __byte_perm_S (w[ 6], w[ 7], selector);
w[29] = __byte_perm_S (w[ 5], w[ 6], selector);
w[28] = __byte_perm_S (w[ 4], w[ 5], selector);
w[27] = __byte_perm_S (w[ 3], w[ 4], selector);
w[26] = __byte_perm_S (w[ 2], w[ 3], selector);
w[25] = __byte_perm_S (w[ 1], w[ 2], selector);
w[24] = __byte_perm_S (w[ 0], w[ 1], selector);
w[23] = __byte_perm_S ( 0, w[ 0], selector);
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 24:
w[63] = __byte_perm_S (w[38], w[39], selector);
w[62] = __byte_perm_S (w[37], w[38], selector);
w[61] = __byte_perm_S (w[36], w[37], selector);
w[60] = __byte_perm_S (w[35], w[36], selector);
w[59] = __byte_perm_S (w[34], w[35], selector);
w[58] = __byte_perm_S (w[33], w[34], selector);
w[57] = __byte_perm_S (w[32], w[33], selector);
w[56] = __byte_perm_S (w[31], w[32], selector);
w[55] = __byte_perm_S (w[30], w[31], selector);
w[54] = __byte_perm_S (w[29], w[30], selector);
w[53] = __byte_perm_S (w[28], w[29], selector);
w[52] = __byte_perm_S (w[27], w[28], selector);
w[51] = __byte_perm_S (w[26], w[27], selector);
w[50] = __byte_perm_S (w[25], w[26], selector);
w[49] = __byte_perm_S (w[24], w[25], selector);
w[48] = __byte_perm_S (w[23], w[24], selector);
w[47] = __byte_perm_S (w[22], w[23], selector);
w[46] = __byte_perm_S (w[21], w[22], selector);
w[45] = __byte_perm_S (w[20], w[21], selector);
w[44] = __byte_perm_S (w[19], w[20], selector);
w[43] = __byte_perm_S (w[18], w[19], selector);
w[42] = __byte_perm_S (w[17], w[18], selector);
w[41] = __byte_perm_S (w[16], w[17], selector);
w[40] = __byte_perm_S (w[15], w[16], selector);
w[39] = __byte_perm_S (w[14], w[15], selector);
w[38] = __byte_perm_S (w[13], w[14], selector);
w[37] = __byte_perm_S (w[12], w[13], selector);
w[36] = __byte_perm_S (w[11], w[12], selector);
w[35] = __byte_perm_S (w[10], w[11], selector);
w[34] = __byte_perm_S (w[ 9], w[10], selector);
w[33] = __byte_perm_S (w[ 8], w[ 9], selector);
w[32] = __byte_perm_S (w[ 7], w[ 8], selector);
w[31] = __byte_perm_S (w[ 6], w[ 7], selector);
w[30] = __byte_perm_S (w[ 5], w[ 6], selector);
w[29] = __byte_perm_S (w[ 4], w[ 5], selector);
w[28] = __byte_perm_S (w[ 3], w[ 4], selector);
w[27] = __byte_perm_S (w[ 2], w[ 3], selector);
w[26] = __byte_perm_S (w[ 1], w[ 2], selector);
w[25] = __byte_perm_S (w[ 0], w[ 1], selector);
w[24] = __byte_perm_S ( 0, w[ 0], selector);
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 25:
w[63] = __byte_perm_S (w[37], w[38], selector);
w[62] = __byte_perm_S (w[36], w[37], selector);
w[61] = __byte_perm_S (w[35], w[36], selector);
w[60] = __byte_perm_S (w[34], w[35], selector);
w[59] = __byte_perm_S (w[33], w[34], selector);
w[58] = __byte_perm_S (w[32], w[33], selector);
w[57] = __byte_perm_S (w[31], w[32], selector);
w[56] = __byte_perm_S (w[30], w[31], selector);
w[55] = __byte_perm_S (w[29], w[30], selector);
w[54] = __byte_perm_S (w[28], w[29], selector);
w[53] = __byte_perm_S (w[27], w[28], selector);
w[52] = __byte_perm_S (w[26], w[27], selector);
w[51] = __byte_perm_S (w[25], w[26], selector);
w[50] = __byte_perm_S (w[24], w[25], selector);
w[49] = __byte_perm_S (w[23], w[24], selector);
w[48] = __byte_perm_S (w[22], w[23], selector);
w[47] = __byte_perm_S (w[21], w[22], selector);
w[46] = __byte_perm_S (w[20], w[21], selector);
w[45] = __byte_perm_S (w[19], w[20], selector);
w[44] = __byte_perm_S (w[18], w[19], selector);
w[43] = __byte_perm_S (w[17], w[18], selector);
w[42] = __byte_perm_S (w[16], w[17], selector);
w[41] = __byte_perm_S (w[15], w[16], selector);
w[40] = __byte_perm_S (w[14], w[15], selector);
w[39] = __byte_perm_S (w[13], w[14], selector);
w[38] = __byte_perm_S (w[12], w[13], selector);
w[37] = __byte_perm_S (w[11], w[12], selector);
w[36] = __byte_perm_S (w[10], w[11], selector);
w[35] = __byte_perm_S (w[ 9], w[10], selector);
w[34] = __byte_perm_S (w[ 8], w[ 9], selector);
w[33] = __byte_perm_S (w[ 7], w[ 8], selector);
w[32] = __byte_perm_S (w[ 6], w[ 7], selector);
w[31] = __byte_perm_S (w[ 5], w[ 6], selector);
w[30] = __byte_perm_S (w[ 4], w[ 5], selector);
w[29] = __byte_perm_S (w[ 3], w[ 4], selector);
w[28] = __byte_perm_S (w[ 2], w[ 3], selector);
w[27] = __byte_perm_S (w[ 1], w[ 2], selector);
w[26] = __byte_perm_S (w[ 0], w[ 1], selector);
w[25] = __byte_perm_S ( 0, w[ 0], selector);
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 26:
w[63] = __byte_perm_S (w[36], w[37], selector);
w[62] = __byte_perm_S (w[35], w[36], selector);
w[61] = __byte_perm_S (w[34], w[35], selector);
w[60] = __byte_perm_S (w[33], w[34], selector);
w[59] = __byte_perm_S (w[32], w[33], selector);
w[58] = __byte_perm_S (w[31], w[32], selector);
w[57] = __byte_perm_S (w[30], w[31], selector);
w[56] = __byte_perm_S (w[29], w[30], selector);
w[55] = __byte_perm_S (w[28], w[29], selector);
w[54] = __byte_perm_S (w[27], w[28], selector);
w[53] = __byte_perm_S (w[26], w[27], selector);
w[52] = __byte_perm_S (w[25], w[26], selector);
w[51] = __byte_perm_S (w[24], w[25], selector);
w[50] = __byte_perm_S (w[23], w[24], selector);
w[49] = __byte_perm_S (w[22], w[23], selector);
w[48] = __byte_perm_S (w[21], w[22], selector);
w[47] = __byte_perm_S (w[20], w[21], selector);
w[46] = __byte_perm_S (w[19], w[20], selector);
w[45] = __byte_perm_S (w[18], w[19], selector);
w[44] = __byte_perm_S (w[17], w[18], selector);
w[43] = __byte_perm_S (w[16], w[17], selector);
w[42] = __byte_perm_S (w[15], w[16], selector);
w[41] = __byte_perm_S (w[14], w[15], selector);
w[40] = __byte_perm_S (w[13], w[14], selector);
w[39] = __byte_perm_S (w[12], w[13], selector);
w[38] = __byte_perm_S (w[11], w[12], selector);
w[37] = __byte_perm_S (w[10], w[11], selector);
w[36] = __byte_perm_S (w[ 9], w[10], selector);
w[35] = __byte_perm_S (w[ 8], w[ 9], selector);
w[34] = __byte_perm_S (w[ 7], w[ 8], selector);
w[33] = __byte_perm_S (w[ 6], w[ 7], selector);
w[32] = __byte_perm_S (w[ 5], w[ 6], selector);
w[31] = __byte_perm_S (w[ 4], w[ 5], selector);
w[30] = __byte_perm_S (w[ 3], w[ 4], selector);
w[29] = __byte_perm_S (w[ 2], w[ 3], selector);
w[28] = __byte_perm_S (w[ 1], w[ 2], selector);
w[27] = __byte_perm_S (w[ 0], w[ 1], selector);
w[26] = __byte_perm_S ( 0, w[ 0], selector);
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 27:
w[63] = __byte_perm_S (w[35], w[36], selector);
w[62] = __byte_perm_S (w[34], w[35], selector);
w[61] = __byte_perm_S (w[33], w[34], selector);
w[60] = __byte_perm_S (w[32], w[33], selector);
w[59] = __byte_perm_S (w[31], w[32], selector);
w[58] = __byte_perm_S (w[30], w[31], selector);
w[57] = __byte_perm_S (w[29], w[30], selector);
w[56] = __byte_perm_S (w[28], w[29], selector);
w[55] = __byte_perm_S (w[27], w[28], selector);
w[54] = __byte_perm_S (w[26], w[27], selector);
w[53] = __byte_perm_S (w[25], w[26], selector);
w[52] = __byte_perm_S (w[24], w[25], selector);
w[51] = __byte_perm_S (w[23], w[24], selector);
w[50] = __byte_perm_S (w[22], w[23], selector);
w[49] = __byte_perm_S (w[21], w[22], selector);
w[48] = __byte_perm_S (w[20], w[21], selector);
w[47] = __byte_perm_S (w[19], w[20], selector);
w[46] = __byte_perm_S (w[18], w[19], selector);
w[45] = __byte_perm_S (w[17], w[18], selector);
w[44] = __byte_perm_S (w[16], w[17], selector);
w[43] = __byte_perm_S (w[15], w[16], selector);
w[42] = __byte_perm_S (w[14], w[15], selector);
w[41] = __byte_perm_S (w[13], w[14], selector);
w[40] = __byte_perm_S (w[12], w[13], selector);
w[39] = __byte_perm_S (w[11], w[12], selector);
w[38] = __byte_perm_S (w[10], w[11], selector);
w[37] = __byte_perm_S (w[ 9], w[10], selector);
w[36] = __byte_perm_S (w[ 8], w[ 9], selector);
w[35] = __byte_perm_S (w[ 7], w[ 8], selector);
w[34] = __byte_perm_S (w[ 6], w[ 7], selector);
w[33] = __byte_perm_S (w[ 5], w[ 6], selector);
w[32] = __byte_perm_S (w[ 4], w[ 5], selector);
w[31] = __byte_perm_S (w[ 3], w[ 4], selector);
w[30] = __byte_perm_S (w[ 2], w[ 3], selector);
w[29] = __byte_perm_S (w[ 1], w[ 2], selector);
w[28] = __byte_perm_S (w[ 0], w[ 1], selector);
w[27] = __byte_perm_S ( 0, w[ 0], selector);
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 28:
w[63] = __byte_perm_S (w[34], w[35], selector);
w[62] = __byte_perm_S (w[33], w[34], selector);
w[61] = __byte_perm_S (w[32], w[33], selector);
w[60] = __byte_perm_S (w[31], w[32], selector);
w[59] = __byte_perm_S (w[30], w[31], selector);
w[58] = __byte_perm_S (w[29], w[30], selector);
w[57] = __byte_perm_S (w[28], w[29], selector);
w[56] = __byte_perm_S (w[27], w[28], selector);
w[55] = __byte_perm_S (w[26], w[27], selector);
w[54] = __byte_perm_S (w[25], w[26], selector);
w[53] = __byte_perm_S (w[24], w[25], selector);
w[52] = __byte_perm_S (w[23], w[24], selector);
w[51] = __byte_perm_S (w[22], w[23], selector);
w[50] = __byte_perm_S (w[21], w[22], selector);
w[49] = __byte_perm_S (w[20], w[21], selector);
w[48] = __byte_perm_S (w[19], w[20], selector);
w[47] = __byte_perm_S (w[18], w[19], selector);
w[46] = __byte_perm_S (w[17], w[18], selector);
w[45] = __byte_perm_S (w[16], w[17], selector);
w[44] = __byte_perm_S (w[15], w[16], selector);
w[43] = __byte_perm_S (w[14], w[15], selector);
w[42] = __byte_perm_S (w[13], w[14], selector);
w[41] = __byte_perm_S (w[12], w[13], selector);
w[40] = __byte_perm_S (w[11], w[12], selector);
w[39] = __byte_perm_S (w[10], w[11], selector);
w[38] = __byte_perm_S (w[ 9], w[10], selector);
w[37] = __byte_perm_S (w[ 8], w[ 9], selector);
w[36] = __byte_perm_S (w[ 7], w[ 8], selector);
w[35] = __byte_perm_S (w[ 6], w[ 7], selector);
w[34] = __byte_perm_S (w[ 5], w[ 6], selector);
w[33] = __byte_perm_S (w[ 4], w[ 5], selector);
w[32] = __byte_perm_S (w[ 3], w[ 4], selector);
w[31] = __byte_perm_S (w[ 2], w[ 3], selector);
w[30] = __byte_perm_S (w[ 1], w[ 2], selector);
w[29] = __byte_perm_S (w[ 0], w[ 1], selector);
w[28] = __byte_perm_S ( 0, w[ 0], selector);
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 29:
w[63] = __byte_perm_S (w[33], w[34], selector);
w[62] = __byte_perm_S (w[32], w[33], selector);
w[61] = __byte_perm_S (w[31], w[32], selector);
w[60] = __byte_perm_S (w[30], w[31], selector);
w[59] = __byte_perm_S (w[29], w[30], selector);
w[58] = __byte_perm_S (w[28], w[29], selector);
w[57] = __byte_perm_S (w[27], w[28], selector);
w[56] = __byte_perm_S (w[26], w[27], selector);
w[55] = __byte_perm_S (w[25], w[26], selector);
w[54] = __byte_perm_S (w[24], w[25], selector);
w[53] = __byte_perm_S (w[23], w[24], selector);
w[52] = __byte_perm_S (w[22], w[23], selector);
w[51] = __byte_perm_S (w[21], w[22], selector);
w[50] = __byte_perm_S (w[20], w[21], selector);
w[49] = __byte_perm_S (w[19], w[20], selector);
w[48] = __byte_perm_S (w[18], w[19], selector);
w[47] = __byte_perm_S (w[17], w[18], selector);
w[46] = __byte_perm_S (w[16], w[17], selector);
w[45] = __byte_perm_S (w[15], w[16], selector);
w[44] = __byte_perm_S (w[14], w[15], selector);
w[43] = __byte_perm_S (w[13], w[14], selector);
w[42] = __byte_perm_S (w[12], w[13], selector);
w[41] = __byte_perm_S (w[11], w[12], selector);
w[40] = __byte_perm_S (w[10], w[11], selector);
w[39] = __byte_perm_S (w[ 9], w[10], selector);
w[38] = __byte_perm_S (w[ 8], w[ 9], selector);
w[37] = __byte_perm_S (w[ 7], w[ 8], selector);
w[36] = __byte_perm_S (w[ 6], w[ 7], selector);
w[35] = __byte_perm_S (w[ 5], w[ 6], selector);
w[34] = __byte_perm_S (w[ 4], w[ 5], selector);
w[33] = __byte_perm_S (w[ 3], w[ 4], selector);
w[32] = __byte_perm_S (w[ 2], w[ 3], selector);
w[31] = __byte_perm_S (w[ 1], w[ 2], selector);
w[30] = __byte_perm_S (w[ 0], w[ 1], selector);
w[29] = __byte_perm_S ( 0, w[ 0], selector);
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 30:
w[63] = __byte_perm_S (w[32], w[33], selector);
w[62] = __byte_perm_S (w[31], w[32], selector);
w[61] = __byte_perm_S (w[30], w[31], selector);
w[60] = __byte_perm_S (w[29], w[30], selector);
w[59] = __byte_perm_S (w[28], w[29], selector);
w[58] = __byte_perm_S (w[27], w[28], selector);
w[57] = __byte_perm_S (w[26], w[27], selector);
w[56] = __byte_perm_S (w[25], w[26], selector);
w[55] = __byte_perm_S (w[24], w[25], selector);
w[54] = __byte_perm_S (w[23], w[24], selector);
w[53] = __byte_perm_S (w[22], w[23], selector);
w[52] = __byte_perm_S (w[21], w[22], selector);
w[51] = __byte_perm_S (w[20], w[21], selector);
w[50] = __byte_perm_S (w[19], w[20], selector);
w[49] = __byte_perm_S (w[18], w[19], selector);
w[48] = __byte_perm_S (w[17], w[18], selector);
w[47] = __byte_perm_S (w[16], w[17], selector);
w[46] = __byte_perm_S (w[15], w[16], selector);
w[45] = __byte_perm_S (w[14], w[15], selector);
w[44] = __byte_perm_S (w[13], w[14], selector);
w[43] = __byte_perm_S (w[12], w[13], selector);
w[42] = __byte_perm_S (w[11], w[12], selector);
w[41] = __byte_perm_S (w[10], w[11], selector);
w[40] = __byte_perm_S (w[ 9], w[10], selector);
w[39] = __byte_perm_S (w[ 8], w[ 9], selector);
w[38] = __byte_perm_S (w[ 7], w[ 8], selector);
w[37] = __byte_perm_S (w[ 6], w[ 7], selector);
w[36] = __byte_perm_S (w[ 5], w[ 6], selector);
w[35] = __byte_perm_S (w[ 4], w[ 5], selector);
w[34] = __byte_perm_S (w[ 3], w[ 4], selector);
w[33] = __byte_perm_S (w[ 2], w[ 3], selector);
w[32] = __byte_perm_S (w[ 1], w[ 2], selector);
w[31] = __byte_perm_S (w[ 0], w[ 1], selector);
w[30] = __byte_perm_S ( 0, w[ 0], selector);
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 31:
w[63] = __byte_perm_S (w[31], w[32], selector);
w[62] = __byte_perm_S (w[30], w[31], selector);
w[61] = __byte_perm_S (w[29], w[30], selector);
w[60] = __byte_perm_S (w[28], w[29], selector);
w[59] = __byte_perm_S (w[27], w[28], selector);
w[58] = __byte_perm_S (w[26], w[27], selector);
w[57] = __byte_perm_S (w[25], w[26], selector);
w[56] = __byte_perm_S (w[24], w[25], selector);
w[55] = __byte_perm_S (w[23], w[24], selector);
w[54] = __byte_perm_S (w[22], w[23], selector);
w[53] = __byte_perm_S (w[21], w[22], selector);
w[52] = __byte_perm_S (w[20], w[21], selector);
w[51] = __byte_perm_S (w[19], w[20], selector);
w[50] = __byte_perm_S (w[18], w[19], selector);
w[49] = __byte_perm_S (w[17], w[18], selector);
w[48] = __byte_perm_S (w[16], w[17], selector);
w[47] = __byte_perm_S (w[15], w[16], selector);
w[46] = __byte_perm_S (w[14], w[15], selector);
w[45] = __byte_perm_S (w[13], w[14], selector);
w[44] = __byte_perm_S (w[12], w[13], selector);
w[43] = __byte_perm_S (w[11], w[12], selector);
w[42] = __byte_perm_S (w[10], w[11], selector);
w[41] = __byte_perm_S (w[ 9], w[10], selector);
w[40] = __byte_perm_S (w[ 8], w[ 9], selector);
w[39] = __byte_perm_S (w[ 7], w[ 8], selector);
w[38] = __byte_perm_S (w[ 6], w[ 7], selector);
w[37] = __byte_perm_S (w[ 5], w[ 6], selector);
w[36] = __byte_perm_S (w[ 4], w[ 5], selector);
w[35] = __byte_perm_S (w[ 3], w[ 4], selector);
w[34] = __byte_perm_S (w[ 2], w[ 3], selector);
w[33] = __byte_perm_S (w[ 1], w[ 2], selector);
w[32] = __byte_perm_S (w[ 0], w[ 1], selector);
w[31] = __byte_perm_S ( 0, w[ 0], selector);
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 32:
w[63] = __byte_perm_S (w[30], w[31], selector);
w[62] = __byte_perm_S (w[29], w[30], selector);
w[61] = __byte_perm_S (w[28], w[29], selector);
w[60] = __byte_perm_S (w[27], w[28], selector);
w[59] = __byte_perm_S (w[26], w[27], selector);
w[58] = __byte_perm_S (w[25], w[26], selector);
w[57] = __byte_perm_S (w[24], w[25], selector);
w[56] = __byte_perm_S (w[23], w[24], selector);
w[55] = __byte_perm_S (w[22], w[23], selector);
w[54] = __byte_perm_S (w[21], w[22], selector);
w[53] = __byte_perm_S (w[20], w[21], selector);
w[52] = __byte_perm_S (w[19], w[20], selector);
w[51] = __byte_perm_S (w[18], w[19], selector);
w[50] = __byte_perm_S (w[17], w[18], selector);
w[49] = __byte_perm_S (w[16], w[17], selector);
w[48] = __byte_perm_S (w[15], w[16], selector);
w[47] = __byte_perm_S (w[14], w[15], selector);
w[46] = __byte_perm_S (w[13], w[14], selector);
w[45] = __byte_perm_S (w[12], w[13], selector);
w[44] = __byte_perm_S (w[11], w[12], selector);
w[43] = __byte_perm_S (w[10], w[11], selector);
w[42] = __byte_perm_S (w[ 9], w[10], selector);
w[41] = __byte_perm_S (w[ 8], w[ 9], selector);
w[40] = __byte_perm_S (w[ 7], w[ 8], selector);
w[39] = __byte_perm_S (w[ 6], w[ 7], selector);
w[38] = __byte_perm_S (w[ 5], w[ 6], selector);
w[37] = __byte_perm_S (w[ 4], w[ 5], selector);
w[36] = __byte_perm_S (w[ 3], w[ 4], selector);
w[35] = __byte_perm_S (w[ 2], w[ 3], selector);
w[34] = __byte_perm_S (w[ 1], w[ 2], selector);
w[33] = __byte_perm_S (w[ 0], w[ 1], selector);
w[32] = __byte_perm_S ( 0, w[ 0], selector);
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 33:
w[63] = __byte_perm_S (w[29], w[30], selector);
w[62] = __byte_perm_S (w[28], w[29], selector);
w[61] = __byte_perm_S (w[27], w[28], selector);
w[60] = __byte_perm_S (w[26], w[27], selector);
w[59] = __byte_perm_S (w[25], w[26], selector);
w[58] = __byte_perm_S (w[24], w[25], selector);
w[57] = __byte_perm_S (w[23], w[24], selector);
w[56] = __byte_perm_S (w[22], w[23], selector);
w[55] = __byte_perm_S (w[21], w[22], selector);
w[54] = __byte_perm_S (w[20], w[21], selector);
w[53] = __byte_perm_S (w[19], w[20], selector);
w[52] = __byte_perm_S (w[18], w[19], selector);
w[51] = __byte_perm_S (w[17], w[18], selector);
w[50] = __byte_perm_S (w[16], w[17], selector);
w[49] = __byte_perm_S (w[15], w[16], selector);
w[48] = __byte_perm_S (w[14], w[15], selector);
w[47] = __byte_perm_S (w[13], w[14], selector);
w[46] = __byte_perm_S (w[12], w[13], selector);
w[45] = __byte_perm_S (w[11], w[12], selector);
w[44] = __byte_perm_S (w[10], w[11], selector);
w[43] = __byte_perm_S (w[ 9], w[10], selector);
w[42] = __byte_perm_S (w[ 8], w[ 9], selector);
w[41] = __byte_perm_S (w[ 7], w[ 8], selector);
w[40] = __byte_perm_S (w[ 6], w[ 7], selector);
w[39] = __byte_perm_S (w[ 5], w[ 6], selector);
w[38] = __byte_perm_S (w[ 4], w[ 5], selector);
w[37] = __byte_perm_S (w[ 3], w[ 4], selector);
w[36] = __byte_perm_S (w[ 2], w[ 3], selector);
w[35] = __byte_perm_S (w[ 1], w[ 2], selector);
w[34] = __byte_perm_S (w[ 0], w[ 1], selector);
w[33] = __byte_perm_S ( 0, w[ 0], selector);
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 34:
w[63] = __byte_perm_S (w[28], w[29], selector);
w[62] = __byte_perm_S (w[27], w[28], selector);
w[61] = __byte_perm_S (w[26], w[27], selector);
w[60] = __byte_perm_S (w[25], w[26], selector);
w[59] = __byte_perm_S (w[24], w[25], selector);
w[58] = __byte_perm_S (w[23], w[24], selector);
w[57] = __byte_perm_S (w[22], w[23], selector);
w[56] = __byte_perm_S (w[21], w[22], selector);
w[55] = __byte_perm_S (w[20], w[21], selector);
w[54] = __byte_perm_S (w[19], w[20], selector);
w[53] = __byte_perm_S (w[18], w[19], selector);
w[52] = __byte_perm_S (w[17], w[18], selector);
w[51] = __byte_perm_S (w[16], w[17], selector);
w[50] = __byte_perm_S (w[15], w[16], selector);
w[49] = __byte_perm_S (w[14], w[15], selector);
w[48] = __byte_perm_S (w[13], w[14], selector);
w[47] = __byte_perm_S (w[12], w[13], selector);
w[46] = __byte_perm_S (w[11], w[12], selector);
w[45] = __byte_perm_S (w[10], w[11], selector);
w[44] = __byte_perm_S (w[ 9], w[10], selector);
w[43] = __byte_perm_S (w[ 8], w[ 9], selector);
w[42] = __byte_perm_S (w[ 7], w[ 8], selector);
w[41] = __byte_perm_S (w[ 6], w[ 7], selector);
w[40] = __byte_perm_S (w[ 5], w[ 6], selector);
w[39] = __byte_perm_S (w[ 4], w[ 5], selector);
w[38] = __byte_perm_S (w[ 3], w[ 4], selector);
w[37] = __byte_perm_S (w[ 2], w[ 3], selector);
w[36] = __byte_perm_S (w[ 1], w[ 2], selector);
w[35] = __byte_perm_S (w[ 0], w[ 1], selector);
w[34] = __byte_perm_S ( 0, w[ 0], selector);
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 35:
w[63] = __byte_perm_S (w[27], w[28], selector);
w[62] = __byte_perm_S (w[26], w[27], selector);
w[61] = __byte_perm_S (w[25], w[26], selector);
w[60] = __byte_perm_S (w[24], w[25], selector);
w[59] = __byte_perm_S (w[23], w[24], selector);
w[58] = __byte_perm_S (w[22], w[23], selector);
w[57] = __byte_perm_S (w[21], w[22], selector);
w[56] = __byte_perm_S (w[20], w[21], selector);
w[55] = __byte_perm_S (w[19], w[20], selector);
w[54] = __byte_perm_S (w[18], w[19], selector);
w[53] = __byte_perm_S (w[17], w[18], selector);
w[52] = __byte_perm_S (w[16], w[17], selector);
w[51] = __byte_perm_S (w[15], w[16], selector);
w[50] = __byte_perm_S (w[14], w[15], selector);
w[49] = __byte_perm_S (w[13], w[14], selector);
w[48] = __byte_perm_S (w[12], w[13], selector);
w[47] = __byte_perm_S (w[11], w[12], selector);
w[46] = __byte_perm_S (w[10], w[11], selector);
w[45] = __byte_perm_S (w[ 9], w[10], selector);
w[44] = __byte_perm_S (w[ 8], w[ 9], selector);
w[43] = __byte_perm_S (w[ 7], w[ 8], selector);
w[42] = __byte_perm_S (w[ 6], w[ 7], selector);
w[41] = __byte_perm_S (w[ 5], w[ 6], selector);
w[40] = __byte_perm_S (w[ 4], w[ 5], selector);
w[39] = __byte_perm_S (w[ 3], w[ 4], selector);
w[38] = __byte_perm_S (w[ 2], w[ 3], selector);
w[37] = __byte_perm_S (w[ 1], w[ 2], selector);
w[36] = __byte_perm_S (w[ 0], w[ 1], selector);
w[35] = __byte_perm_S ( 0, w[ 0], selector);
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 36:
w[63] = __byte_perm_S (w[26], w[27], selector);
w[62] = __byte_perm_S (w[25], w[26], selector);
w[61] = __byte_perm_S (w[24], w[25], selector);
w[60] = __byte_perm_S (w[23], w[24], selector);
w[59] = __byte_perm_S (w[22], w[23], selector);
w[58] = __byte_perm_S (w[21], w[22], selector);
w[57] = __byte_perm_S (w[20], w[21], selector);
w[56] = __byte_perm_S (w[19], w[20], selector);
w[55] = __byte_perm_S (w[18], w[19], selector);
w[54] = __byte_perm_S (w[17], w[18], selector);
w[53] = __byte_perm_S (w[16], w[17], selector);
w[52] = __byte_perm_S (w[15], w[16], selector);
w[51] = __byte_perm_S (w[14], w[15], selector);
w[50] = __byte_perm_S (w[13], w[14], selector);
w[49] = __byte_perm_S (w[12], w[13], selector);
w[48] = __byte_perm_S (w[11], w[12], selector);
w[47] = __byte_perm_S (w[10], w[11], selector);
w[46] = __byte_perm_S (w[ 9], w[10], selector);
w[45] = __byte_perm_S (w[ 8], w[ 9], selector);
w[44] = __byte_perm_S (w[ 7], w[ 8], selector);
w[43] = __byte_perm_S (w[ 6], w[ 7], selector);
w[42] = __byte_perm_S (w[ 5], w[ 6], selector);
w[41] = __byte_perm_S (w[ 4], w[ 5], selector);
w[40] = __byte_perm_S (w[ 3], w[ 4], selector);
w[39] = __byte_perm_S (w[ 2], w[ 3], selector);
w[38] = __byte_perm_S (w[ 1], w[ 2], selector);
w[37] = __byte_perm_S (w[ 0], w[ 1], selector);
w[36] = __byte_perm_S ( 0, w[ 0], selector);
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 37:
w[63] = __byte_perm_S (w[25], w[26], selector);
w[62] = __byte_perm_S (w[24], w[25], selector);
w[61] = __byte_perm_S (w[23], w[24], selector);
w[60] = __byte_perm_S (w[22], w[23], selector);
w[59] = __byte_perm_S (w[21], w[22], selector);
w[58] = __byte_perm_S (w[20], w[21], selector);
w[57] = __byte_perm_S (w[19], w[20], selector);
w[56] = __byte_perm_S (w[18], w[19], selector);
w[55] = __byte_perm_S (w[17], w[18], selector);
w[54] = __byte_perm_S (w[16], w[17], selector);
w[53] = __byte_perm_S (w[15], w[16], selector);
w[52] = __byte_perm_S (w[14], w[15], selector);
w[51] = __byte_perm_S (w[13], w[14], selector);
w[50] = __byte_perm_S (w[12], w[13], selector);
w[49] = __byte_perm_S (w[11], w[12], selector);
w[48] = __byte_perm_S (w[10], w[11], selector);
w[47] = __byte_perm_S (w[ 9], w[10], selector);
w[46] = __byte_perm_S (w[ 8], w[ 9], selector);
w[45] = __byte_perm_S (w[ 7], w[ 8], selector);
w[44] = __byte_perm_S (w[ 6], w[ 7], selector);
w[43] = __byte_perm_S (w[ 5], w[ 6], selector);
w[42] = __byte_perm_S (w[ 4], w[ 5], selector);
w[41] = __byte_perm_S (w[ 3], w[ 4], selector);
w[40] = __byte_perm_S (w[ 2], w[ 3], selector);
w[39] = __byte_perm_S (w[ 1], w[ 2], selector);
w[38] = __byte_perm_S (w[ 0], w[ 1], selector);
w[37] = __byte_perm_S ( 0, w[ 0], selector);
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 38:
w[63] = __byte_perm_S (w[24], w[25], selector);
w[62] = __byte_perm_S (w[23], w[24], selector);
w[61] = __byte_perm_S (w[22], w[23], selector);
w[60] = __byte_perm_S (w[21], w[22], selector);
w[59] = __byte_perm_S (w[20], w[21], selector);
w[58] = __byte_perm_S (w[19], w[20], selector);
w[57] = __byte_perm_S (w[18], w[19], selector);
w[56] = __byte_perm_S (w[17], w[18], selector);
w[55] = __byte_perm_S (w[16], w[17], selector);
w[54] = __byte_perm_S (w[15], w[16], selector);
w[53] = __byte_perm_S (w[14], w[15], selector);
w[52] = __byte_perm_S (w[13], w[14], selector);
w[51] = __byte_perm_S (w[12], w[13], selector);
w[50] = __byte_perm_S (w[11], w[12], selector);
w[49] = __byte_perm_S (w[10], w[11], selector);
w[48] = __byte_perm_S (w[ 9], w[10], selector);
w[47] = __byte_perm_S (w[ 8], w[ 9], selector);
w[46] = __byte_perm_S (w[ 7], w[ 8], selector);
w[45] = __byte_perm_S (w[ 6], w[ 7], selector);
w[44] = __byte_perm_S (w[ 5], w[ 6], selector);
w[43] = __byte_perm_S (w[ 4], w[ 5], selector);
w[42] = __byte_perm_S (w[ 3], w[ 4], selector);
w[41] = __byte_perm_S (w[ 2], w[ 3], selector);
w[40] = __byte_perm_S (w[ 1], w[ 2], selector);
w[39] = __byte_perm_S (w[ 0], w[ 1], selector);
w[38] = __byte_perm_S ( 0, w[ 0], selector);
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 39:
w[63] = __byte_perm_S (w[23], w[24], selector);
w[62] = __byte_perm_S (w[22], w[23], selector);
w[61] = __byte_perm_S (w[21], w[22], selector);
w[60] = __byte_perm_S (w[20], w[21], selector);
w[59] = __byte_perm_S (w[19], w[20], selector);
w[58] = __byte_perm_S (w[18], w[19], selector);
w[57] = __byte_perm_S (w[17], w[18], selector);
w[56] = __byte_perm_S (w[16], w[17], selector);
w[55] = __byte_perm_S (w[15], w[16], selector);
w[54] = __byte_perm_S (w[14], w[15], selector);
w[53] = __byte_perm_S (w[13], w[14], selector);
w[52] = __byte_perm_S (w[12], w[13], selector);
w[51] = __byte_perm_S (w[11], w[12], selector);
w[50] = __byte_perm_S (w[10], w[11], selector);
w[49] = __byte_perm_S (w[ 9], w[10], selector);
w[48] = __byte_perm_S (w[ 8], w[ 9], selector);
w[47] = __byte_perm_S (w[ 7], w[ 8], selector);
w[46] = __byte_perm_S (w[ 6], w[ 7], selector);
w[45] = __byte_perm_S (w[ 5], w[ 6], selector);
w[44] = __byte_perm_S (w[ 4], w[ 5], selector);
w[43] = __byte_perm_S (w[ 3], w[ 4], selector);
w[42] = __byte_perm_S (w[ 2], w[ 3], selector);
w[41] = __byte_perm_S (w[ 1], w[ 2], selector);
w[40] = __byte_perm_S (w[ 0], w[ 1], selector);
w[39] = __byte_perm_S ( 0, w[ 0], selector);
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 40:
w[63] = __byte_perm_S (w[22], w[23], selector);
w[62] = __byte_perm_S (w[21], w[22], selector);
w[61] = __byte_perm_S (w[20], w[21], selector);
w[60] = __byte_perm_S (w[19], w[20], selector);
w[59] = __byte_perm_S (w[18], w[19], selector);
w[58] = __byte_perm_S (w[17], w[18], selector);
w[57] = __byte_perm_S (w[16], w[17], selector);
w[56] = __byte_perm_S (w[15], w[16], selector);
w[55] = __byte_perm_S (w[14], w[15], selector);
w[54] = __byte_perm_S (w[13], w[14], selector);
w[53] = __byte_perm_S (w[12], w[13], selector);
w[52] = __byte_perm_S (w[11], w[12], selector);
w[51] = __byte_perm_S (w[10], w[11], selector);
w[50] = __byte_perm_S (w[ 9], w[10], selector);
w[49] = __byte_perm_S (w[ 8], w[ 9], selector);
w[48] = __byte_perm_S (w[ 7], w[ 8], selector);
w[47] = __byte_perm_S (w[ 6], w[ 7], selector);
w[46] = __byte_perm_S (w[ 5], w[ 6], selector);
w[45] = __byte_perm_S (w[ 4], w[ 5], selector);
w[44] = __byte_perm_S (w[ 3], w[ 4], selector);
w[43] = __byte_perm_S (w[ 2], w[ 3], selector);
w[42] = __byte_perm_S (w[ 1], w[ 2], selector);
w[41] = __byte_perm_S (w[ 0], w[ 1], selector);
w[40] = __byte_perm_S ( 0, w[ 0], selector);
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 41:
w[63] = __byte_perm_S (w[21], w[22], selector);
w[62] = __byte_perm_S (w[20], w[21], selector);
w[61] = __byte_perm_S (w[19], w[20], selector);
w[60] = __byte_perm_S (w[18], w[19], selector);
w[59] = __byte_perm_S (w[17], w[18], selector);
w[58] = __byte_perm_S (w[16], w[17], selector);
w[57] = __byte_perm_S (w[15], w[16], selector);
w[56] = __byte_perm_S (w[14], w[15], selector);
w[55] = __byte_perm_S (w[13], w[14], selector);
w[54] = __byte_perm_S (w[12], w[13], selector);
w[53] = __byte_perm_S (w[11], w[12], selector);
w[52] = __byte_perm_S (w[10], w[11], selector);
w[51] = __byte_perm_S (w[ 9], w[10], selector);
w[50] = __byte_perm_S (w[ 8], w[ 9], selector);
w[49] = __byte_perm_S (w[ 7], w[ 8], selector);
w[48] = __byte_perm_S (w[ 6], w[ 7], selector);
w[47] = __byte_perm_S (w[ 5], w[ 6], selector);
w[46] = __byte_perm_S (w[ 4], w[ 5], selector);
w[45] = __byte_perm_S (w[ 3], w[ 4], selector);
w[44] = __byte_perm_S (w[ 2], w[ 3], selector);
w[43] = __byte_perm_S (w[ 1], w[ 2], selector);
w[42] = __byte_perm_S (w[ 0], w[ 1], selector);
w[41] = __byte_perm_S ( 0, w[ 0], selector);
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 42:
w[63] = __byte_perm_S (w[20], w[21], selector);
w[62] = __byte_perm_S (w[19], w[20], selector);
w[61] = __byte_perm_S (w[18], w[19], selector);
w[60] = __byte_perm_S (w[17], w[18], selector);
w[59] = __byte_perm_S (w[16], w[17], selector);
w[58] = __byte_perm_S (w[15], w[16], selector);
w[57] = __byte_perm_S (w[14], w[15], selector);
w[56] = __byte_perm_S (w[13], w[14], selector);
w[55] = __byte_perm_S (w[12], w[13], selector);
w[54] = __byte_perm_S (w[11], w[12], selector);
w[53] = __byte_perm_S (w[10], w[11], selector);
w[52] = __byte_perm_S (w[ 9], w[10], selector);
w[51] = __byte_perm_S (w[ 8], w[ 9], selector);
w[50] = __byte_perm_S (w[ 7], w[ 8], selector);
w[49] = __byte_perm_S (w[ 6], w[ 7], selector);
w[48] = __byte_perm_S (w[ 5], w[ 6], selector);
w[47] = __byte_perm_S (w[ 4], w[ 5], selector);
w[46] = __byte_perm_S (w[ 3], w[ 4], selector);
w[45] = __byte_perm_S (w[ 2], w[ 3], selector);
w[44] = __byte_perm_S (w[ 1], w[ 2], selector);
w[43] = __byte_perm_S (w[ 0], w[ 1], selector);
w[42] = __byte_perm_S ( 0, w[ 0], selector);
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 43:
w[63] = __byte_perm_S (w[19], w[20], selector);
w[62] = __byte_perm_S (w[18], w[19], selector);
w[61] = __byte_perm_S (w[17], w[18], selector);
w[60] = __byte_perm_S (w[16], w[17], selector);
w[59] = __byte_perm_S (w[15], w[16], selector);
w[58] = __byte_perm_S (w[14], w[15], selector);
w[57] = __byte_perm_S (w[13], w[14], selector);
w[56] = __byte_perm_S (w[12], w[13], selector);
w[55] = __byte_perm_S (w[11], w[12], selector);
w[54] = __byte_perm_S (w[10], w[11], selector);
w[53] = __byte_perm_S (w[ 9], w[10], selector);
w[52] = __byte_perm_S (w[ 8], w[ 9], selector);
w[51] = __byte_perm_S (w[ 7], w[ 8], selector);
w[50] = __byte_perm_S (w[ 6], w[ 7], selector);
w[49] = __byte_perm_S (w[ 5], w[ 6], selector);
w[48] = __byte_perm_S (w[ 4], w[ 5], selector);
w[47] = __byte_perm_S (w[ 3], w[ 4], selector);
w[46] = __byte_perm_S (w[ 2], w[ 3], selector);
w[45] = __byte_perm_S (w[ 1], w[ 2], selector);
w[44] = __byte_perm_S (w[ 0], w[ 1], selector);
w[43] = __byte_perm_S ( 0, w[ 0], selector);
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 44:
w[63] = __byte_perm_S (w[18], w[19], selector);
w[62] = __byte_perm_S (w[17], w[18], selector);
w[61] = __byte_perm_S (w[16], w[17], selector);
w[60] = __byte_perm_S (w[15], w[16], selector);
w[59] = __byte_perm_S (w[14], w[15], selector);
w[58] = __byte_perm_S (w[13], w[14], selector);
w[57] = __byte_perm_S (w[12], w[13], selector);
w[56] = __byte_perm_S (w[11], w[12], selector);
w[55] = __byte_perm_S (w[10], w[11], selector);
w[54] = __byte_perm_S (w[ 9], w[10], selector);
w[53] = __byte_perm_S (w[ 8], w[ 9], selector);
w[52] = __byte_perm_S (w[ 7], w[ 8], selector);
w[51] = __byte_perm_S (w[ 6], w[ 7], selector);
w[50] = __byte_perm_S (w[ 5], w[ 6], selector);
w[49] = __byte_perm_S (w[ 4], w[ 5], selector);
w[48] = __byte_perm_S (w[ 3], w[ 4], selector);
w[47] = __byte_perm_S (w[ 2], w[ 3], selector);
w[46] = __byte_perm_S (w[ 1], w[ 2], selector);
w[45] = __byte_perm_S (w[ 0], w[ 1], selector);
w[44] = __byte_perm_S ( 0, w[ 0], selector);
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 45:
w[63] = __byte_perm_S (w[17], w[18], selector);
w[62] = __byte_perm_S (w[16], w[17], selector);
w[61] = __byte_perm_S (w[15], w[16], selector);
w[60] = __byte_perm_S (w[14], w[15], selector);
w[59] = __byte_perm_S (w[13], w[14], selector);
w[58] = __byte_perm_S (w[12], w[13], selector);
w[57] = __byte_perm_S (w[11], w[12], selector);
w[56] = __byte_perm_S (w[10], w[11], selector);
w[55] = __byte_perm_S (w[ 9], w[10], selector);
w[54] = __byte_perm_S (w[ 8], w[ 9], selector);
w[53] = __byte_perm_S (w[ 7], w[ 8], selector);
w[52] = __byte_perm_S (w[ 6], w[ 7], selector);
w[51] = __byte_perm_S (w[ 5], w[ 6], selector);
w[50] = __byte_perm_S (w[ 4], w[ 5], selector);
w[49] = __byte_perm_S (w[ 3], w[ 4], selector);
w[48] = __byte_perm_S (w[ 2], w[ 3], selector);
w[47] = __byte_perm_S (w[ 1], w[ 2], selector);
w[46] = __byte_perm_S (w[ 0], w[ 1], selector);
w[45] = __byte_perm_S ( 0, w[ 0], selector);
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 46:
w[63] = __byte_perm_S (w[16], w[17], selector);
w[62] = __byte_perm_S (w[15], w[16], selector);
w[61] = __byte_perm_S (w[14], w[15], selector);
w[60] = __byte_perm_S (w[13], w[14], selector);
w[59] = __byte_perm_S (w[12], w[13], selector);
w[58] = __byte_perm_S (w[11], w[12], selector);
w[57] = __byte_perm_S (w[10], w[11], selector);
w[56] = __byte_perm_S (w[ 9], w[10], selector);
w[55] = __byte_perm_S (w[ 8], w[ 9], selector);
w[54] = __byte_perm_S (w[ 7], w[ 8], selector);
w[53] = __byte_perm_S (w[ 6], w[ 7], selector);
w[52] = __byte_perm_S (w[ 5], w[ 6], selector);
w[51] = __byte_perm_S (w[ 4], w[ 5], selector);
w[50] = __byte_perm_S (w[ 3], w[ 4], selector);
w[49] = __byte_perm_S (w[ 2], w[ 3], selector);
w[48] = __byte_perm_S (w[ 1], w[ 2], selector);
w[47] = __byte_perm_S (w[ 0], w[ 1], selector);
w[46] = __byte_perm_S ( 0, w[ 0], selector);
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 47:
w[63] = __byte_perm_S (w[15], w[16], selector);
w[62] = __byte_perm_S (w[14], w[15], selector);
w[61] = __byte_perm_S (w[13], w[14], selector);
w[60] = __byte_perm_S (w[12], w[13], selector);
w[59] = __byte_perm_S (w[11], w[12], selector);
w[58] = __byte_perm_S (w[10], w[11], selector);
w[57] = __byte_perm_S (w[ 9], w[10], selector);
w[56] = __byte_perm_S (w[ 8], w[ 9], selector);
w[55] = __byte_perm_S (w[ 7], w[ 8], selector);
w[54] = __byte_perm_S (w[ 6], w[ 7], selector);
w[53] = __byte_perm_S (w[ 5], w[ 6], selector);
w[52] = __byte_perm_S (w[ 4], w[ 5], selector);
w[51] = __byte_perm_S (w[ 3], w[ 4], selector);
w[50] = __byte_perm_S (w[ 2], w[ 3], selector);
w[49] = __byte_perm_S (w[ 1], w[ 2], selector);
w[48] = __byte_perm_S (w[ 0], w[ 1], selector);
w[47] = __byte_perm_S ( 0, w[ 0], selector);
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 48:
w[63] = __byte_perm_S (w[14], w[15], selector);
w[62] = __byte_perm_S (w[13], w[14], selector);
w[61] = __byte_perm_S (w[12], w[13], selector);
w[60] = __byte_perm_S (w[11], w[12], selector);
w[59] = __byte_perm_S (w[10], w[11], selector);
w[58] = __byte_perm_S (w[ 9], w[10], selector);
w[57] = __byte_perm_S (w[ 8], w[ 9], selector);
w[56] = __byte_perm_S (w[ 7], w[ 8], selector);
w[55] = __byte_perm_S (w[ 6], w[ 7], selector);
w[54] = __byte_perm_S (w[ 5], w[ 6], selector);
w[53] = __byte_perm_S (w[ 4], w[ 5], selector);
w[52] = __byte_perm_S (w[ 3], w[ 4], selector);
w[51] = __byte_perm_S (w[ 2], w[ 3], selector);
w[50] = __byte_perm_S (w[ 1], w[ 2], selector);
w[49] = __byte_perm_S (w[ 0], w[ 1], selector);
w[48] = __byte_perm_S ( 0, w[ 0], selector);
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 49:
w[63] = __byte_perm_S (w[13], w[14], selector);
w[62] = __byte_perm_S (w[12], w[13], selector);
w[61] = __byte_perm_S (w[11], w[12], selector);
w[60] = __byte_perm_S (w[10], w[11], selector);
w[59] = __byte_perm_S (w[ 9], w[10], selector);
w[58] = __byte_perm_S (w[ 8], w[ 9], selector);
w[57] = __byte_perm_S (w[ 7], w[ 8], selector);
w[56] = __byte_perm_S (w[ 6], w[ 7], selector);
w[55] = __byte_perm_S (w[ 5], w[ 6], selector);
w[54] = __byte_perm_S (w[ 4], w[ 5], selector);
w[53] = __byte_perm_S (w[ 3], w[ 4], selector);
w[52] = __byte_perm_S (w[ 2], w[ 3], selector);
w[51] = __byte_perm_S (w[ 1], w[ 2], selector);
w[50] = __byte_perm_S (w[ 0], w[ 1], selector);
w[49] = __byte_perm_S ( 0, w[ 0], selector);
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 50:
w[63] = __byte_perm_S (w[12], w[13], selector);
w[62] = __byte_perm_S (w[11], w[12], selector);
w[61] = __byte_perm_S (w[10], w[11], selector);
w[60] = __byte_perm_S (w[ 9], w[10], selector);
w[59] = __byte_perm_S (w[ 8], w[ 9], selector);
w[58] = __byte_perm_S (w[ 7], w[ 8], selector);
w[57] = __byte_perm_S (w[ 6], w[ 7], selector);
w[56] = __byte_perm_S (w[ 5], w[ 6], selector);
w[55] = __byte_perm_S (w[ 4], w[ 5], selector);
w[54] = __byte_perm_S (w[ 3], w[ 4], selector);
w[53] = __byte_perm_S (w[ 2], w[ 3], selector);
w[52] = __byte_perm_S (w[ 1], w[ 2], selector);
w[51] = __byte_perm_S (w[ 0], w[ 1], selector);
w[50] = __byte_perm_S ( 0, w[ 0], selector);
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 51:
w[63] = __byte_perm_S (w[11], w[12], selector);
w[62] = __byte_perm_S (w[10], w[11], selector);
w[61] = __byte_perm_S (w[ 9], w[10], selector);
w[60] = __byte_perm_S (w[ 8], w[ 9], selector);
w[59] = __byte_perm_S (w[ 7], w[ 8], selector);
w[58] = __byte_perm_S (w[ 6], w[ 7], selector);
w[57] = __byte_perm_S (w[ 5], w[ 6], selector);
w[56] = __byte_perm_S (w[ 4], w[ 5], selector);
w[55] = __byte_perm_S (w[ 3], w[ 4], selector);
w[54] = __byte_perm_S (w[ 2], w[ 3], selector);
w[53] = __byte_perm_S (w[ 1], w[ 2], selector);
w[52] = __byte_perm_S (w[ 0], w[ 1], selector);
w[51] = __byte_perm_S ( 0, w[ 0], selector);
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 52:
w[63] = __byte_perm_S (w[10], w[11], selector);
w[62] = __byte_perm_S (w[ 9], w[10], selector);
w[61] = __byte_perm_S (w[ 8], w[ 9], selector);
w[60] = __byte_perm_S (w[ 7], w[ 8], selector);
w[59] = __byte_perm_S (w[ 6], w[ 7], selector);
w[58] = __byte_perm_S (w[ 5], w[ 6], selector);
w[57] = __byte_perm_S (w[ 4], w[ 5], selector);
w[56] = __byte_perm_S (w[ 3], w[ 4], selector);
w[55] = __byte_perm_S (w[ 2], w[ 3], selector);
w[54] = __byte_perm_S (w[ 1], w[ 2], selector);
w[53] = __byte_perm_S (w[ 0], w[ 1], selector);
w[52] = __byte_perm_S ( 0, w[ 0], selector);
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 53:
w[63] = __byte_perm_S (w[ 9], w[10], selector);
w[62] = __byte_perm_S (w[ 8], w[ 9], selector);
w[61] = __byte_perm_S (w[ 7], w[ 8], selector);
w[60] = __byte_perm_S (w[ 6], w[ 7], selector);
w[59] = __byte_perm_S (w[ 5], w[ 6], selector);
w[58] = __byte_perm_S (w[ 4], w[ 5], selector);
w[57] = __byte_perm_S (w[ 3], w[ 4], selector);
w[56] = __byte_perm_S (w[ 2], w[ 3], selector);
w[55] = __byte_perm_S (w[ 1], w[ 2], selector);
w[54] = __byte_perm_S (w[ 0], w[ 1], selector);
w[53] = __byte_perm_S ( 0, w[ 0], selector);
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 54:
w[63] = __byte_perm_S (w[ 8], w[ 9], selector);
w[62] = __byte_perm_S (w[ 7], w[ 8], selector);
w[61] = __byte_perm_S (w[ 6], w[ 7], selector);
w[60] = __byte_perm_S (w[ 5], w[ 6], selector);
w[59] = __byte_perm_S (w[ 4], w[ 5], selector);
w[58] = __byte_perm_S (w[ 3], w[ 4], selector);
w[57] = __byte_perm_S (w[ 2], w[ 3], selector);
w[56] = __byte_perm_S (w[ 1], w[ 2], selector);
w[55] = __byte_perm_S (w[ 0], w[ 1], selector);
w[54] = __byte_perm_S ( 0, w[ 0], selector);
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 55:
w[63] = __byte_perm_S (w[ 7], w[ 8], selector);
w[62] = __byte_perm_S (w[ 6], w[ 7], selector);
w[61] = __byte_perm_S (w[ 5], w[ 6], selector);
w[60] = __byte_perm_S (w[ 4], w[ 5], selector);
w[59] = __byte_perm_S (w[ 3], w[ 4], selector);
w[58] = __byte_perm_S (w[ 2], w[ 3], selector);
w[57] = __byte_perm_S (w[ 1], w[ 2], selector);
w[56] = __byte_perm_S (w[ 0], w[ 1], selector);
w[55] = __byte_perm_S ( 0, w[ 0], selector);
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 56:
w[63] = __byte_perm_S (w[ 6], w[ 7], selector);
w[62] = __byte_perm_S (w[ 5], w[ 6], selector);
w[61] = __byte_perm_S (w[ 4], w[ 5], selector);
w[60] = __byte_perm_S (w[ 3], w[ 4], selector);
w[59] = __byte_perm_S (w[ 2], w[ 3], selector);
w[58] = __byte_perm_S (w[ 1], w[ 2], selector);
w[57] = __byte_perm_S (w[ 0], w[ 1], selector);
w[56] = __byte_perm_S ( 0, w[ 0], selector);
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 57:
w[63] = __byte_perm_S (w[ 5], w[ 6], selector);
w[62] = __byte_perm_S (w[ 4], w[ 5], selector);
w[61] = __byte_perm_S (w[ 3], w[ 4], selector);
w[60] = __byte_perm_S (w[ 2], w[ 3], selector);
w[59] = __byte_perm_S (w[ 1], w[ 2], selector);
w[58] = __byte_perm_S (w[ 0], w[ 1], selector);
w[57] = __byte_perm_S ( 0, w[ 0], selector);
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 58:
w[63] = __byte_perm_S (w[ 4], w[ 5], selector);
w[62] = __byte_perm_S (w[ 3], w[ 4], selector);
w[61] = __byte_perm_S (w[ 2], w[ 3], selector);
w[60] = __byte_perm_S (w[ 1], w[ 2], selector);
w[59] = __byte_perm_S (w[ 0], w[ 1], selector);
w[58] = __byte_perm_S ( 0, w[ 0], selector);
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 59:
w[63] = __byte_perm_S (w[ 3], w[ 4], selector);
w[62] = __byte_perm_S (w[ 2], w[ 3], selector);
w[61] = __byte_perm_S (w[ 1], w[ 2], selector);
w[60] = __byte_perm_S (w[ 0], w[ 1], selector);
w[59] = __byte_perm_S ( 0, w[ 0], selector);
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 60:
w[63] = __byte_perm_S (w[ 2], w[ 3], selector);
w[62] = __byte_perm_S (w[ 1], w[ 2], selector);
w[61] = __byte_perm_S (w[ 0], w[ 1], selector);
w[60] = __byte_perm_S ( 0, w[ 0], selector);
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 61:
w[63] = __byte_perm_S (w[ 1], w[ 2], selector);
w[62] = __byte_perm_S (w[ 0], w[ 1], selector);
w[61] = __byte_perm_S ( 0, w[ 0], selector);
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 62:
w[63] = __byte_perm_S (w[ 0], w[ 1], selector);
w[62] = __byte_perm_S ( 0, w[ 0], selector);
w[61] = 0;
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
case 63:
w[63] = __byte_perm_S ( 0, w[ 0], selector);
w[62] = 0;
w[61] = 0;
w[60] = 0;
w[59] = 0;
w[58] = 0;
w[57] = 0;
w[56] = 0;
w[55] = 0;
w[54] = 0;
w[53] = 0;
w[52] = 0;
w[51] = 0;
w[50] = 0;
w[49] = 0;
w[48] = 0;
w[47] = 0;
w[46] = 0;
w[45] = 0;
w[44] = 0;
w[43] = 0;
w[42] = 0;
w[41] = 0;
w[40] = 0;
w[39] = 0;
w[38] = 0;
w[37] = 0;
w[36] = 0;
w[35] = 0;
w[34] = 0;
w[33] = 0;
w[32] = 0;
w[31] = 0;
w[30] = 0;
w[29] = 0;
w[28] = 0;
w[27] = 0;
w[26] = 0;
w[25] = 0;
w[24] = 0;
w[23] = 0;
w[22] = 0;
w[21] = 0;
w[20] = 0;
w[19] = 0;
w[18] = 0;
w[17] = 0;
w[16] = 0;
w[15] = 0;
w[14] = 0;
w[13] = 0;
w[12] = 0;
w[11] = 0;
w[10] = 0;
w[ 9] = 0;
w[ 8] = 0;
w[ 7] = 0;
w[ 6] = 0;
w[ 5] = 0;
w[ 4] = 0;
w[ 3] = 0;
w[ 2] = 0;
w[ 1] = 0;
w[ 0] = 0;
break;
}
#endif
}
/**
 * amp kernel: joins each base candidate with the first entry of combs_buf
 * and writes the combined candidate to pws_amp.
 *
 * pws        input candidates, one per work-item (indexed by global id)
 * pws_amp    output candidates, written at the same index
 * rules_buf  not referenced by this kernel
 * combs_buf  only element 0 is read
 * bfs_buf    not referenced by this kernel
 * combs_mode decides which of the two buffers gets relocated before merging
 * gid_max    number of valid work-items; ids at or above it exit early
 */
__kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max)
{
  const u32 gid = get_global_id (0);

  // surplus work-items have no candidate to process
  if (gid >= gid_max) return;

  /**
   * private copies, so the relocation below never touches global memory
   */

  pw_t base  = pws[gid];
  pw_t addon = combs_buf[0];

  const u32 base_len  = base.pw_len;
  const u32 addon_len = addon.pw_len;

  /**
   * make room: the buffer that has to come second is relocated by the
   * length of the one that comes first
   */

  if (combs_mode == COMBINATOR_MODE_BASE_LEFT)
  {
    // base stays in front, addon is moved behind it
    switch_buffer_by_offset_1x64_le_S (addon.i, base_len);
  }

  if (combs_mode == COMBINATOR_MODE_BASE_RIGHT)
  {
    // addon stays in front, base is moved behind it
    switch_buffer_by_offset_1x64_le_S (base.i, addon_len);
  }

  /**
   * merge all 64 words of both buffers with a bitwise OR
   */

  #pragma unroll
  for (int idx = 0; idx < 64; idx++)
  {
    base.i[idx] = base.i[idx] | addon.i[idx];
  }

  // the combined length is the sum of the two original lengths
  base.pw_len = base_len + addon_len;

  pws_amp[gid] = base;
}