/** * Author......: See docs/credits.txt * License.....: MIT */ #include "inc_hash_constants.h" #include "inc_vendor.cl" #include "inc_types.cl" inline void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset) { #if defined IS_AMD || defined IS_GENERIC const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset; switch (offset / 4) { case 0: w[63] = amd_bytealign_S (w[63], w[62], offset_minus_4); w[62] = amd_bytealign_S (w[62], w[61], offset_minus_4); w[61] = amd_bytealign_S (w[61], w[60], offset_minus_4); w[60] = amd_bytealign_S (w[60], w[59], offset_minus_4); w[59] = amd_bytealign_S (w[59], w[58], offset_minus_4); w[58] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[57] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[56] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[55] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[54] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[53] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[52] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[51] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[50] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[49] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[48] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[47] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[46] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[45] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[44] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[43] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[42] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[41] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[40] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[39] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[38] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[37] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[36] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[35] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[34] 
= amd_bytealign_S (w[34], w[33], offset_minus_4); w[33] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[32] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[31] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[30] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[29] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[28] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[27] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[26] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[25] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[24] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[23] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[22] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[21] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[20] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[19] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[18] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[17] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[16] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[15] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[14] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[13] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[12] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[11] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[10] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[ 5] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 4] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 3] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 2] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 1] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 0] = amd_bytealign_S (w[ 0], 0, offset_minus_4); if (offset_mod_4 == 0) { w[ 0] = w[ 1]; w[ 1] = 
w[ 2]; w[ 2] = w[ 3]; w[ 3] = w[ 4]; w[ 4] = w[ 5]; w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 1: w[63] = amd_bytealign_S (w[62], w[61], offset_minus_4); w[62] = amd_bytealign_S (w[61], w[60], offset_minus_4); w[61] = amd_bytealign_S (w[60], w[59], offset_minus_4); w[60] = amd_bytealign_S (w[59], w[58], offset_minus_4); w[59] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[58] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[57] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[56] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[55] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[54] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[53] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[52] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[51] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[50] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[49] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[48] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[47] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[46] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[45] = amd_bytealign_S (w[44], w[43], 
offset_minus_4); w[44] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[43] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[42] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[41] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[40] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[39] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[38] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[37] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[36] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[35] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[34] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[33] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[32] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[31] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[30] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[29] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[28] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[27] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[26] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[25] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[24] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[23] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[22] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[21] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[20] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[19] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[18] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[17] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[16] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[15] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[14] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[13] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[12] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[11] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[10] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[ 9] = 
amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 5] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 4] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 3] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 2] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 1] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 0] = 0; if (offset_mod_4 == 0) { w[ 1] = w[ 2]; w[ 2] = w[ 3]; w[ 3] = w[ 4]; w[ 4] = w[ 5]; w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 2: w[63] = amd_bytealign_S (w[61], w[60], offset_minus_4); w[62] = amd_bytealign_S (w[60], w[59], offset_minus_4); w[61] = amd_bytealign_S (w[59], w[58], offset_minus_4); w[60] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[59] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[58] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[57] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[56] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[55] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[54] = 
amd_bytealign_S (w[52], w[51], offset_minus_4); w[53] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[52] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[51] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[50] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[49] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[48] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[47] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[46] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[45] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[44] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[43] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[42] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[41] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[40] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[39] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[38] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[37] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[36] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[35] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[34] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[33] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[32] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[31] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[30] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[29] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[28] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[27] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[26] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[25] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[24] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[23] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[22] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[21] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[20] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[19] = amd_bytealign_S (w[17], w[16], 
offset_minus_4); w[18] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[17] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[16] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[15] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[14] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[13] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[12] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[11] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[10] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 5] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 4] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 3] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 2] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 2] = w[ 3]; w[ 3] = w[ 4]; w[ 4] = w[ 5]; w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 3: w[63] = amd_bytealign_S (w[60], w[59], 
offset_minus_4); w[62] = amd_bytealign_S (w[59], w[58], offset_minus_4); w[61] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[60] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[59] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[58] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[57] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[56] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[55] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[54] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[53] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[52] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[51] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[50] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[49] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[48] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[47] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[46] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[45] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[44] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[43] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[42] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[41] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[40] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[39] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[38] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[37] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[36] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[35] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[34] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[33] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[32] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[31] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[30] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[29] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[28] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[27] = 
amd_bytealign_S (w[24], w[23], offset_minus_4); w[26] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[25] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[24] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[23] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[22] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[21] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[20] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[19] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[18] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[17] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[16] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[15] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[14] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[13] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[12] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[11] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[10] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 5] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 4] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 3] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 3] = w[ 4]; w[ 4] = w[ 5]; w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = 
w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 4: w[63] = amd_bytealign_S (w[59], w[58], offset_minus_4); w[62] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[61] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[60] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[59] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[58] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[57] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[56] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[55] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[54] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[53] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[52] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[51] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[50] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[49] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[48] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[47] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[46] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[45] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[44] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[43] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[42] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[41] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[40] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[39] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[38] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[37] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[36] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[35] = amd_bytealign_S (w[31], w[30], 
offset_minus_4); w[34] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[33] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[32] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[31] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[30] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[29] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[28] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[27] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[26] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[25] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[24] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[23] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[22] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[21] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[20] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[19] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[18] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[17] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[16] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[15] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[14] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[13] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[12] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[11] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[10] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 5] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 4] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 4] = w[ 5]; w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; 
w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 5: w[63] = amd_bytealign_S (w[58], w[57], offset_minus_4); w[62] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[61] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[60] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[59] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[58] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[57] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[56] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[55] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[54] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[53] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[52] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[51] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[50] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[49] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[48] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[47] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[46] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[45] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[44] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[43] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[42] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[41] = amd_bytealign_S (w[36], 
w[35], offset_minus_4); w[40] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[39] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[38] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[37] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[36] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[35] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[34] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[33] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[32] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[31] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[30] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[29] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[28] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[27] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[26] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[25] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[24] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[23] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[22] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[21] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[20] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[19] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[18] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[17] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[16] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[15] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[14] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[13] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[12] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[11] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[10] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 5] = 
amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 5] = w[ 6]; w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 6: w[63] = amd_bytealign_S (w[57], w[56], offset_minus_4); w[62] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[61] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[60] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[59] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[58] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[57] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[56] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[55] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[54] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[53] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[52] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[51] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[50] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[49] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[48] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[47] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[46] = amd_bytealign_S 
(w[40], w[39], offset_minus_4); w[45] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[44] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[43] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[42] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[41] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[40] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[39] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[38] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[37] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[36] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[35] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[34] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[33] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[32] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[31] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[30] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[29] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[28] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[27] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[26] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[25] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[24] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[23] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[22] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[21] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[20] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[19] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[18] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[17] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[16] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[15] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[14] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[13] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[12] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[11] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[10] = 
amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 6] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 6] = w[ 7]; w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 7: w[63] = amd_bytealign_S (w[56], w[55], offset_minus_4); w[62] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[61] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[60] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[59] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[58] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[57] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[56] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[55] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[54] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[53] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[52] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[51] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[50] = amd_bytealign_S 
(w[43], w[42], offset_minus_4); w[49] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[48] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[47] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[46] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[45] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[44] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[43] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[42] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[41] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[40] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[39] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[38] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[37] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[36] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[35] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[34] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[33] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[32] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[31] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[30] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[29] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[28] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[27] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[26] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[25] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[24] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[23] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[22] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[21] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[20] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[19] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[18] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[17] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[16] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[15] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[14] = 
amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[13] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[12] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[11] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[10] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 7] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 7] = w[ 8]; w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 8: w[63] = amd_bytealign_S (w[55], w[54], offset_minus_4); w[62] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[61] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[60] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[59] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[58] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[57] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[56] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[55] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[54] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[53] = amd_bytealign_S (w[45], 
w[44], offset_minus_4); w[52] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[51] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[50] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[49] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[48] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[47] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[46] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[45] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[44] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[43] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[42] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[41] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[40] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[39] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[38] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[37] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[36] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[35] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[34] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[33] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[32] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[31] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[30] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[29] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[28] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[27] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[26] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[25] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[24] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[23] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[22] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[21] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[20] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[19] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[18] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[17] = 
amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[16] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[15] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[14] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[13] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[12] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[11] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[10] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 8] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 8] = w[ 9]; w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 9: w[63] = amd_bytealign_S (w[54], w[53], offset_minus_4); w[62] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[61] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[60] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[59] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[58] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[57] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[56] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[55] = amd_bytealign_S (w[46], 
w[45], offset_minus_4); w[54] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[53] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[52] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[51] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[50] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[49] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[48] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[47] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[46] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[45] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[44] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[43] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[42] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[41] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[40] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[39] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[38] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[37] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[36] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[35] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[34] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[33] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[32] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[31] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[30] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[29] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[28] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[27] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[26] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[25] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[24] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[23] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[22] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[21] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[20] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[19] = 
amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[18] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[17] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[16] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[15] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[14] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[13] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[12] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[11] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[10] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[ 9] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[ 9] = w[10]; w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 10: w[63] = amd_bytealign_S (w[53], w[52], offset_minus_4); w[62] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[61] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[60] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[59] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[58] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[57] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[56] = amd_bytealign_S (w[46], w[45], 
offset_minus_4); w[55] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[54] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[53] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[52] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[51] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[50] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[49] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[48] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[47] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[46] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[45] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[44] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[43] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[42] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[41] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[40] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[39] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[38] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[37] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[36] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[35] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[34] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[33] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[32] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[31] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[30] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[29] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[28] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[27] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[26] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[25] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[24] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[23] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[22] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[21] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[20] = 
amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[19] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[18] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[17] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[16] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[15] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[14] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[13] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[12] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[11] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[10] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[10] = w[11]; w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 11: w[63] = amd_bytealign_S (w[52], w[51], offset_minus_4); w[62] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[61] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[60] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[59] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[58] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[57] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[56] = amd_bytealign_S (w[45], w[44], 
offset_minus_4); w[55] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[54] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[53] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[52] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[51] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[50] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[49] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[48] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[47] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[46] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[45] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[44] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[43] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[42] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[41] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[40] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[39] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[38] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[37] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[36] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[35] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[34] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[33] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[32] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[31] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[30] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[29] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[28] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[27] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[26] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[25] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[24] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[23] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[22] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[21] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[20] = 
amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[19] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[18] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[17] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[16] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[15] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[14] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[13] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[12] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[11] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[11] = w[12]; w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 12: w[63] = amd_bytealign_S (w[51], w[50], offset_minus_4); w[62] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[61] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[60] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[59] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[58] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[57] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[56] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[55] = amd_bytealign_S (w[43], w[42], 
offset_minus_4); w[54] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[53] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[52] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[51] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[50] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[49] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[48] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[47] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[46] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[45] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[44] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[43] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[42] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[41] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[40] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[39] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[38] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[37] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[36] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[35] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[34] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[33] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[32] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[31] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[30] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[29] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[28] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[27] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[26] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[25] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[24] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[23] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[22] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[21] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[20] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[19] = 
amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[18] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[17] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[16] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[15] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[14] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[13] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[12] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[12] = w[13]; w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 13: w[63] = amd_bytealign_S (w[50], w[49], offset_minus_4); w[62] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[61] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[60] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[59] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[58] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[57] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[56] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[55] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[54] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[53] = amd_bytealign_S (w[40], w[39], 
offset_minus_4); w[52] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[51] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[50] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[49] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[48] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[47] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[46] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[45] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[44] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[43] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[42] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[41] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[40] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[39] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[38] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[37] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[36] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[35] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[34] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[33] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[32] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[31] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[30] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[29] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[28] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[27] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[26] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[25] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[24] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[23] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[22] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[21] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[20] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[19] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[18] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[17] = 
amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[16] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[15] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[14] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[13] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[13] = w[14]; w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 14: w[63] = amd_bytealign_S (w[49], w[48], offset_minus_4); w[62] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[61] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[60] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[59] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[58] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[57] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[56] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[55] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[54] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[53] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[52] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[51] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[50] = amd_bytealign_S (w[36], w[35], offset_minus_4); 
w[49] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[48] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[47] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[46] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[45] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[44] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[43] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[42] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[41] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[40] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[39] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[38] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[37] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[36] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[35] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[34] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[33] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[32] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[31] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[30] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[29] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[28] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[27] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[26] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[25] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[24] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[23] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[22] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[21] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[20] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[19] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[18] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[17] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[16] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[15] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[14] = amd_bytealign_S (w[ 0], 0, 
offset_minus_4); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[14] = w[15]; w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 15: w[63] = amd_bytealign_S (w[48], w[47], offset_minus_4); w[62] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[61] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[60] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[59] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[58] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[57] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[56] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[55] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[54] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[53] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[52] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[51] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[50] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[49] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[48] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[47] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[46] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[45] = amd_bytealign_S (w[30], 
w[29], offset_minus_4); w[44] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[43] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[42] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[41] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[40] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[39] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[38] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[37] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[36] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[35] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[34] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[33] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[32] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[31] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[30] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[29] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[28] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[27] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[26] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[25] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[24] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[23] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[22] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[21] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[20] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[19] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[18] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[17] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[16] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[15] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[15] = w[16]; w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; 
w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 16: w[63] = amd_bytealign_S (w[47], w[46], offset_minus_4); w[62] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[61] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[60] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[59] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[58] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[57] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[56] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[55] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[54] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[53] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[52] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[51] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[50] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[49] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[48] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[47] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[46] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[45] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[44] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[43] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[42] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[41] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[40] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[39] = 
amd_bytealign_S (w[23], w[22], offset_minus_4); w[38] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[37] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[36] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[35] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[34] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[33] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[32] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[31] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[30] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[29] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[28] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[27] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[26] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[25] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[24] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[23] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[22] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[21] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[20] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[19] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[18] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[17] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[16] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[16] = w[17]; w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; 
w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 17: w[63] = amd_bytealign_S (w[46], w[45], offset_minus_4); w[62] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[61] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[60] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[59] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[58] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[57] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[56] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[55] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[54] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[53] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[52] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[51] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[50] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[49] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[48] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[47] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[46] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[45] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[44] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[43] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[42] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[41] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[40] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[39] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[38] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[37] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[36] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[35] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[34] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[33] = amd_bytealign_S (w[16], w[15], 
offset_minus_4); w[32] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[31] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[30] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[29] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[28] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[27] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[26] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[25] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[24] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[23] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[22] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[21] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[20] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[19] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[18] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[17] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[17] = w[18]; w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 18: w[63] = amd_bytealign_S (w[45], w[44], offset_minus_4); w[62] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[61] = amd_bytealign_S (w[43], w[42], 
offset_minus_4); w[60] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[59] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[58] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[57] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[56] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[55] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[54] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[53] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[52] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[51] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[50] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[49] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[48] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[47] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[46] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[45] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[44] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[43] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[42] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[41] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[40] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[39] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[38] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[37] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[36] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[35] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[34] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[33] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[32] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[31] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[30] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[29] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[28] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[27] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[26] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[25] = 
amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[24] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[23] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[22] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[21] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[20] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[19] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[18] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[18] = w[19]; w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 19: w[63] = amd_bytealign_S (w[44], w[43], offset_minus_4); w[62] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[61] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[60] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[59] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[58] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[57] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[56] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[55] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[54] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[53] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[52] = 
amd_bytealign_S (w[33], w[32], offset_minus_4); w[51] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[50] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[49] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[48] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[47] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[46] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[45] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[44] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[43] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[42] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[41] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[40] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[39] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[38] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[37] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[36] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[35] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[34] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[33] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[32] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[31] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[30] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[29] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[28] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[27] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[26] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[25] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[24] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[23] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[22] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[21] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[20] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[19] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 
0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[19] = w[20]; w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 20: w[63] = amd_bytealign_S (w[43], w[42], offset_minus_4); w[62] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[61] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[60] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[59] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[58] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[57] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[56] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[55] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[54] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[53] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[52] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[51] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[50] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[49] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[48] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[47] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[46] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[45] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[44] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[43] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[42] = 
amd_bytealign_S (w[22], w[21], offset_minus_4); w[41] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[40] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[39] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[38] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[37] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[36] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[35] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[34] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[33] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[32] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[31] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[30] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[29] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[28] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[27] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[26] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[25] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[24] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[23] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[22] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[21] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[20] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[20] = w[21]; w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = 
w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 21: w[63] = amd_bytealign_S (w[42], w[41], offset_minus_4); w[62] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[61] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[60] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[59] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[58] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[57] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[56] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[55] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[54] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[53] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[52] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[51] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[50] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[49] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[48] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[47] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[46] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[45] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[44] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[43] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[42] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[41] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[40] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[39] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[38] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[37] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[36] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[35] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[34] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[33] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[32] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[31] 
= amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[30] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[29] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[28] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[27] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[26] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[25] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[24] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[23] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[22] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[21] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[21] = w[22]; w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 22: w[63] = amd_bytealign_S (w[41], w[40], offset_minus_4); w[62] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[61] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[60] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[59] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[58] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[57] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[56] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[55] = amd_bytealign_S 
(w[33], w[32], offset_minus_4); w[54] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[53] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[52] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[51] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[50] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[49] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[48] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[47] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[46] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[45] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[44] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[43] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[42] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[41] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[40] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[39] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[38] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[37] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[36] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[35] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[34] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[33] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[32] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[31] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[30] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[29] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[28] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[27] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[26] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[25] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[24] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[23] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[22] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; 
w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[22] = w[23]; w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 23: w[63] = amd_bytealign_S (w[40], w[39], offset_minus_4); w[62] = amd_bytealign_S (w[39], w[38], offset_minus_4); w[61] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[60] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[59] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[58] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[57] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[56] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[55] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[54] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[53] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[52] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[51] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[50] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[49] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[48] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[47] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[46] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[45] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[44] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[43] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[42] = amd_bytealign_S (w[19], 
w[18], offset_minus_4); w[41] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[40] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[39] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[38] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[37] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[36] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[35] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[34] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[33] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[32] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[31] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[30] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[29] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[28] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[27] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[26] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[25] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[24] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[23] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[23] = w[24]; w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 24: w[63] = 
amd_bytealign_S (w[39], w[38], offset_minus_4); w[62] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[61] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[60] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[59] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[58] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[57] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[56] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[55] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[54] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[53] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[52] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[51] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[50] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[49] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[48] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[47] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[46] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[45] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[44] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[43] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[42] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[41] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[40] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[39] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[38] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[37] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[36] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[35] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[34] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[33] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[32] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[31] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[30] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[29] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[28] = amd_bytealign_S (w[ 4], w[ 3], 
offset_minus_4); w[27] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[26] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[25] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[24] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[24] = w[25]; w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 25: w[63] = amd_bytealign_S (w[38], w[37], offset_minus_4); w[62] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[61] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[60] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[59] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[58] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[57] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[56] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[55] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[54] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[53] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[52] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[51] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[50] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[49] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[48] = 
amd_bytealign_S (w[23], w[22], offset_minus_4); w[47] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[46] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[45] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[44] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[43] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[42] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[41] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[40] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[39] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[38] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[37] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[36] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[35] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[34] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[33] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[32] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[31] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[30] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[29] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[28] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[27] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[26] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[25] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[25] = w[26]; w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = 
w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 26: w[63] = amd_bytealign_S (w[37], w[36], offset_minus_4); w[62] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[61] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[60] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[59] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[58] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[57] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[56] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[55] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[54] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[53] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[52] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[51] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[50] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[49] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[48] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[47] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[46] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[45] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[44] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[43] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[42] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[41] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[40] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[39] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[38] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[37] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[36] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[35] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[34] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[33] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[32] = amd_bytealign_S (w[ 6], 
w[ 5], offset_minus_4); w[31] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[30] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[29] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[28] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[27] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[26] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[26] = w[27]; w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 27: w[63] = amd_bytealign_S (w[36], w[35], offset_minus_4); w[62] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[61] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[60] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[59] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[58] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[57] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[56] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[55] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[54] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[53] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[52] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[51] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[50] = 
amd_bytealign_S (w[23], w[22], offset_minus_4); w[49] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[48] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[47] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[46] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[45] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[44] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[43] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[42] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[41] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[40] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[39] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[38] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[37] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[36] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[35] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[34] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[33] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[32] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[31] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[30] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[29] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[28] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[27] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[27] = w[28]; w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; 
w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 28: w[63] = amd_bytealign_S (w[35], w[34], offset_minus_4); w[62] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[61] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[60] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[59] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[58] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[57] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[56] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[55] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[54] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[53] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[52] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[51] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[50] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[49] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[48] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[47] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[46] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[45] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[44] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[43] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[42] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[41] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[40] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[39] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[38] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[37] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[36] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[35] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[34] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[33] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[32] = amd_bytealign_S (w[ 4], w[ 3], 
offset_minus_4); w[31] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[30] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[29] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[28] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[28] = w[29]; w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 29: w[63] = amd_bytealign_S (w[34], w[33], offset_minus_4); w[62] = amd_bytealign_S (w[33], w[32], offset_minus_4); w[61] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[60] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[59] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[58] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[57] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[56] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[55] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[54] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[53] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[52] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[51] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[50] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[49] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[48] = amd_bytealign_S (w[19], 
w[18], offset_minus_4); w[47] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[46] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[45] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[44] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[43] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[42] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[41] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[40] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[39] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[38] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[37] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[36] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[35] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[34] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[33] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[32] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[31] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[30] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[29] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[29] = w[30]; w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 30: w[63] = amd_bytealign_S (w[33], 
w[32], offset_minus_4); w[62] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[61] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[60] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[59] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[58] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[57] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[56] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[55] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[54] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[53] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[52] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[51] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[50] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[49] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[48] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[47] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[46] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[45] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[44] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[43] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[42] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[41] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[40] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[39] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[38] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[37] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[36] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[35] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[34] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[33] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[32] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[31] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[30] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; 
w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[30] = w[31]; w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 31: w[63] = amd_bytealign_S (w[32], w[31], offset_minus_4); w[62] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[61] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[60] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[59] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[58] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[57] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[56] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[55] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[54] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[53] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[52] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[51] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[50] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[49] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[48] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[47] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[46] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[45] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[44] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[43] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[42] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[41] = 
amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[40] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[39] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[38] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[37] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[36] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[35] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[34] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[33] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[32] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[31] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[31] = w[32]; w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 32: w[63] = amd_bytealign_S (w[31], w[30], offset_minus_4); w[62] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[61] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[60] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[59] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[58] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[57] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[56] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[55] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[54] = 
amd_bytealign_S (w[22], w[21], offset_minus_4); w[53] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[52] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[51] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[50] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[49] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[48] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[47] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[46] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[45] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[44] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[43] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[42] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[41] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[40] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[39] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[38] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[37] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[36] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[35] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[34] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[33] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[32] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[32] = w[33]; w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; 
w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 33: w[63] = amd_bytealign_S (w[30], w[29], offset_minus_4); w[62] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[61] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[60] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[59] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[58] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[57] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[56] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[55] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[54] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[53] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[52] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[51] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[50] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[49] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[48] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[47] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[46] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[45] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[44] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[43] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[42] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[41] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[40] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[39] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[38] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[37] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[36] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[35] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[34] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[33] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 
0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[33] = w[34]; w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 34: w[63] = amd_bytealign_S (w[29], w[28], offset_minus_4); w[62] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[61] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[60] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[59] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[58] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[57] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[56] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[55] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[54] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[53] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[52] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[51] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[50] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[49] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[48] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[47] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[46] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[45] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[44] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[43] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[42] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[41] = 
amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[40] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[39] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[38] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[37] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[36] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[35] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[34] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[34] = w[35]; w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 35: w[63] = amd_bytealign_S (w[28], w[27], offset_minus_4); w[62] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[61] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[60] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[59] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[58] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[57] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[56] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[55] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[54] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[53] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[52] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[51] = amd_bytealign_S 
(w[16], w[15], offset_minus_4); w[50] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[49] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[48] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[47] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[46] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[45] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[44] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[43] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[42] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[41] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[40] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[39] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[38] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[37] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[36] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[35] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[35] = w[36]; w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 36: w[63] = amd_bytealign_S (w[27], w[26], offset_minus_4); w[62] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[61] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[60] = amd_bytealign_S (w[24], w[23], 
offset_minus_4); w[59] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[58] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[57] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[56] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[55] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[54] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[53] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[52] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[51] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[50] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[49] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[48] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[47] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[46] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[45] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[44] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[43] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[42] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[41] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[40] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[39] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[38] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[37] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[36] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[36] = w[37]; w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = 
w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 37: w[63] = amd_bytealign_S (w[26], w[25], offset_minus_4); w[62] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[61] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[60] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[59] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[58] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[57] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[56] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[55] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[54] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[53] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[52] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[51] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[50] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[49] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[48] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[47] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[46] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[45] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[44] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[43] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[42] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[41] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[40] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[39] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[38] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[37] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; 
w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[37] = w[38]; w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 38: w[63] = amd_bytealign_S (w[25], w[24], offset_minus_4); w[62] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[61] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[60] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[59] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[58] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[57] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[56] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[55] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[54] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[53] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[52] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[51] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[50] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[49] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[48] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[47] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[46] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[45] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[44] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[43] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[42] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[41] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[40] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[39] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[38] = amd_bytealign_S 
(w[ 0], 0, offset_minus_4); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[38] = w[39]; w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 39: w[63] = amd_bytealign_S (w[24], w[23], offset_minus_4); w[62] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[61] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[60] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[59] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[58] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[57] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[56] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[55] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[54] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[53] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[52] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[51] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[50] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[49] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[48] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[47] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[46] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[45] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[44] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); 
w[43] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[42] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[41] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[40] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[39] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[39] = w[40]; w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 40: w[63] = amd_bytealign_S (w[23], w[22], offset_minus_4); w[62] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[61] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[60] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[59] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[58] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[57] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[56] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[55] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[54] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[53] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[52] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[51] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[50] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[49] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[48] = amd_bytealign_S (w[ 8], w[ 7], 
offset_minus_4); w[47] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[46] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[45] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[44] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[43] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[42] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[41] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[40] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[40] = w[41]; w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 41: w[63] = amd_bytealign_S (w[22], w[21], offset_minus_4); w[62] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[61] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[60] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[59] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[58] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[57] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[56] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[55] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[54] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[53] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[52] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[51] = amd_bytealign_S 
(w[10], w[ 9], offset_minus_4); w[50] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[49] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[48] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[47] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[46] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[45] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[44] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[43] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[42] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[41] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[41] = w[42]; w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 42: w[63] = amd_bytealign_S (w[21], w[20], offset_minus_4); w[62] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[61] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[60] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[59] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[58] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[57] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[56] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[55] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[54] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[53] = 
amd_bytealign_S (w[11], w[10], offset_minus_4); w[52] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[51] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[50] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[49] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[48] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[47] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[46] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[45] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[44] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[43] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[42] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[42] = w[43]; w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 43: w[63] = amd_bytealign_S (w[20], w[19], offset_minus_4); w[62] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[61] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[60] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[59] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[58] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[57] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[56] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[55] = amd_bytealign_S (w[12], w[11], offset_minus_4); 
w[54] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[53] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[52] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[51] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[50] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[49] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[48] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[47] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[46] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[45] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[44] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[43] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[43] = w[44]; w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 44: w[63] = amd_bytealign_S (w[19], w[18], offset_minus_4); w[62] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[61] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[60] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[59] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[58] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[57] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[56] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[55] = amd_bytealign_S (w[11], w[10], offset_minus_4); 
w[54] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[53] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[52] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[51] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[50] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[49] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[48] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[47] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[46] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[45] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[44] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[44] = w[45]; w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 45: w[63] = amd_bytealign_S (w[18], w[17], offset_minus_4); w[62] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[61] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[60] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[59] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[58] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[57] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[56] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[55] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[54] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); 
w[53] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[52] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[51] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[50] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[49] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[48] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[47] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[46] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[45] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[45] = w[46]; w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 46: w[63] = amd_bytealign_S (w[17], w[16], offset_minus_4); w[62] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[61] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[60] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[59] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[58] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[57] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[56] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[55] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[54] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[53] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[52] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[51] = 
amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[50] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[49] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[48] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[47] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[46] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[46] = w[47]; w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 47: w[63] = amd_bytealign_S (w[16], w[15], offset_minus_4); w[62] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[61] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[60] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[59] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[58] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[57] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[56] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[55] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[54] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[53] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[52] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[51] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[50] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[49] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[48] = 
amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[47] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[47] = w[48]; w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 48: w[63] = amd_bytealign_S (w[15], w[14], offset_minus_4); w[62] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[61] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[60] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[59] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[58] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[57] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[56] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[55] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[54] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[53] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[52] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[51] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[50] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[49] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[48] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 
0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[48] = w[49]; w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 49: w[63] = amd_bytealign_S (w[14], w[13], offset_minus_4); w[62] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[61] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[60] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[59] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[58] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[57] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[56] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[55] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[54] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[53] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[52] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[51] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[50] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[49] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if 
(offset_mod_4 == 0) { w[49] = w[50]; w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 50: w[63] = amd_bytealign_S (w[13], w[12], offset_minus_4); w[62] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[61] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[60] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[59] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[58] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[57] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[56] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[55] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[54] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[53] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[52] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[51] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[50] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[50] = w[51]; w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 51: w[63] = amd_bytealign_S (w[12], w[11], offset_minus_4); w[62] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[61] = amd_bytealign_S (w[10], w[ 9], 
offset_minus_4); w[60] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[59] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[58] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[57] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[56] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[55] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[54] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[53] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[52] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[51] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[51] = w[52]; w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 52: w[63] = amd_bytealign_S (w[11], w[10], offset_minus_4); w[62] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[61] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[60] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[59] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[58] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[57] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[56] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[55] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[54] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[53] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[52] = 
amd_bytealign_S (w[ 0], 0, offset_minus_4); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[52] = w[53]; w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 53: w[63] = amd_bytealign_S (w[10], w[ 9], offset_minus_4); w[62] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[61] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[60] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[59] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[58] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[57] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[56] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[55] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[54] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[53] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 
2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[53] = w[54]; w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 54: w[63] = amd_bytealign_S (w[ 9], w[ 8], offset_minus_4); w[62] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[61] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[60] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[59] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[58] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[57] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[56] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[55] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[54] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[54] = w[55]; w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 55: w[63] = amd_bytealign_S (w[ 8], w[ 7], offset_minus_4); w[62] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[61] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[60] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[59] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[58] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[57] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[56] = amd_bytealign_S (w[ 1], w[ 
0], offset_minus_4); w[55] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[55] = w[56]; w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 56: w[63] = amd_bytealign_S (w[ 7], w[ 6], offset_minus_4); w[62] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[61] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[60] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[59] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[58] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[57] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[56] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[56] = w[57]; w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; 
w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 57: w[63] = amd_bytealign_S (w[ 6], w[ 5], offset_minus_4); w[62] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[61] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[60] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[59] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[58] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[57] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[57] = w[58]; w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 58: w[63] = amd_bytealign_S (w[ 5], w[ 4], offset_minus_4); w[62] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[61] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[60] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[59] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[58] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] 
= 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[58] = w[59]; w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 59: w[63] = amd_bytealign_S (w[ 4], w[ 3], offset_minus_4); w[62] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[61] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[60] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[59] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[59] = w[60]; w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 60: w[63] = amd_bytealign_S (w[ 3], w[ 2], offset_minus_4); w[62] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[61] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[60] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; 
w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[60] = w[61]; w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 61: w[63] = amd_bytealign_S (w[ 2], w[ 1], offset_minus_4); w[62] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[61] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[61] = w[62]; w[62] = w[63]; w[63] = 0; } break; case 62: w[63] = amd_bytealign_S (w[ 1], w[ 0], offset_minus_4); w[62] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 
5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[62] = w[63]; w[63] = 0; } break; case 63: w[63] = amd_bytealign_S (w[ 0], 0, offset_minus_4); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; if (offset_mod_4 == 0) { w[63] = 0; } break; } #endif #ifdef IS_NV const int offset_minus_4 = 4 - (offset % 4); const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; switch (offset / 4) { case 0: w[63] = __byte_perm_S (w[62], w[63], selector); w[62] = __byte_perm_S (w[61], w[62], selector); w[61] = __byte_perm_S (w[60], w[61], selector); w[60] = __byte_perm_S (w[59], w[60], selector); w[59] = __byte_perm_S (w[58], w[59], selector); w[58] = __byte_perm_S (w[57], w[58], selector); w[57] = __byte_perm_S (w[56], w[57], selector); w[56] = __byte_perm_S (w[55], w[56], selector); w[55] = __byte_perm_S (w[54], w[55], selector); w[54] = __byte_perm_S (w[53], w[54], selector); w[53] = __byte_perm_S (w[52], w[53], selector); w[52] = __byte_perm_S (w[51], w[52], selector); w[51] = __byte_perm_S (w[50], w[51], selector); w[50] = __byte_perm_S (w[49], w[50], selector); w[49] = __byte_perm_S (w[48], w[49], selector); w[48] = __byte_perm_S (w[47], w[48], selector); w[47] = __byte_perm_S (w[46], w[47], selector); w[46] = __byte_perm_S (w[45], w[46], selector); w[45] = __byte_perm_S (w[44], w[45], selector); 
w[44] = __byte_perm_S (w[43], w[44], selector); w[43] = __byte_perm_S (w[42], w[43], selector); w[42] = __byte_perm_S (w[41], w[42], selector); w[41] = __byte_perm_S (w[40], w[41], selector); w[40] = __byte_perm_S (w[39], w[40], selector); w[39] = __byte_perm_S (w[38], w[39], selector); w[38] = __byte_perm_S (w[37], w[38], selector); w[37] = __byte_perm_S (w[36], w[37], selector); w[36] = __byte_perm_S (w[35], w[36], selector); w[35] = __byte_perm_S (w[34], w[35], selector); w[34] = __byte_perm_S (w[33], w[34], selector); w[33] = __byte_perm_S (w[32], w[33], selector); w[32] = __byte_perm_S (w[31], w[32], selector); w[31] = __byte_perm_S (w[30], w[31], selector); w[30] = __byte_perm_S (w[29], w[30], selector); w[29] = __byte_perm_S (w[28], w[29], selector); w[28] = __byte_perm_S (w[27], w[28], selector); w[27] = __byte_perm_S (w[26], w[27], selector); w[26] = __byte_perm_S (w[25], w[26], selector); w[25] = __byte_perm_S (w[24], w[25], selector); w[24] = __byte_perm_S (w[23], w[24], selector); w[23] = __byte_perm_S (w[22], w[23], selector); w[22] = __byte_perm_S (w[21], w[22], selector); w[21] = __byte_perm_S (w[20], w[21], selector); w[20] = __byte_perm_S (w[19], w[20], selector); w[19] = __byte_perm_S (w[18], w[19], selector); w[18] = __byte_perm_S (w[17], w[18], selector); w[17] = __byte_perm_S (w[16], w[17], selector); w[16] = __byte_perm_S (w[15], w[16], selector); w[15] = __byte_perm_S (w[14], w[15], selector); w[14] = __byte_perm_S (w[13], w[14], selector); w[13] = __byte_perm_S (w[12], w[13], selector); w[12] = __byte_perm_S (w[11], w[12], selector); w[11] = __byte_perm_S (w[10], w[11], selector); w[10] = __byte_perm_S (w[ 9], w[10], selector); w[ 9] = __byte_perm_S (w[ 8], w[ 9], selector); w[ 8] = __byte_perm_S (w[ 7], w[ 8], selector); w[ 7] = __byte_perm_S (w[ 6], w[ 7], selector); w[ 6] = __byte_perm_S (w[ 5], w[ 6], selector); w[ 5] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 4] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 3] = __byte_perm_S (w[ 2], 
w[ 3], selector); w[ 2] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 1] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 0] = __byte_perm_S ( 0, w[ 0], selector); break; case 1: w[63] = __byte_perm_S (w[61], w[62], selector); w[62] = __byte_perm_S (w[60], w[61], selector); w[61] = __byte_perm_S (w[59], w[60], selector); w[60] = __byte_perm_S (w[58], w[59], selector); w[59] = __byte_perm_S (w[57], w[58], selector); w[58] = __byte_perm_S (w[56], w[57], selector); w[57] = __byte_perm_S (w[55], w[56], selector); w[56] = __byte_perm_S (w[54], w[55], selector); w[55] = __byte_perm_S (w[53], w[54], selector); w[54] = __byte_perm_S (w[52], w[53], selector); w[53] = __byte_perm_S (w[51], w[52], selector); w[52] = __byte_perm_S (w[50], w[51], selector); w[51] = __byte_perm_S (w[49], w[50], selector); w[50] = __byte_perm_S (w[48], w[49], selector); w[49] = __byte_perm_S (w[47], w[48], selector); w[48] = __byte_perm_S (w[46], w[47], selector); w[47] = __byte_perm_S (w[45], w[46], selector); w[46] = __byte_perm_S (w[44], w[45], selector); w[45] = __byte_perm_S (w[43], w[44], selector); w[44] = __byte_perm_S (w[42], w[43], selector); w[43] = __byte_perm_S (w[41], w[42], selector); w[42] = __byte_perm_S (w[40], w[41], selector); w[41] = __byte_perm_S (w[39], w[40], selector); w[40] = __byte_perm_S (w[38], w[39], selector); w[39] = __byte_perm_S (w[37], w[38], selector); w[38] = __byte_perm_S (w[36], w[37], selector); w[37] = __byte_perm_S (w[35], w[36], selector); w[36] = __byte_perm_S (w[34], w[35], selector); w[35] = __byte_perm_S (w[33], w[34], selector); w[34] = __byte_perm_S (w[32], w[33], selector); w[33] = __byte_perm_S (w[31], w[32], selector); w[32] = __byte_perm_S (w[30], w[31], selector); w[31] = __byte_perm_S (w[29], w[30], selector); w[30] = __byte_perm_S (w[28], w[29], selector); w[29] = __byte_perm_S (w[27], w[28], selector); w[28] = __byte_perm_S (w[26], w[27], selector); w[27] = __byte_perm_S (w[25], w[26], selector); w[26] = __byte_perm_S (w[24], w[25], selector); 
w[25] = __byte_perm_S (w[23], w[24], selector); w[24] = __byte_perm_S (w[22], w[23], selector); w[23] = __byte_perm_S (w[21], w[22], selector); w[22] = __byte_perm_S (w[20], w[21], selector); w[21] = __byte_perm_S (w[19], w[20], selector); w[20] = __byte_perm_S (w[18], w[19], selector); w[19] = __byte_perm_S (w[17], w[18], selector); w[18] = __byte_perm_S (w[16], w[17], selector); w[17] = __byte_perm_S (w[15], w[16], selector); w[16] = __byte_perm_S (w[14], w[15], selector); w[15] = __byte_perm_S (w[13], w[14], selector); w[14] = __byte_perm_S (w[12], w[13], selector); w[13] = __byte_perm_S (w[11], w[12], selector); w[12] = __byte_perm_S (w[10], w[11], selector); w[11] = __byte_perm_S (w[ 9], w[10], selector); w[10] = __byte_perm_S (w[ 8], w[ 9], selector); w[ 9] = __byte_perm_S (w[ 7], w[ 8], selector); w[ 8] = __byte_perm_S (w[ 6], w[ 7], selector); w[ 7] = __byte_perm_S (w[ 5], w[ 6], selector); w[ 6] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 5] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 4] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 3] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 2] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 1] = __byte_perm_S ( 0, w[ 0], selector); w[ 0] = 0; break; case 2: w[63] = __byte_perm_S (w[60], w[61], selector); w[62] = __byte_perm_S (w[59], w[60], selector); w[61] = __byte_perm_S (w[58], w[59], selector); w[60] = __byte_perm_S (w[57], w[58], selector); w[59] = __byte_perm_S (w[56], w[57], selector); w[58] = __byte_perm_S (w[55], w[56], selector); w[57] = __byte_perm_S (w[54], w[55], selector); w[56] = __byte_perm_S (w[53], w[54], selector); w[55] = __byte_perm_S (w[52], w[53], selector); w[54] = __byte_perm_S (w[51], w[52], selector); w[53] = __byte_perm_S (w[50], w[51], selector); w[52] = __byte_perm_S (w[49], w[50], selector); w[51] = __byte_perm_S (w[48], w[49], selector); w[50] = __byte_perm_S (w[47], w[48], selector); w[49] = __byte_perm_S (w[46], w[47], selector); w[48] = __byte_perm_S (w[45], w[46], selector); w[47] = 
__byte_perm_S (w[44], w[45], selector); w[46] = __byte_perm_S (w[43], w[44], selector); w[45] = __byte_perm_S (w[42], w[43], selector); w[44] = __byte_perm_S (w[41], w[42], selector); w[43] = __byte_perm_S (w[40], w[41], selector); w[42] = __byte_perm_S (w[39], w[40], selector); w[41] = __byte_perm_S (w[38], w[39], selector); w[40] = __byte_perm_S (w[37], w[38], selector); w[39] = __byte_perm_S (w[36], w[37], selector); w[38] = __byte_perm_S (w[35], w[36], selector); w[37] = __byte_perm_S (w[34], w[35], selector); w[36] = __byte_perm_S (w[33], w[34], selector); w[35] = __byte_perm_S (w[32], w[33], selector); w[34] = __byte_perm_S (w[31], w[32], selector); w[33] = __byte_perm_S (w[30], w[31], selector); w[32] = __byte_perm_S (w[29], w[30], selector); w[31] = __byte_perm_S (w[28], w[29], selector); w[30] = __byte_perm_S (w[27], w[28], selector); w[29] = __byte_perm_S (w[26], w[27], selector); w[28] = __byte_perm_S (w[25], w[26], selector); w[27] = __byte_perm_S (w[24], w[25], selector); w[26] = __byte_perm_S (w[23], w[24], selector); w[25] = __byte_perm_S (w[22], w[23], selector); w[24] = __byte_perm_S (w[21], w[22], selector); w[23] = __byte_perm_S (w[20], w[21], selector); w[22] = __byte_perm_S (w[19], w[20], selector); w[21] = __byte_perm_S (w[18], w[19], selector); w[20] = __byte_perm_S (w[17], w[18], selector); w[19] = __byte_perm_S (w[16], w[17], selector); w[18] = __byte_perm_S (w[15], w[16], selector); w[17] = __byte_perm_S (w[14], w[15], selector); w[16] = __byte_perm_S (w[13], w[14], selector); w[15] = __byte_perm_S (w[12], w[13], selector); w[14] = __byte_perm_S (w[11], w[12], selector); w[13] = __byte_perm_S (w[10], w[11], selector); w[12] = __byte_perm_S (w[ 9], w[10], selector); w[11] = __byte_perm_S (w[ 8], w[ 9], selector); w[10] = __byte_perm_S (w[ 7], w[ 8], selector); w[ 9] = __byte_perm_S (w[ 6], w[ 7], selector); w[ 8] = __byte_perm_S (w[ 5], w[ 6], selector); w[ 7] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 6] = __byte_perm_S (w[ 3], w[ 4], 
selector); w[ 5] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 4] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 3] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 2] = __byte_perm_S ( 0, w[ 0], selector); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = __byte_perm_S (w[59], w[60], selector); w[62] = __byte_perm_S (w[58], w[59], selector); w[61] = __byte_perm_S (w[57], w[58], selector); w[60] = __byte_perm_S (w[56], w[57], selector); w[59] = __byte_perm_S (w[55], w[56], selector); w[58] = __byte_perm_S (w[54], w[55], selector); w[57] = __byte_perm_S (w[53], w[54], selector); w[56] = __byte_perm_S (w[52], w[53], selector); w[55] = __byte_perm_S (w[51], w[52], selector); w[54] = __byte_perm_S (w[50], w[51], selector); w[53] = __byte_perm_S (w[49], w[50], selector); w[52] = __byte_perm_S (w[48], w[49], selector); w[51] = __byte_perm_S (w[47], w[48], selector); w[50] = __byte_perm_S (w[46], w[47], selector); w[49] = __byte_perm_S (w[45], w[46], selector); w[48] = __byte_perm_S (w[44], w[45], selector); w[47] = __byte_perm_S (w[43], w[44], selector); w[46] = __byte_perm_S (w[42], w[43], selector); w[45] = __byte_perm_S (w[41], w[42], selector); w[44] = __byte_perm_S (w[40], w[41], selector); w[43] = __byte_perm_S (w[39], w[40], selector); w[42] = __byte_perm_S (w[38], w[39], selector); w[41] = __byte_perm_S (w[37], w[38], selector); w[40] = __byte_perm_S (w[36], w[37], selector); w[39] = __byte_perm_S (w[35], w[36], selector); w[38] = __byte_perm_S (w[34], w[35], selector); w[37] = __byte_perm_S (w[33], w[34], selector); w[36] = __byte_perm_S (w[32], w[33], selector); w[35] = __byte_perm_S (w[31], w[32], selector); w[34] = __byte_perm_S (w[30], w[31], selector); w[33] = __byte_perm_S (w[29], w[30], selector); w[32] = __byte_perm_S (w[28], w[29], selector); w[31] = __byte_perm_S (w[27], w[28], selector); w[30] = __byte_perm_S (w[26], w[27], selector); w[29] = __byte_perm_S (w[25], w[26], selector); w[28] = __byte_perm_S (w[24], w[25], selector); w[27] = __byte_perm_S (w[23], 
w[24], selector); w[26] = __byte_perm_S (w[22], w[23], selector); w[25] = __byte_perm_S (w[21], w[22], selector); w[24] = __byte_perm_S (w[20], w[21], selector); w[23] = __byte_perm_S (w[19], w[20], selector); w[22] = __byte_perm_S (w[18], w[19], selector); w[21] = __byte_perm_S (w[17], w[18], selector); w[20] = __byte_perm_S (w[16], w[17], selector); w[19] = __byte_perm_S (w[15], w[16], selector); w[18] = __byte_perm_S (w[14], w[15], selector); w[17] = __byte_perm_S (w[13], w[14], selector); w[16] = __byte_perm_S (w[12], w[13], selector); w[15] = __byte_perm_S (w[11], w[12], selector); w[14] = __byte_perm_S (w[10], w[11], selector); w[13] = __byte_perm_S (w[ 9], w[10], selector); w[12] = __byte_perm_S (w[ 8], w[ 9], selector); w[11] = __byte_perm_S (w[ 7], w[ 8], selector); w[10] = __byte_perm_S (w[ 6], w[ 7], selector); w[ 9] = __byte_perm_S (w[ 5], w[ 6], selector); w[ 8] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 7] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 6] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 5] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 4] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 3] = __byte_perm_S ( 0, w[ 0], selector); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = __byte_perm_S (w[58], w[59], selector); w[62] = __byte_perm_S (w[57], w[58], selector); w[61] = __byte_perm_S (w[56], w[57], selector); w[60] = __byte_perm_S (w[55], w[56], selector); w[59] = __byte_perm_S (w[54], w[55], selector); w[58] = __byte_perm_S (w[53], w[54], selector); w[57] = __byte_perm_S (w[52], w[53], selector); w[56] = __byte_perm_S (w[51], w[52], selector); w[55] = __byte_perm_S (w[50], w[51], selector); w[54] = __byte_perm_S (w[49], w[50], selector); w[53] = __byte_perm_S (w[48], w[49], selector); w[52] = __byte_perm_S (w[47], w[48], selector); w[51] = __byte_perm_S (w[46], w[47], selector); w[50] = __byte_perm_S (w[45], w[46], selector); w[49] = __byte_perm_S (w[44], w[45], selector); w[48] = __byte_perm_S (w[43], w[44], selector); w[47] = 
__byte_perm_S (w[42], w[43], selector); w[46] = __byte_perm_S (w[41], w[42], selector); w[45] = __byte_perm_S (w[40], w[41], selector); w[44] = __byte_perm_S (w[39], w[40], selector); w[43] = __byte_perm_S (w[38], w[39], selector); w[42] = __byte_perm_S (w[37], w[38], selector); w[41] = __byte_perm_S (w[36], w[37], selector); w[40] = __byte_perm_S (w[35], w[36], selector); w[39] = __byte_perm_S (w[34], w[35], selector); w[38] = __byte_perm_S (w[33], w[34], selector); w[37] = __byte_perm_S (w[32], w[33], selector); w[36] = __byte_perm_S (w[31], w[32], selector); w[35] = __byte_perm_S (w[30], w[31], selector); w[34] = __byte_perm_S (w[29], w[30], selector); w[33] = __byte_perm_S (w[28], w[29], selector); w[32] = __byte_perm_S (w[27], w[28], selector); w[31] = __byte_perm_S (w[26], w[27], selector); w[30] = __byte_perm_S (w[25], w[26], selector); w[29] = __byte_perm_S (w[24], w[25], selector); w[28] = __byte_perm_S (w[23], w[24], selector); w[27] = __byte_perm_S (w[22], w[23], selector); w[26] = __byte_perm_S (w[21], w[22], selector); w[25] = __byte_perm_S (w[20], w[21], selector); w[24] = __byte_perm_S (w[19], w[20], selector); w[23] = __byte_perm_S (w[18], w[19], selector); w[22] = __byte_perm_S (w[17], w[18], selector); w[21] = __byte_perm_S (w[16], w[17], selector); w[20] = __byte_perm_S (w[15], w[16], selector); w[19] = __byte_perm_S (w[14], w[15], selector); w[18] = __byte_perm_S (w[13], w[14], selector); w[17] = __byte_perm_S (w[12], w[13], selector); w[16] = __byte_perm_S (w[11], w[12], selector); w[15] = __byte_perm_S (w[10], w[11], selector); w[14] = __byte_perm_S (w[ 9], w[10], selector); w[13] = __byte_perm_S (w[ 8], w[ 9], selector); w[12] = __byte_perm_S (w[ 7], w[ 8], selector); w[11] = __byte_perm_S (w[ 6], w[ 7], selector); w[10] = __byte_perm_S (w[ 5], w[ 6], selector); w[ 9] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 8] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 7] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 6] = __byte_perm_S (w[ 1], w[ 2], 
selector); w[ 5] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 4] = __byte_perm_S ( 0, w[ 0], selector); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = __byte_perm_S (w[57], w[58], selector); w[62] = __byte_perm_S (w[56], w[57], selector); w[61] = __byte_perm_S (w[55], w[56], selector); w[60] = __byte_perm_S (w[54], w[55], selector); w[59] = __byte_perm_S (w[53], w[54], selector); w[58] = __byte_perm_S (w[52], w[53], selector); w[57] = __byte_perm_S (w[51], w[52], selector); w[56] = __byte_perm_S (w[50], w[51], selector); w[55] = __byte_perm_S (w[49], w[50], selector); w[54] = __byte_perm_S (w[48], w[49], selector); w[53] = __byte_perm_S (w[47], w[48], selector); w[52] = __byte_perm_S (w[46], w[47], selector); w[51] = __byte_perm_S (w[45], w[46], selector); w[50] = __byte_perm_S (w[44], w[45], selector); w[49] = __byte_perm_S (w[43], w[44], selector); w[48] = __byte_perm_S (w[42], w[43], selector); w[47] = __byte_perm_S (w[41], w[42], selector); w[46] = __byte_perm_S (w[40], w[41], selector); w[45] = __byte_perm_S (w[39], w[40], selector); w[44] = __byte_perm_S (w[38], w[39], selector); w[43] = __byte_perm_S (w[37], w[38], selector); w[42] = __byte_perm_S (w[36], w[37], selector); w[41] = __byte_perm_S (w[35], w[36], selector); w[40] = __byte_perm_S (w[34], w[35], selector); w[39] = __byte_perm_S (w[33], w[34], selector); w[38] = __byte_perm_S (w[32], w[33], selector); w[37] = __byte_perm_S (w[31], w[32], selector); w[36] = __byte_perm_S (w[30], w[31], selector); w[35] = __byte_perm_S (w[29], w[30], selector); w[34] = __byte_perm_S (w[28], w[29], selector); w[33] = __byte_perm_S (w[27], w[28], selector); w[32] = __byte_perm_S (w[26], w[27], selector); w[31] = __byte_perm_S (w[25], w[26], selector); w[30] = __byte_perm_S (w[24], w[25], selector); w[29] = __byte_perm_S (w[23], w[24], selector); w[28] = __byte_perm_S (w[22], w[23], selector); w[27] = __byte_perm_S (w[21], w[22], selector); w[26] = __byte_perm_S (w[20], w[21], selector); w[25] = 
__byte_perm_S (w[19], w[20], selector); w[24] = __byte_perm_S (w[18], w[19], selector); w[23] = __byte_perm_S (w[17], w[18], selector); w[22] = __byte_perm_S (w[16], w[17], selector); w[21] = __byte_perm_S (w[15], w[16], selector); w[20] = __byte_perm_S (w[14], w[15], selector); w[19] = __byte_perm_S (w[13], w[14], selector); w[18] = __byte_perm_S (w[12], w[13], selector); w[17] = __byte_perm_S (w[11], w[12], selector); w[16] = __byte_perm_S (w[10], w[11], selector); w[15] = __byte_perm_S (w[ 9], w[10], selector); w[14] = __byte_perm_S (w[ 8], w[ 9], selector); w[13] = __byte_perm_S (w[ 7], w[ 8], selector); w[12] = __byte_perm_S (w[ 6], w[ 7], selector); w[11] = __byte_perm_S (w[ 5], w[ 6], selector); w[10] = __byte_perm_S (w[ 4], w[ 5], selector); w[ 9] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 8] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 7] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 6] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 5] = __byte_perm_S ( 0, w[ 0], selector); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = __byte_perm_S (w[56], w[57], selector); w[62] = __byte_perm_S (w[55], w[56], selector); w[61] = __byte_perm_S (w[54], w[55], selector); w[60] = __byte_perm_S (w[53], w[54], selector); w[59] = __byte_perm_S (w[52], w[53], selector); w[58] = __byte_perm_S (w[51], w[52], selector); w[57] = __byte_perm_S (w[50], w[51], selector); w[56] = __byte_perm_S (w[49], w[50], selector); w[55] = __byte_perm_S (w[48], w[49], selector); w[54] = __byte_perm_S (w[47], w[48], selector); w[53] = __byte_perm_S (w[46], w[47], selector); w[52] = __byte_perm_S (w[45], w[46], selector); w[51] = __byte_perm_S (w[44], w[45], selector); w[50] = __byte_perm_S (w[43], w[44], selector); w[49] = __byte_perm_S (w[42], w[43], selector); w[48] = __byte_perm_S (w[41], w[42], selector); w[47] = __byte_perm_S (w[40], w[41], selector); w[46] = __byte_perm_S (w[39], w[40], selector); w[45] = __byte_perm_S (w[38], w[39], selector); w[44] = 
__byte_perm_S (w[37], w[38], selector); w[43] = __byte_perm_S (w[36], w[37], selector); w[42] = __byte_perm_S (w[35], w[36], selector); w[41] = __byte_perm_S (w[34], w[35], selector); w[40] = __byte_perm_S (w[33], w[34], selector); w[39] = __byte_perm_S (w[32], w[33], selector); w[38] = __byte_perm_S (w[31], w[32], selector); w[37] = __byte_perm_S (w[30], w[31], selector); w[36] = __byte_perm_S (w[29], w[30], selector); w[35] = __byte_perm_S (w[28], w[29], selector); w[34] = __byte_perm_S (w[27], w[28], selector); w[33] = __byte_perm_S (w[26], w[27], selector); w[32] = __byte_perm_S (w[25], w[26], selector); w[31] = __byte_perm_S (w[24], w[25], selector); w[30] = __byte_perm_S (w[23], w[24], selector); w[29] = __byte_perm_S (w[22], w[23], selector); w[28] = __byte_perm_S (w[21], w[22], selector); w[27] = __byte_perm_S (w[20], w[21], selector); w[26] = __byte_perm_S (w[19], w[20], selector); w[25] = __byte_perm_S (w[18], w[19], selector); w[24] = __byte_perm_S (w[17], w[18], selector); w[23] = __byte_perm_S (w[16], w[17], selector); w[22] = __byte_perm_S (w[15], w[16], selector); w[21] = __byte_perm_S (w[14], w[15], selector); w[20] = __byte_perm_S (w[13], w[14], selector); w[19] = __byte_perm_S (w[12], w[13], selector); w[18] = __byte_perm_S (w[11], w[12], selector); w[17] = __byte_perm_S (w[10], w[11], selector); w[16] = __byte_perm_S (w[ 9], w[10], selector); w[15] = __byte_perm_S (w[ 8], w[ 9], selector); w[14] = __byte_perm_S (w[ 7], w[ 8], selector); w[13] = __byte_perm_S (w[ 6], w[ 7], selector); w[12] = __byte_perm_S (w[ 5], w[ 6], selector); w[11] = __byte_perm_S (w[ 4], w[ 5], selector); w[10] = __byte_perm_S (w[ 3], w[ 4], selector); w[ 9] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 8] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 7] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 6] = __byte_perm_S ( 0, w[ 0], selector); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = __byte_perm_S (w[55], w[56], selector); w[62] = 
__byte_perm_S (w[54], w[55], selector); w[61] = __byte_perm_S (w[53], w[54], selector); w[60] = __byte_perm_S (w[52], w[53], selector); w[59] = __byte_perm_S (w[51], w[52], selector); w[58] = __byte_perm_S (w[50], w[51], selector); w[57] = __byte_perm_S (w[49], w[50], selector); w[56] = __byte_perm_S (w[48], w[49], selector); w[55] = __byte_perm_S (w[47], w[48], selector); w[54] = __byte_perm_S (w[46], w[47], selector); w[53] = __byte_perm_S (w[45], w[46], selector); w[52] = __byte_perm_S (w[44], w[45], selector); w[51] = __byte_perm_S (w[43], w[44], selector); w[50] = __byte_perm_S (w[42], w[43], selector); w[49] = __byte_perm_S (w[41], w[42], selector); w[48] = __byte_perm_S (w[40], w[41], selector); w[47] = __byte_perm_S (w[39], w[40], selector); w[46] = __byte_perm_S (w[38], w[39], selector); w[45] = __byte_perm_S (w[37], w[38], selector); w[44] = __byte_perm_S (w[36], w[37], selector); w[43] = __byte_perm_S (w[35], w[36], selector); w[42] = __byte_perm_S (w[34], w[35], selector); w[41] = __byte_perm_S (w[33], w[34], selector); w[40] = __byte_perm_S (w[32], w[33], selector); w[39] = __byte_perm_S (w[31], w[32], selector); w[38] = __byte_perm_S (w[30], w[31], selector); w[37] = __byte_perm_S (w[29], w[30], selector); w[36] = __byte_perm_S (w[28], w[29], selector); w[35] = __byte_perm_S (w[27], w[28], selector); w[34] = __byte_perm_S (w[26], w[27], selector); w[33] = __byte_perm_S (w[25], w[26], selector); w[32] = __byte_perm_S (w[24], w[25], selector); w[31] = __byte_perm_S (w[23], w[24], selector); w[30] = __byte_perm_S (w[22], w[23], selector); w[29] = __byte_perm_S (w[21], w[22], selector); w[28] = __byte_perm_S (w[20], w[21], selector); w[27] = __byte_perm_S (w[19], w[20], selector); w[26] = __byte_perm_S (w[18], w[19], selector); w[25] = __byte_perm_S (w[17], w[18], selector); w[24] = __byte_perm_S (w[16], w[17], selector); w[23] = __byte_perm_S (w[15], w[16], selector); w[22] = __byte_perm_S (w[14], w[15], selector); w[21] = __byte_perm_S (w[13], w[14], 
selector); w[20] = __byte_perm_S (w[12], w[13], selector); w[19] = __byte_perm_S (w[11], w[12], selector); w[18] = __byte_perm_S (w[10], w[11], selector); w[17] = __byte_perm_S (w[ 9], w[10], selector); w[16] = __byte_perm_S (w[ 8], w[ 9], selector); w[15] = __byte_perm_S (w[ 7], w[ 8], selector); w[14] = __byte_perm_S (w[ 6], w[ 7], selector); w[13] = __byte_perm_S (w[ 5], w[ 6], selector); w[12] = __byte_perm_S (w[ 4], w[ 5], selector); w[11] = __byte_perm_S (w[ 3], w[ 4], selector); w[10] = __byte_perm_S (w[ 2], w[ 3], selector); w[ 9] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 8] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 7] = __byte_perm_S ( 0, w[ 0], selector); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = __byte_perm_S (w[54], w[55], selector); w[62] = __byte_perm_S (w[53], w[54], selector); w[61] = __byte_perm_S (w[52], w[53], selector); w[60] = __byte_perm_S (w[51], w[52], selector); w[59] = __byte_perm_S (w[50], w[51], selector); w[58] = __byte_perm_S (w[49], w[50], selector); w[57] = __byte_perm_S (w[48], w[49], selector); w[56] = __byte_perm_S (w[47], w[48], selector); w[55] = __byte_perm_S (w[46], w[47], selector); w[54] = __byte_perm_S (w[45], w[46], selector); w[53] = __byte_perm_S (w[44], w[45], selector); w[52] = __byte_perm_S (w[43], w[44], selector); w[51] = __byte_perm_S (w[42], w[43], selector); w[50] = __byte_perm_S (w[41], w[42], selector); w[49] = __byte_perm_S (w[40], w[41], selector); w[48] = __byte_perm_S (w[39], w[40], selector); w[47] = __byte_perm_S (w[38], w[39], selector); w[46] = __byte_perm_S (w[37], w[38], selector); w[45] = __byte_perm_S (w[36], w[37], selector); w[44] = __byte_perm_S (w[35], w[36], selector); w[43] = __byte_perm_S (w[34], w[35], selector); w[42] = __byte_perm_S (w[33], w[34], selector); w[41] = __byte_perm_S (w[32], w[33], selector); w[40] = __byte_perm_S (w[31], w[32], selector); w[39] = __byte_perm_S (w[30], w[31], selector); w[38] = __byte_perm_S 
(w[29], w[30], selector); w[37] = __byte_perm_S (w[28], w[29], selector); w[36] = __byte_perm_S (w[27], w[28], selector); w[35] = __byte_perm_S (w[26], w[27], selector); w[34] = __byte_perm_S (w[25], w[26], selector); w[33] = __byte_perm_S (w[24], w[25], selector); w[32] = __byte_perm_S (w[23], w[24], selector); w[31] = __byte_perm_S (w[22], w[23], selector); w[30] = __byte_perm_S (w[21], w[22], selector); w[29] = __byte_perm_S (w[20], w[21], selector); w[28] = __byte_perm_S (w[19], w[20], selector); w[27] = __byte_perm_S (w[18], w[19], selector); w[26] = __byte_perm_S (w[17], w[18], selector); w[25] = __byte_perm_S (w[16], w[17], selector); w[24] = __byte_perm_S (w[15], w[16], selector); w[23] = __byte_perm_S (w[14], w[15], selector); w[22] = __byte_perm_S (w[13], w[14], selector); w[21] = __byte_perm_S (w[12], w[13], selector); w[20] = __byte_perm_S (w[11], w[12], selector); w[19] = __byte_perm_S (w[10], w[11], selector); w[18] = __byte_perm_S (w[ 9], w[10], selector); w[17] = __byte_perm_S (w[ 8], w[ 9], selector); w[16] = __byte_perm_S (w[ 7], w[ 8], selector); w[15] = __byte_perm_S (w[ 6], w[ 7], selector); w[14] = __byte_perm_S (w[ 5], w[ 6], selector); w[13] = __byte_perm_S (w[ 4], w[ 5], selector); w[12] = __byte_perm_S (w[ 3], w[ 4], selector); w[11] = __byte_perm_S (w[ 2], w[ 3], selector); w[10] = __byte_perm_S (w[ 1], w[ 2], selector); w[ 9] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 8] = __byte_perm_S ( 0, w[ 0], selector); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = __byte_perm_S (w[53], w[54], selector); w[62] = __byte_perm_S (w[52], w[53], selector); w[61] = __byte_perm_S (w[51], w[52], selector); w[60] = __byte_perm_S (w[50], w[51], selector); w[59] = __byte_perm_S (w[49], w[50], selector); w[58] = __byte_perm_S (w[48], w[49], selector); w[57] = __byte_perm_S (w[47], w[48], selector); w[56] = __byte_perm_S (w[46], w[47], selector); w[55] = __byte_perm_S (w[45], w[46], selector); 
w[54] = __byte_perm_S (w[44], w[45], selector); w[53] = __byte_perm_S (w[43], w[44], selector); w[52] = __byte_perm_S (w[42], w[43], selector); w[51] = __byte_perm_S (w[41], w[42], selector); w[50] = __byte_perm_S (w[40], w[41], selector); w[49] = __byte_perm_S (w[39], w[40], selector); w[48] = __byte_perm_S (w[38], w[39], selector); w[47] = __byte_perm_S (w[37], w[38], selector); w[46] = __byte_perm_S (w[36], w[37], selector); w[45] = __byte_perm_S (w[35], w[36], selector); w[44] = __byte_perm_S (w[34], w[35], selector); w[43] = __byte_perm_S (w[33], w[34], selector); w[42] = __byte_perm_S (w[32], w[33], selector); w[41] = __byte_perm_S (w[31], w[32], selector); w[40] = __byte_perm_S (w[30], w[31], selector); w[39] = __byte_perm_S (w[29], w[30], selector); w[38] = __byte_perm_S (w[28], w[29], selector); w[37] = __byte_perm_S (w[27], w[28], selector); w[36] = __byte_perm_S (w[26], w[27], selector); w[35] = __byte_perm_S (w[25], w[26], selector); w[34] = __byte_perm_S (w[24], w[25], selector); w[33] = __byte_perm_S (w[23], w[24], selector); w[32] = __byte_perm_S (w[22], w[23], selector); w[31] = __byte_perm_S (w[21], w[22], selector); w[30] = __byte_perm_S (w[20], w[21], selector); w[29] = __byte_perm_S (w[19], w[20], selector); w[28] = __byte_perm_S (w[18], w[19], selector); w[27] = __byte_perm_S (w[17], w[18], selector); w[26] = __byte_perm_S (w[16], w[17], selector); w[25] = __byte_perm_S (w[15], w[16], selector); w[24] = __byte_perm_S (w[14], w[15], selector); w[23] = __byte_perm_S (w[13], w[14], selector); w[22] = __byte_perm_S (w[12], w[13], selector); w[21] = __byte_perm_S (w[11], w[12], selector); w[20] = __byte_perm_S (w[10], w[11], selector); w[19] = __byte_perm_S (w[ 9], w[10], selector); w[18] = __byte_perm_S (w[ 8], w[ 9], selector); w[17] = __byte_perm_S (w[ 7], w[ 8], selector); w[16] = __byte_perm_S (w[ 6], w[ 7], selector); w[15] = __byte_perm_S (w[ 5], w[ 6], selector); w[14] = __byte_perm_S (w[ 4], w[ 5], selector); w[13] = __byte_perm_S (w[ 3], 
w[ 4], selector); w[12] = __byte_perm_S (w[ 2], w[ 3], selector); w[11] = __byte_perm_S (w[ 1], w[ 2], selector); w[10] = __byte_perm_S (w[ 0], w[ 1], selector); w[ 9] = __byte_perm_S ( 0, w[ 0], selector); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = __byte_perm_S (w[52], w[53], selector); w[62] = __byte_perm_S (w[51], w[52], selector); w[61] = __byte_perm_S (w[50], w[51], selector); w[60] = __byte_perm_S (w[49], w[50], selector); w[59] = __byte_perm_S (w[48], w[49], selector); w[58] = __byte_perm_S (w[47], w[48], selector); w[57] = __byte_perm_S (w[46], w[47], selector); w[56] = __byte_perm_S (w[45], w[46], selector); w[55] = __byte_perm_S (w[44], w[45], selector); w[54] = __byte_perm_S (w[43], w[44], selector); w[53] = __byte_perm_S (w[42], w[43], selector); w[52] = __byte_perm_S (w[41], w[42], selector); w[51] = __byte_perm_S (w[40], w[41], selector); w[50] = __byte_perm_S (w[39], w[40], selector); w[49] = __byte_perm_S (w[38], w[39], selector); w[48] = __byte_perm_S (w[37], w[38], selector); w[47] = __byte_perm_S (w[36], w[37], selector); w[46] = __byte_perm_S (w[35], w[36], selector); w[45] = __byte_perm_S (w[34], w[35], selector); w[44] = __byte_perm_S (w[33], w[34], selector); w[43] = __byte_perm_S (w[32], w[33], selector); w[42] = __byte_perm_S (w[31], w[32], selector); w[41] = __byte_perm_S (w[30], w[31], selector); w[40] = __byte_perm_S (w[29], w[30], selector); w[39] = __byte_perm_S (w[28], w[29], selector); w[38] = __byte_perm_S (w[27], w[28], selector); w[37] = __byte_perm_S (w[26], w[27], selector); w[36] = __byte_perm_S (w[25], w[26], selector); w[35] = __byte_perm_S (w[24], w[25], selector); w[34] = __byte_perm_S (w[23], w[24], selector); w[33] = __byte_perm_S (w[22], w[23], selector); w[32] = __byte_perm_S (w[21], w[22], selector); w[31] = __byte_perm_S (w[20], w[21], selector); w[30] = __byte_perm_S (w[19], w[20], selector); w[29] = __byte_perm_S (w[18], w[19], 
selector); w[28] = __byte_perm_S (w[17], w[18], selector); w[27] = __byte_perm_S (w[16], w[17], selector); w[26] = __byte_perm_S (w[15], w[16], selector); w[25] = __byte_perm_S (w[14], w[15], selector); w[24] = __byte_perm_S (w[13], w[14], selector); w[23] = __byte_perm_S (w[12], w[13], selector); w[22] = __byte_perm_S (w[11], w[12], selector); w[21] = __byte_perm_S (w[10], w[11], selector); w[20] = __byte_perm_S (w[ 9], w[10], selector); w[19] = __byte_perm_S (w[ 8], w[ 9], selector); w[18] = __byte_perm_S (w[ 7], w[ 8], selector); w[17] = __byte_perm_S (w[ 6], w[ 7], selector); w[16] = __byte_perm_S (w[ 5], w[ 6], selector); w[15] = __byte_perm_S (w[ 4], w[ 5], selector); w[14] = __byte_perm_S (w[ 3], w[ 4], selector); w[13] = __byte_perm_S (w[ 2], w[ 3], selector); w[12] = __byte_perm_S (w[ 1], w[ 2], selector); w[11] = __byte_perm_S (w[ 0], w[ 1], selector); w[10] = __byte_perm_S ( 0, w[ 0], selector); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = __byte_perm_S (w[51], w[52], selector); w[62] = __byte_perm_S (w[50], w[51], selector); w[61] = __byte_perm_S (w[49], w[50], selector); w[60] = __byte_perm_S (w[48], w[49], selector); w[59] = __byte_perm_S (w[47], w[48], selector); w[58] = __byte_perm_S (w[46], w[47], selector); w[57] = __byte_perm_S (w[45], w[46], selector); w[56] = __byte_perm_S (w[44], w[45], selector); w[55] = __byte_perm_S (w[43], w[44], selector); w[54] = __byte_perm_S (w[42], w[43], selector); w[53] = __byte_perm_S (w[41], w[42], selector); w[52] = __byte_perm_S (w[40], w[41], selector); w[51] = __byte_perm_S (w[39], w[40], selector); w[50] = __byte_perm_S (w[38], w[39], selector); w[49] = __byte_perm_S (w[37], w[38], selector); w[48] = __byte_perm_S (w[36], w[37], selector); w[47] = __byte_perm_S (w[35], w[36], selector); w[46] = __byte_perm_S (w[34], w[35], selector); w[45] = __byte_perm_S (w[33], w[34], selector); w[44] = __byte_perm_S (w[32], w[33], 
selector); w[43] = __byte_perm_S (w[31], w[32], selector); w[42] = __byte_perm_S (w[30], w[31], selector); w[41] = __byte_perm_S (w[29], w[30], selector); w[40] = __byte_perm_S (w[28], w[29], selector); w[39] = __byte_perm_S (w[27], w[28], selector); w[38] = __byte_perm_S (w[26], w[27], selector); w[37] = __byte_perm_S (w[25], w[26], selector); w[36] = __byte_perm_S (w[24], w[25], selector); w[35] = __byte_perm_S (w[23], w[24], selector); w[34] = __byte_perm_S (w[22], w[23], selector); w[33] = __byte_perm_S (w[21], w[22], selector); w[32] = __byte_perm_S (w[20], w[21], selector); w[31] = __byte_perm_S (w[19], w[20], selector); w[30] = __byte_perm_S (w[18], w[19], selector); w[29] = __byte_perm_S (w[17], w[18], selector); w[28] = __byte_perm_S (w[16], w[17], selector); w[27] = __byte_perm_S (w[15], w[16], selector); w[26] = __byte_perm_S (w[14], w[15], selector); w[25] = __byte_perm_S (w[13], w[14], selector); w[24] = __byte_perm_S (w[12], w[13], selector); w[23] = __byte_perm_S (w[11], w[12], selector); w[22] = __byte_perm_S (w[10], w[11], selector); w[21] = __byte_perm_S (w[ 9], w[10], selector); w[20] = __byte_perm_S (w[ 8], w[ 9], selector); w[19] = __byte_perm_S (w[ 7], w[ 8], selector); w[18] = __byte_perm_S (w[ 6], w[ 7], selector); w[17] = __byte_perm_S (w[ 5], w[ 6], selector); w[16] = __byte_perm_S (w[ 4], w[ 5], selector); w[15] = __byte_perm_S (w[ 3], w[ 4], selector); w[14] = __byte_perm_S (w[ 2], w[ 3], selector); w[13] = __byte_perm_S (w[ 1], w[ 2], selector); w[12] = __byte_perm_S (w[ 0], w[ 1], selector); w[11] = __byte_perm_S ( 0, w[ 0], selector); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = __byte_perm_S (w[50], w[51], selector); w[62] = __byte_perm_S (w[49], w[50], selector); w[61] = __byte_perm_S (w[48], w[49], selector); w[60] = __byte_perm_S (w[47], w[48], selector); w[59] = __byte_perm_S (w[46], w[47], selector); w[58] = __byte_perm_S (w[45], 
w[46], selector); w[57] = __byte_perm_S (w[44], w[45], selector); w[56] = __byte_perm_S (w[43], w[44], selector); w[55] = __byte_perm_S (w[42], w[43], selector); w[54] = __byte_perm_S (w[41], w[42], selector); w[53] = __byte_perm_S (w[40], w[41], selector); w[52] = __byte_perm_S (w[39], w[40], selector); w[51] = __byte_perm_S (w[38], w[39], selector); w[50] = __byte_perm_S (w[37], w[38], selector); w[49] = __byte_perm_S (w[36], w[37], selector); w[48] = __byte_perm_S (w[35], w[36], selector); w[47] = __byte_perm_S (w[34], w[35], selector); w[46] = __byte_perm_S (w[33], w[34], selector); w[45] = __byte_perm_S (w[32], w[33], selector); w[44] = __byte_perm_S (w[31], w[32], selector); w[43] = __byte_perm_S (w[30], w[31], selector); w[42] = __byte_perm_S (w[29], w[30], selector); w[41] = __byte_perm_S (w[28], w[29], selector); w[40] = __byte_perm_S (w[27], w[28], selector); w[39] = __byte_perm_S (w[26], w[27], selector); w[38] = __byte_perm_S (w[25], w[26], selector); w[37] = __byte_perm_S (w[24], w[25], selector); w[36] = __byte_perm_S (w[23], w[24], selector); w[35] = __byte_perm_S (w[22], w[23], selector); w[34] = __byte_perm_S (w[21], w[22], selector); w[33] = __byte_perm_S (w[20], w[21], selector); w[32] = __byte_perm_S (w[19], w[20], selector); w[31] = __byte_perm_S (w[18], w[19], selector); w[30] = __byte_perm_S (w[17], w[18], selector); w[29] = __byte_perm_S (w[16], w[17], selector); w[28] = __byte_perm_S (w[15], w[16], selector); w[27] = __byte_perm_S (w[14], w[15], selector); w[26] = __byte_perm_S (w[13], w[14], selector); w[25] = __byte_perm_S (w[12], w[13], selector); w[24] = __byte_perm_S (w[11], w[12], selector); w[23] = __byte_perm_S (w[10], w[11], selector); w[22] = __byte_perm_S (w[ 9], w[10], selector); w[21] = __byte_perm_S (w[ 8], w[ 9], selector); w[20] = __byte_perm_S (w[ 7], w[ 8], selector); w[19] = __byte_perm_S (w[ 6], w[ 7], selector); w[18] = __byte_perm_S (w[ 5], w[ 6], selector); w[17] = __byte_perm_S (w[ 4], w[ 5], selector); w[16] = 
__byte_perm_S (w[ 3], w[ 4], selector); w[15] = __byte_perm_S (w[ 2], w[ 3], selector); w[14] = __byte_perm_S (w[ 1], w[ 2], selector); w[13] = __byte_perm_S (w[ 0], w[ 1], selector); w[12] = __byte_perm_S ( 0, w[ 0], selector); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = __byte_perm_S (w[49], w[50], selector); w[62] = __byte_perm_S (w[48], w[49], selector); w[61] = __byte_perm_S (w[47], w[48], selector); w[60] = __byte_perm_S (w[46], w[47], selector); w[59] = __byte_perm_S (w[45], w[46], selector); w[58] = __byte_perm_S (w[44], w[45], selector); w[57] = __byte_perm_S (w[43], w[44], selector); w[56] = __byte_perm_S (w[42], w[43], selector); w[55] = __byte_perm_S (w[41], w[42], selector); w[54] = __byte_perm_S (w[40], w[41], selector); w[53] = __byte_perm_S (w[39], w[40], selector); w[52] = __byte_perm_S (w[38], w[39], selector); w[51] = __byte_perm_S (w[37], w[38], selector); w[50] = __byte_perm_S (w[36], w[37], selector); w[49] = __byte_perm_S (w[35], w[36], selector); w[48] = __byte_perm_S (w[34], w[35], selector); w[47] = __byte_perm_S (w[33], w[34], selector); w[46] = __byte_perm_S (w[32], w[33], selector); w[45] = __byte_perm_S (w[31], w[32], selector); w[44] = __byte_perm_S (w[30], w[31], selector); w[43] = __byte_perm_S (w[29], w[30], selector); w[42] = __byte_perm_S (w[28], w[29], selector); w[41] = __byte_perm_S (w[27], w[28], selector); w[40] = __byte_perm_S (w[26], w[27], selector); w[39] = __byte_perm_S (w[25], w[26], selector); w[38] = __byte_perm_S (w[24], w[25], selector); w[37] = __byte_perm_S (w[23], w[24], selector); w[36] = __byte_perm_S (w[22], w[23], selector); w[35] = __byte_perm_S (w[21], w[22], selector); w[34] = __byte_perm_S (w[20], w[21], selector); w[33] = __byte_perm_S (w[19], w[20], selector); w[32] = __byte_perm_S (w[18], w[19], selector); w[31] = __byte_perm_S (w[17], w[18], selector); w[30] = __byte_perm_S (w[16], w[17], 
selector); w[29] = __byte_perm_S (w[15], w[16], selector); w[28] = __byte_perm_S (w[14], w[15], selector); w[27] = __byte_perm_S (w[13], w[14], selector); w[26] = __byte_perm_S (w[12], w[13], selector); w[25] = __byte_perm_S (w[11], w[12], selector); w[24] = __byte_perm_S (w[10], w[11], selector); w[23] = __byte_perm_S (w[ 9], w[10], selector); w[22] = __byte_perm_S (w[ 8], w[ 9], selector); w[21] = __byte_perm_S (w[ 7], w[ 8], selector); w[20] = __byte_perm_S (w[ 6], w[ 7], selector); w[19] = __byte_perm_S (w[ 5], w[ 6], selector); w[18] = __byte_perm_S (w[ 4], w[ 5], selector); w[17] = __byte_perm_S (w[ 3], w[ 4], selector); w[16] = __byte_perm_S (w[ 2], w[ 3], selector); w[15] = __byte_perm_S (w[ 1], w[ 2], selector); w[14] = __byte_perm_S (w[ 0], w[ 1], selector); w[13] = __byte_perm_S ( 0, w[ 0], selector); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = __byte_perm_S (w[48], w[49], selector); w[62] = __byte_perm_S (w[47], w[48], selector); w[61] = __byte_perm_S (w[46], w[47], selector); w[60] = __byte_perm_S (w[45], w[46], selector); w[59] = __byte_perm_S (w[44], w[45], selector); w[58] = __byte_perm_S (w[43], w[44], selector); w[57] = __byte_perm_S (w[42], w[43], selector); w[56] = __byte_perm_S (w[41], w[42], selector); w[55] = __byte_perm_S (w[40], w[41], selector); w[54] = __byte_perm_S (w[39], w[40], selector); w[53] = __byte_perm_S (w[38], w[39], selector); w[52] = __byte_perm_S (w[37], w[38], selector); w[51] = __byte_perm_S (w[36], w[37], selector); w[50] = __byte_perm_S (w[35], w[36], selector); w[49] = __byte_perm_S (w[34], w[35], selector); w[48] = __byte_perm_S (w[33], w[34], selector); w[47] = __byte_perm_S (w[32], w[33], selector); w[46] = __byte_perm_S (w[31], w[32], selector); w[45] = __byte_perm_S (w[30], w[31], selector); w[44] = __byte_perm_S (w[29], w[30], selector); w[43] = __byte_perm_S (w[28], w[29], selector); w[42] = 
__byte_perm_S (w[27], w[28], selector); w[41] = __byte_perm_S (w[26], w[27], selector); w[40] = __byte_perm_S (w[25], w[26], selector); w[39] = __byte_perm_S (w[24], w[25], selector); w[38] = __byte_perm_S (w[23], w[24], selector); w[37] = __byte_perm_S (w[22], w[23], selector); w[36] = __byte_perm_S (w[21], w[22], selector); w[35] = __byte_perm_S (w[20], w[21], selector); w[34] = __byte_perm_S (w[19], w[20], selector); w[33] = __byte_perm_S (w[18], w[19], selector); w[32] = __byte_perm_S (w[17], w[18], selector); w[31] = __byte_perm_S (w[16], w[17], selector); w[30] = __byte_perm_S (w[15], w[16], selector); w[29] = __byte_perm_S (w[14], w[15], selector); w[28] = __byte_perm_S (w[13], w[14], selector); w[27] = __byte_perm_S (w[12], w[13], selector); w[26] = __byte_perm_S (w[11], w[12], selector); w[25] = __byte_perm_S (w[10], w[11], selector); w[24] = __byte_perm_S (w[ 9], w[10], selector); w[23] = __byte_perm_S (w[ 8], w[ 9], selector); w[22] = __byte_perm_S (w[ 7], w[ 8], selector); w[21] = __byte_perm_S (w[ 6], w[ 7], selector); w[20] = __byte_perm_S (w[ 5], w[ 6], selector); w[19] = __byte_perm_S (w[ 4], w[ 5], selector); w[18] = __byte_perm_S (w[ 3], w[ 4], selector); w[17] = __byte_perm_S (w[ 2], w[ 3], selector); w[16] = __byte_perm_S (w[ 1], w[ 2], selector); w[15] = __byte_perm_S (w[ 0], w[ 1], selector); w[14] = __byte_perm_S ( 0, w[ 0], selector); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = __byte_perm_S (w[47], w[48], selector); w[62] = __byte_perm_S (w[46], w[47], selector); w[61] = __byte_perm_S (w[45], w[46], selector); w[60] = __byte_perm_S (w[44], w[45], selector); w[59] = __byte_perm_S (w[43], w[44], selector); w[58] = __byte_perm_S (w[42], w[43], selector); w[57] = __byte_perm_S (w[41], w[42], selector); w[56] = __byte_perm_S (w[40], w[41], selector); w[55] = __byte_perm_S (w[39], w[40], selector); w[54] = 
__byte_perm_S (w[38], w[39], selector); w[53] = __byte_perm_S (w[37], w[38], selector); w[52] = __byte_perm_S (w[36], w[37], selector); w[51] = __byte_perm_S (w[35], w[36], selector); w[50] = __byte_perm_S (w[34], w[35], selector); w[49] = __byte_perm_S (w[33], w[34], selector); w[48] = __byte_perm_S (w[32], w[33], selector); w[47] = __byte_perm_S (w[31], w[32], selector); w[46] = __byte_perm_S (w[30], w[31], selector); w[45] = __byte_perm_S (w[29], w[30], selector); w[44] = __byte_perm_S (w[28], w[29], selector); w[43] = __byte_perm_S (w[27], w[28], selector); w[42] = __byte_perm_S (w[26], w[27], selector); w[41] = __byte_perm_S (w[25], w[26], selector); w[40] = __byte_perm_S (w[24], w[25], selector); w[39] = __byte_perm_S (w[23], w[24], selector); w[38] = __byte_perm_S (w[22], w[23], selector); w[37] = __byte_perm_S (w[21], w[22], selector); w[36] = __byte_perm_S (w[20], w[21], selector); w[35] = __byte_perm_S (w[19], w[20], selector); w[34] = __byte_perm_S (w[18], w[19], selector); w[33] = __byte_perm_S (w[17], w[18], selector); w[32] = __byte_perm_S (w[16], w[17], selector); w[31] = __byte_perm_S (w[15], w[16], selector); w[30] = __byte_perm_S (w[14], w[15], selector); w[29] = __byte_perm_S (w[13], w[14], selector); w[28] = __byte_perm_S (w[12], w[13], selector); w[27] = __byte_perm_S (w[11], w[12], selector); w[26] = __byte_perm_S (w[10], w[11], selector); w[25] = __byte_perm_S (w[ 9], w[10], selector); w[24] = __byte_perm_S (w[ 8], w[ 9], selector); w[23] = __byte_perm_S (w[ 7], w[ 8], selector); w[22] = __byte_perm_S (w[ 6], w[ 7], selector); w[21] = __byte_perm_S (w[ 5], w[ 6], selector); w[20] = __byte_perm_S (w[ 4], w[ 5], selector); w[19] = __byte_perm_S (w[ 3], w[ 4], selector); w[18] = __byte_perm_S (w[ 2], w[ 3], selector); w[17] = __byte_perm_S (w[ 1], w[ 2], selector); w[16] = __byte_perm_S (w[ 0], w[ 1], selector); w[15] = __byte_perm_S ( 0, w[ 0], selector); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 
6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = __byte_perm_S (w[46], w[47], selector); w[62] = __byte_perm_S (w[45], w[46], selector); w[61] = __byte_perm_S (w[44], w[45], selector); w[60] = __byte_perm_S (w[43], w[44], selector); w[59] = __byte_perm_S (w[42], w[43], selector); w[58] = __byte_perm_S (w[41], w[42], selector); w[57] = __byte_perm_S (w[40], w[41], selector); w[56] = __byte_perm_S (w[39], w[40], selector); w[55] = __byte_perm_S (w[38], w[39], selector); w[54] = __byte_perm_S (w[37], w[38], selector); w[53] = __byte_perm_S (w[36], w[37], selector); w[52] = __byte_perm_S (w[35], w[36], selector); w[51] = __byte_perm_S (w[34], w[35], selector); w[50] = __byte_perm_S (w[33], w[34], selector); w[49] = __byte_perm_S (w[32], w[33], selector); w[48] = __byte_perm_S (w[31], w[32], selector); w[47] = __byte_perm_S (w[30], w[31], selector); w[46] = __byte_perm_S (w[29], w[30], selector); w[45] = __byte_perm_S (w[28], w[29], selector); w[44] = __byte_perm_S (w[27], w[28], selector); w[43] = __byte_perm_S (w[26], w[27], selector); w[42] = __byte_perm_S (w[25], w[26], selector); w[41] = __byte_perm_S (w[24], w[25], selector); w[40] = __byte_perm_S (w[23], w[24], selector); w[39] = __byte_perm_S (w[22], w[23], selector); w[38] = __byte_perm_S (w[21], w[22], selector); w[37] = __byte_perm_S (w[20], w[21], selector); w[36] = __byte_perm_S (w[19], w[20], selector); w[35] = __byte_perm_S (w[18], w[19], selector); w[34] = __byte_perm_S (w[17], w[18], selector); w[33] = __byte_perm_S (w[16], w[17], selector); w[32] = __byte_perm_S (w[15], w[16], selector); w[31] = __byte_perm_S (w[14], w[15], selector); w[30] = __byte_perm_S (w[13], w[14], selector); w[29] = __byte_perm_S (w[12], w[13], selector); w[28] = __byte_perm_S (w[11], w[12], selector); w[27] = __byte_perm_S (w[10], w[11], selector); w[26] = __byte_perm_S (w[ 9], w[10], selector); w[25] = __byte_perm_S (w[ 8], w[ 9], selector); w[24] = __byte_perm_S (w[ 7], w[ 8], 
selector); w[23] = __byte_perm_S (w[ 6], w[ 7], selector); w[22] = __byte_perm_S (w[ 5], w[ 6], selector); w[21] = __byte_perm_S (w[ 4], w[ 5], selector); w[20] = __byte_perm_S (w[ 3], w[ 4], selector); w[19] = __byte_perm_S (w[ 2], w[ 3], selector); w[18] = __byte_perm_S (w[ 1], w[ 2], selector); w[17] = __byte_perm_S (w[ 0], w[ 1], selector); w[16] = __byte_perm_S ( 0, w[ 0], selector); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = __byte_perm_S (w[45], w[46], selector); w[62] = __byte_perm_S (w[44], w[45], selector); w[61] = __byte_perm_S (w[43], w[44], selector); w[60] = __byte_perm_S (w[42], w[43], selector); w[59] = __byte_perm_S (w[41], w[42], selector); w[58] = __byte_perm_S (w[40], w[41], selector); w[57] = __byte_perm_S (w[39], w[40], selector); w[56] = __byte_perm_S (w[38], w[39], selector); w[55] = __byte_perm_S (w[37], w[38], selector); w[54] = __byte_perm_S (w[36], w[37], selector); w[53] = __byte_perm_S (w[35], w[36], selector); w[52] = __byte_perm_S (w[34], w[35], selector); w[51] = __byte_perm_S (w[33], w[34], selector); w[50] = __byte_perm_S (w[32], w[33], selector); w[49] = __byte_perm_S (w[31], w[32], selector); w[48] = __byte_perm_S (w[30], w[31], selector); w[47] = __byte_perm_S (w[29], w[30], selector); w[46] = __byte_perm_S (w[28], w[29], selector); w[45] = __byte_perm_S (w[27], w[28], selector); w[44] = __byte_perm_S (w[26], w[27], selector); w[43] = __byte_perm_S (w[25], w[26], selector); w[42] = __byte_perm_S (w[24], w[25], selector); w[41] = __byte_perm_S (w[23], w[24], selector); w[40] = __byte_perm_S (w[22], w[23], selector); w[39] = __byte_perm_S (w[21], w[22], selector); w[38] = __byte_perm_S (w[20], w[21], selector); w[37] = __byte_perm_S (w[19], w[20], selector); w[36] = __byte_perm_S (w[18], w[19], selector); w[35] = __byte_perm_S (w[17], w[18], selector); w[34] = __byte_perm_S 
(w[16], w[17], selector); w[33] = __byte_perm_S (w[15], w[16], selector); w[32] = __byte_perm_S (w[14], w[15], selector); w[31] = __byte_perm_S (w[13], w[14], selector); w[30] = __byte_perm_S (w[12], w[13], selector); w[29] = __byte_perm_S (w[11], w[12], selector); w[28] = __byte_perm_S (w[10], w[11], selector); w[27] = __byte_perm_S (w[ 9], w[10], selector); w[26] = __byte_perm_S (w[ 8], w[ 9], selector); w[25] = __byte_perm_S (w[ 7], w[ 8], selector); w[24] = __byte_perm_S (w[ 6], w[ 7], selector); w[23] = __byte_perm_S (w[ 5], w[ 6], selector); w[22] = __byte_perm_S (w[ 4], w[ 5], selector); w[21] = __byte_perm_S (w[ 3], w[ 4], selector); w[20] = __byte_perm_S (w[ 2], w[ 3], selector); w[19] = __byte_perm_S (w[ 1], w[ 2], selector); w[18] = __byte_perm_S (w[ 0], w[ 1], selector); w[17] = __byte_perm_S ( 0, w[ 0], selector); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = __byte_perm_S (w[44], w[45], selector); w[62] = __byte_perm_S (w[43], w[44], selector); w[61] = __byte_perm_S (w[42], w[43], selector); w[60] = __byte_perm_S (w[41], w[42], selector); w[59] = __byte_perm_S (w[40], w[41], selector); w[58] = __byte_perm_S (w[39], w[40], selector); w[57] = __byte_perm_S (w[38], w[39], selector); w[56] = __byte_perm_S (w[37], w[38], selector); w[55] = __byte_perm_S (w[36], w[37], selector); w[54] = __byte_perm_S (w[35], w[36], selector); w[53] = __byte_perm_S (w[34], w[35], selector); w[52] = __byte_perm_S (w[33], w[34], selector); w[51] = __byte_perm_S (w[32], w[33], selector); w[50] = __byte_perm_S (w[31], w[32], selector); w[49] = __byte_perm_S (w[30], w[31], selector); w[48] = __byte_perm_S (w[29], w[30], selector); w[47] = __byte_perm_S (w[28], w[29], selector); w[46] = __byte_perm_S (w[27], w[28], selector); w[45] = __byte_perm_S (w[26], w[27], selector); w[44] = __byte_perm_S (w[25], w[26], 
selector); w[43] = __byte_perm_S (w[24], w[25], selector); w[42] = __byte_perm_S (w[23], w[24], selector); w[41] = __byte_perm_S (w[22], w[23], selector); w[40] = __byte_perm_S (w[21], w[22], selector); w[39] = __byte_perm_S (w[20], w[21], selector); w[38] = __byte_perm_S (w[19], w[20], selector); w[37] = __byte_perm_S (w[18], w[19], selector); w[36] = __byte_perm_S (w[17], w[18], selector); w[35] = __byte_perm_S (w[16], w[17], selector); w[34] = __byte_perm_S (w[15], w[16], selector); w[33] = __byte_perm_S (w[14], w[15], selector); w[32] = __byte_perm_S (w[13], w[14], selector); w[31] = __byte_perm_S (w[12], w[13], selector); w[30] = __byte_perm_S (w[11], w[12], selector); w[29] = __byte_perm_S (w[10], w[11], selector); w[28] = __byte_perm_S (w[ 9], w[10], selector); w[27] = __byte_perm_S (w[ 8], w[ 9], selector); w[26] = __byte_perm_S (w[ 7], w[ 8], selector); w[25] = __byte_perm_S (w[ 6], w[ 7], selector); w[24] = __byte_perm_S (w[ 5], w[ 6], selector); w[23] = __byte_perm_S (w[ 4], w[ 5], selector); w[22] = __byte_perm_S (w[ 3], w[ 4], selector); w[21] = __byte_perm_S (w[ 2], w[ 3], selector); w[20] = __byte_perm_S (w[ 1], w[ 2], selector); w[19] = __byte_perm_S (w[ 0], w[ 1], selector); w[18] = __byte_perm_S ( 0, w[ 0], selector); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = __byte_perm_S (w[43], w[44], selector); w[62] = __byte_perm_S (w[42], w[43], selector); w[61] = __byte_perm_S (w[41], w[42], selector); w[60] = __byte_perm_S (w[40], w[41], selector); w[59] = __byte_perm_S (w[39], w[40], selector); w[58] = __byte_perm_S (w[38], w[39], selector); w[57] = __byte_perm_S (w[37], w[38], selector); w[56] = __byte_perm_S (w[36], w[37], selector); w[55] = __byte_perm_S (w[35], w[36], selector); w[54] = __byte_perm_S (w[34], w[35], selector); w[53] = __byte_perm_S (w[33], w[34], selector); 
w[52] = __byte_perm_S (w[32], w[33], selector); w[51] = __byte_perm_S (w[31], w[32], selector); w[50] = __byte_perm_S (w[30], w[31], selector); w[49] = __byte_perm_S (w[29], w[30], selector); w[48] = __byte_perm_S (w[28], w[29], selector); w[47] = __byte_perm_S (w[27], w[28], selector); w[46] = __byte_perm_S (w[26], w[27], selector); w[45] = __byte_perm_S (w[25], w[26], selector); w[44] = __byte_perm_S (w[24], w[25], selector); w[43] = __byte_perm_S (w[23], w[24], selector); w[42] = __byte_perm_S (w[22], w[23], selector); w[41] = __byte_perm_S (w[21], w[22], selector); w[40] = __byte_perm_S (w[20], w[21], selector); w[39] = __byte_perm_S (w[19], w[20], selector); w[38] = __byte_perm_S (w[18], w[19], selector); w[37] = __byte_perm_S (w[17], w[18], selector); w[36] = __byte_perm_S (w[16], w[17], selector); w[35] = __byte_perm_S (w[15], w[16], selector); w[34] = __byte_perm_S (w[14], w[15], selector); w[33] = __byte_perm_S (w[13], w[14], selector); w[32] = __byte_perm_S (w[12], w[13], selector); w[31] = __byte_perm_S (w[11], w[12], selector); w[30] = __byte_perm_S (w[10], w[11], selector); w[29] = __byte_perm_S (w[ 9], w[10], selector); w[28] = __byte_perm_S (w[ 8], w[ 9], selector); w[27] = __byte_perm_S (w[ 7], w[ 8], selector); w[26] = __byte_perm_S (w[ 6], w[ 7], selector); w[25] = __byte_perm_S (w[ 5], w[ 6], selector); w[24] = __byte_perm_S (w[ 4], w[ 5], selector); w[23] = __byte_perm_S (w[ 3], w[ 4], selector); w[22] = __byte_perm_S (w[ 2], w[ 3], selector); w[21] = __byte_perm_S (w[ 1], w[ 2], selector); w[20] = __byte_perm_S (w[ 0], w[ 1], selector); w[19] = __byte_perm_S ( 0, w[ 0], selector); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = __byte_perm_S (w[42], w[43], selector); w[62] = __byte_perm_S (w[41], w[42], selector); w[61] = __byte_perm_S (w[40], w[41], selector); 
w[60] = __byte_perm_S (w[39], w[40], selector); w[59] = __byte_perm_S (w[38], w[39], selector); w[58] = __byte_perm_S (w[37], w[38], selector); w[57] = __byte_perm_S (w[36], w[37], selector); w[56] = __byte_perm_S (w[35], w[36], selector); w[55] = __byte_perm_S (w[34], w[35], selector); w[54] = __byte_perm_S (w[33], w[34], selector); w[53] = __byte_perm_S (w[32], w[33], selector); w[52] = __byte_perm_S (w[31], w[32], selector); w[51] = __byte_perm_S (w[30], w[31], selector); w[50] = __byte_perm_S (w[29], w[30], selector); w[49] = __byte_perm_S (w[28], w[29], selector); w[48] = __byte_perm_S (w[27], w[28], selector); w[47] = __byte_perm_S (w[26], w[27], selector); w[46] = __byte_perm_S (w[25], w[26], selector); w[45] = __byte_perm_S (w[24], w[25], selector); w[44] = __byte_perm_S (w[23], w[24], selector); w[43] = __byte_perm_S (w[22], w[23], selector); w[42] = __byte_perm_S (w[21], w[22], selector); w[41] = __byte_perm_S (w[20], w[21], selector); w[40] = __byte_perm_S (w[19], w[20], selector); w[39] = __byte_perm_S (w[18], w[19], selector); w[38] = __byte_perm_S (w[17], w[18], selector); w[37] = __byte_perm_S (w[16], w[17], selector); w[36] = __byte_perm_S (w[15], w[16], selector); w[35] = __byte_perm_S (w[14], w[15], selector); w[34] = __byte_perm_S (w[13], w[14], selector); w[33] = __byte_perm_S (w[12], w[13], selector); w[32] = __byte_perm_S (w[11], w[12], selector); w[31] = __byte_perm_S (w[10], w[11], selector); w[30] = __byte_perm_S (w[ 9], w[10], selector); w[29] = __byte_perm_S (w[ 8], w[ 9], selector); w[28] = __byte_perm_S (w[ 7], w[ 8], selector); w[27] = __byte_perm_S (w[ 6], w[ 7], selector); w[26] = __byte_perm_S (w[ 5], w[ 6], selector); w[25] = __byte_perm_S (w[ 4], w[ 5], selector); w[24] = __byte_perm_S (w[ 3], w[ 4], selector); w[23] = __byte_perm_S (w[ 2], w[ 3], selector); w[22] = __byte_perm_S (w[ 1], w[ 2], selector); w[21] = __byte_perm_S (w[ 0], w[ 1], selector); w[20] = __byte_perm_S ( 0, w[ 0], selector); w[19] = 0; w[18] = 0; w[17] = 0; 
w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = __byte_perm_S (w[41], w[42], selector); w[62] = __byte_perm_S (w[40], w[41], selector); w[61] = __byte_perm_S (w[39], w[40], selector); w[60] = __byte_perm_S (w[38], w[39], selector); w[59] = __byte_perm_S (w[37], w[38], selector); w[58] = __byte_perm_S (w[36], w[37], selector); w[57] = __byte_perm_S (w[35], w[36], selector); w[56] = __byte_perm_S (w[34], w[35], selector); w[55] = __byte_perm_S (w[33], w[34], selector); w[54] = __byte_perm_S (w[32], w[33], selector); w[53] = __byte_perm_S (w[31], w[32], selector); w[52] = __byte_perm_S (w[30], w[31], selector); w[51] = __byte_perm_S (w[29], w[30], selector); w[50] = __byte_perm_S (w[28], w[29], selector); w[49] = __byte_perm_S (w[27], w[28], selector); w[48] = __byte_perm_S (w[26], w[27], selector); w[47] = __byte_perm_S (w[25], w[26], selector); w[46] = __byte_perm_S (w[24], w[25], selector); w[45] = __byte_perm_S (w[23], w[24], selector); w[44] = __byte_perm_S (w[22], w[23], selector); w[43] = __byte_perm_S (w[21], w[22], selector); w[42] = __byte_perm_S (w[20], w[21], selector); w[41] = __byte_perm_S (w[19], w[20], selector); w[40] = __byte_perm_S (w[18], w[19], selector); w[39] = __byte_perm_S (w[17], w[18], selector); w[38] = __byte_perm_S (w[16], w[17], selector); w[37] = __byte_perm_S (w[15], w[16], selector); w[36] = __byte_perm_S (w[14], w[15], selector); w[35] = __byte_perm_S (w[13], w[14], selector); w[34] = __byte_perm_S (w[12], w[13], selector); w[33] = __byte_perm_S (w[11], w[12], selector); w[32] = __byte_perm_S (w[10], w[11], selector); w[31] = __byte_perm_S (w[ 9], w[10], selector); w[30] = __byte_perm_S (w[ 8], w[ 9], selector); w[29] = __byte_perm_S (w[ 7], w[ 8], selector); w[28] = __byte_perm_S (w[ 6], w[ 7], selector); w[27] = __byte_perm_S (w[ 5], w[ 6], selector); w[26] = 
__byte_perm_S (w[ 4], w[ 5], selector); w[25] = __byte_perm_S (w[ 3], w[ 4], selector); w[24] = __byte_perm_S (w[ 2], w[ 3], selector); w[23] = __byte_perm_S (w[ 1], w[ 2], selector); w[22] = __byte_perm_S (w[ 0], w[ 1], selector); w[21] = __byte_perm_S ( 0, w[ 0], selector); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = __byte_perm_S (w[40], w[41], selector); w[62] = __byte_perm_S (w[39], w[40], selector); w[61] = __byte_perm_S (w[38], w[39], selector); w[60] = __byte_perm_S (w[37], w[38], selector); w[59] = __byte_perm_S (w[36], w[37], selector); w[58] = __byte_perm_S (w[35], w[36], selector); w[57] = __byte_perm_S (w[34], w[35], selector); w[56] = __byte_perm_S (w[33], w[34], selector); w[55] = __byte_perm_S (w[32], w[33], selector); w[54] = __byte_perm_S (w[31], w[32], selector); w[53] = __byte_perm_S (w[30], w[31], selector); w[52] = __byte_perm_S (w[29], w[30], selector); w[51] = __byte_perm_S (w[28], w[29], selector); w[50] = __byte_perm_S (w[27], w[28], selector); w[49] = __byte_perm_S (w[26], w[27], selector); w[48] = __byte_perm_S (w[25], w[26], selector); w[47] = __byte_perm_S (w[24], w[25], selector); w[46] = __byte_perm_S (w[23], w[24], selector); w[45] = __byte_perm_S (w[22], w[23], selector); w[44] = __byte_perm_S (w[21], w[22], selector); w[43] = __byte_perm_S (w[20], w[21], selector); w[42] = __byte_perm_S (w[19], w[20], selector); w[41] = __byte_perm_S (w[18], w[19], selector); w[40] = __byte_perm_S (w[17], w[18], selector); w[39] = __byte_perm_S (w[16], w[17], selector); w[38] = __byte_perm_S (w[15], w[16], selector); w[37] = __byte_perm_S (w[14], w[15], selector); w[36] = __byte_perm_S (w[13], w[14], selector); w[35] = __byte_perm_S (w[12], w[13], selector); w[34] = __byte_perm_S (w[11], w[12], selector); w[33] = __byte_perm_S (w[10], 
w[11], selector); w[32] = __byte_perm_S (w[ 9], w[10], selector); w[31] = __byte_perm_S (w[ 8], w[ 9], selector); w[30] = __byte_perm_S (w[ 7], w[ 8], selector); w[29] = __byte_perm_S (w[ 6], w[ 7], selector); w[28] = __byte_perm_S (w[ 5], w[ 6], selector); w[27] = __byte_perm_S (w[ 4], w[ 5], selector); w[26] = __byte_perm_S (w[ 3], w[ 4], selector); w[25] = __byte_perm_S (w[ 2], w[ 3], selector); w[24] = __byte_perm_S (w[ 1], w[ 2], selector); w[23] = __byte_perm_S (w[ 0], w[ 1], selector); w[22] = __byte_perm_S ( 0, w[ 0], selector); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = __byte_perm_S (w[39], w[40], selector); w[62] = __byte_perm_S (w[38], w[39], selector); w[61] = __byte_perm_S (w[37], w[38], selector); w[60] = __byte_perm_S (w[36], w[37], selector); w[59] = __byte_perm_S (w[35], w[36], selector); w[58] = __byte_perm_S (w[34], w[35], selector); w[57] = __byte_perm_S (w[33], w[34], selector); w[56] = __byte_perm_S (w[32], w[33], selector); w[55] = __byte_perm_S (w[31], w[32], selector); w[54] = __byte_perm_S (w[30], w[31], selector); w[53] = __byte_perm_S (w[29], w[30], selector); w[52] = __byte_perm_S (w[28], w[29], selector); w[51] = __byte_perm_S (w[27], w[28], selector); w[50] = __byte_perm_S (w[26], w[27], selector); w[49] = __byte_perm_S (w[25], w[26], selector); w[48] = __byte_perm_S (w[24], w[25], selector); w[47] = __byte_perm_S (w[23], w[24], selector); w[46] = __byte_perm_S (w[22], w[23], selector); w[45] = __byte_perm_S (w[21], w[22], selector); w[44] = __byte_perm_S (w[20], w[21], selector); w[43] = __byte_perm_S (w[19], w[20], selector); w[42] = __byte_perm_S (w[18], w[19], selector); w[41] = __byte_perm_S (w[17], w[18], selector); w[40] = __byte_perm_S (w[16], w[17], selector); w[39] = __byte_perm_S (w[15], w[16], 
selector); w[38] = __byte_perm_S (w[14], w[15], selector); w[37] = __byte_perm_S (w[13], w[14], selector); w[36] = __byte_perm_S (w[12], w[13], selector); w[35] = __byte_perm_S (w[11], w[12], selector); w[34] = __byte_perm_S (w[10], w[11], selector); w[33] = __byte_perm_S (w[ 9], w[10], selector); w[32] = __byte_perm_S (w[ 8], w[ 9], selector); w[31] = __byte_perm_S (w[ 7], w[ 8], selector); w[30] = __byte_perm_S (w[ 6], w[ 7], selector); w[29] = __byte_perm_S (w[ 5], w[ 6], selector); w[28] = __byte_perm_S (w[ 4], w[ 5], selector); w[27] = __byte_perm_S (w[ 3], w[ 4], selector); w[26] = __byte_perm_S (w[ 2], w[ 3], selector); w[25] = __byte_perm_S (w[ 1], w[ 2], selector); w[24] = __byte_perm_S (w[ 0], w[ 1], selector); w[23] = __byte_perm_S ( 0, w[ 0], selector); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = __byte_perm_S (w[38], w[39], selector); w[62] = __byte_perm_S (w[37], w[38], selector); w[61] = __byte_perm_S (w[36], w[37], selector); w[60] = __byte_perm_S (w[35], w[36], selector); w[59] = __byte_perm_S (w[34], w[35], selector); w[58] = __byte_perm_S (w[33], w[34], selector); w[57] = __byte_perm_S (w[32], w[33], selector); w[56] = __byte_perm_S (w[31], w[32], selector); w[55] = __byte_perm_S (w[30], w[31], selector); w[54] = __byte_perm_S (w[29], w[30], selector); w[53] = __byte_perm_S (w[28], w[29], selector); w[52] = __byte_perm_S (w[27], w[28], selector); w[51] = __byte_perm_S (w[26], w[27], selector); w[50] = __byte_perm_S (w[25], w[26], selector); w[49] = __byte_perm_S (w[24], w[25], selector); w[48] = __byte_perm_S (w[23], w[24], selector); w[47] = __byte_perm_S (w[22], w[23], selector); w[46] = __byte_perm_S (w[21], w[22], selector); w[45] = __byte_perm_S (w[20], w[21], selector); w[44] = __byte_perm_S (w[19], w[20], 
selector); w[43] = __byte_perm_S (w[18], w[19], selector); w[42] = __byte_perm_S (w[17], w[18], selector); w[41] = __byte_perm_S (w[16], w[17], selector); w[40] = __byte_perm_S (w[15], w[16], selector); w[39] = __byte_perm_S (w[14], w[15], selector); w[38] = __byte_perm_S (w[13], w[14], selector); w[37] = __byte_perm_S (w[12], w[13], selector); w[36] = __byte_perm_S (w[11], w[12], selector); w[35] = __byte_perm_S (w[10], w[11], selector); w[34] = __byte_perm_S (w[ 9], w[10], selector); w[33] = __byte_perm_S (w[ 8], w[ 9], selector); w[32] = __byte_perm_S (w[ 7], w[ 8], selector); w[31] = __byte_perm_S (w[ 6], w[ 7], selector); w[30] = __byte_perm_S (w[ 5], w[ 6], selector); w[29] = __byte_perm_S (w[ 4], w[ 5], selector); w[28] = __byte_perm_S (w[ 3], w[ 4], selector); w[27] = __byte_perm_S (w[ 2], w[ 3], selector); w[26] = __byte_perm_S (w[ 1], w[ 2], selector); w[25] = __byte_perm_S (w[ 0], w[ 1], selector); w[24] = __byte_perm_S ( 0, w[ 0], selector); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = __byte_perm_S (w[37], w[38], selector); w[62] = __byte_perm_S (w[36], w[37], selector); w[61] = __byte_perm_S (w[35], w[36], selector); w[60] = __byte_perm_S (w[34], w[35], selector); w[59] = __byte_perm_S (w[33], w[34], selector); w[58] = __byte_perm_S (w[32], w[33], selector); w[57] = __byte_perm_S (w[31], w[32], selector); w[56] = __byte_perm_S (w[30], w[31], selector); w[55] = __byte_perm_S (w[29], w[30], selector); w[54] = __byte_perm_S (w[28], w[29], selector); w[53] = __byte_perm_S (w[27], w[28], selector); w[52] = __byte_perm_S (w[26], w[27], selector); w[51] = __byte_perm_S (w[25], w[26], selector); w[50] = __byte_perm_S (w[24], w[25], selector); w[49] = __byte_perm_S (w[23], w[24], selector); w[48] = __byte_perm_S (w[22], 
w[23], selector); w[47] = __byte_perm_S (w[21], w[22], selector); w[46] = __byte_perm_S (w[20], w[21], selector); w[45] = __byte_perm_S (w[19], w[20], selector); w[44] = __byte_perm_S (w[18], w[19], selector); w[43] = __byte_perm_S (w[17], w[18], selector); w[42] = __byte_perm_S (w[16], w[17], selector); w[41] = __byte_perm_S (w[15], w[16], selector); w[40] = __byte_perm_S (w[14], w[15], selector); w[39] = __byte_perm_S (w[13], w[14], selector); w[38] = __byte_perm_S (w[12], w[13], selector); w[37] = __byte_perm_S (w[11], w[12], selector); w[36] = __byte_perm_S (w[10], w[11], selector); w[35] = __byte_perm_S (w[ 9], w[10], selector); w[34] = __byte_perm_S (w[ 8], w[ 9], selector); w[33] = __byte_perm_S (w[ 7], w[ 8], selector); w[32] = __byte_perm_S (w[ 6], w[ 7], selector); w[31] = __byte_perm_S (w[ 5], w[ 6], selector); w[30] = __byte_perm_S (w[ 4], w[ 5], selector); w[29] = __byte_perm_S (w[ 3], w[ 4], selector); w[28] = __byte_perm_S (w[ 2], w[ 3], selector); w[27] = __byte_perm_S (w[ 1], w[ 2], selector); w[26] = __byte_perm_S (w[ 0], w[ 1], selector); w[25] = __byte_perm_S ( 0, w[ 0], selector); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = __byte_perm_S (w[36], w[37], selector); w[62] = __byte_perm_S (w[35], w[36], selector); w[61] = __byte_perm_S (w[34], w[35], selector); w[60] = __byte_perm_S (w[33], w[34], selector); w[59] = __byte_perm_S (w[32], w[33], selector); w[58] = __byte_perm_S (w[31], w[32], selector); w[57] = __byte_perm_S (w[30], w[31], selector); w[56] = __byte_perm_S (w[29], w[30], selector); w[55] = __byte_perm_S (w[28], w[29], selector); w[54] = __byte_perm_S (w[27], w[28], selector); w[53] = __byte_perm_S (w[26], w[27], selector); w[52] = __byte_perm_S (w[25], w[26], selector); w[51] = 
__byte_perm_S (w[24], w[25], selector); w[50] = __byte_perm_S (w[23], w[24], selector); w[49] = __byte_perm_S (w[22], w[23], selector); w[48] = __byte_perm_S (w[21], w[22], selector); w[47] = __byte_perm_S (w[20], w[21], selector); w[46] = __byte_perm_S (w[19], w[20], selector); w[45] = __byte_perm_S (w[18], w[19], selector); w[44] = __byte_perm_S (w[17], w[18], selector); w[43] = __byte_perm_S (w[16], w[17], selector); w[42] = __byte_perm_S (w[15], w[16], selector); w[41] = __byte_perm_S (w[14], w[15], selector); w[40] = __byte_perm_S (w[13], w[14], selector); w[39] = __byte_perm_S (w[12], w[13], selector); w[38] = __byte_perm_S (w[11], w[12], selector); w[37] = __byte_perm_S (w[10], w[11], selector); w[36] = __byte_perm_S (w[ 9], w[10], selector); w[35] = __byte_perm_S (w[ 8], w[ 9], selector); w[34] = __byte_perm_S (w[ 7], w[ 8], selector); w[33] = __byte_perm_S (w[ 6], w[ 7], selector); w[32] = __byte_perm_S (w[ 5], w[ 6], selector); w[31] = __byte_perm_S (w[ 4], w[ 5], selector); w[30] = __byte_perm_S (w[ 3], w[ 4], selector); w[29] = __byte_perm_S (w[ 2], w[ 3], selector); w[28] = __byte_perm_S (w[ 1], w[ 2], selector); w[27] = __byte_perm_S (w[ 0], w[ 1], selector); w[26] = __byte_perm_S ( 0, w[ 0], selector); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = __byte_perm_S (w[35], w[36], selector); w[62] = __byte_perm_S (w[34], w[35], selector); w[61] = __byte_perm_S (w[33], w[34], selector); w[60] = __byte_perm_S (w[32], w[33], selector); w[59] = __byte_perm_S (w[31], w[32], selector); w[58] = __byte_perm_S (w[30], w[31], selector); w[57] = __byte_perm_S (w[29], w[30], selector); w[56] = __byte_perm_S (w[28], w[29], selector); w[55] = __byte_perm_S (w[27], w[28], selector); w[54] = __byte_perm_S 
(w[26], w[27], selector); w[53] = __byte_perm_S (w[25], w[26], selector); w[52] = __byte_perm_S (w[24], w[25], selector); w[51] = __byte_perm_S (w[23], w[24], selector); w[50] = __byte_perm_S (w[22], w[23], selector); w[49] = __byte_perm_S (w[21], w[22], selector); w[48] = __byte_perm_S (w[20], w[21], selector); w[47] = __byte_perm_S (w[19], w[20], selector); w[46] = __byte_perm_S (w[18], w[19], selector); w[45] = __byte_perm_S (w[17], w[18], selector); w[44] = __byte_perm_S (w[16], w[17], selector); w[43] = __byte_perm_S (w[15], w[16], selector); w[42] = __byte_perm_S (w[14], w[15], selector); w[41] = __byte_perm_S (w[13], w[14], selector); w[40] = __byte_perm_S (w[12], w[13], selector); w[39] = __byte_perm_S (w[11], w[12], selector); w[38] = __byte_perm_S (w[10], w[11], selector); w[37] = __byte_perm_S (w[ 9], w[10], selector); w[36] = __byte_perm_S (w[ 8], w[ 9], selector); w[35] = __byte_perm_S (w[ 7], w[ 8], selector); w[34] = __byte_perm_S (w[ 6], w[ 7], selector); w[33] = __byte_perm_S (w[ 5], w[ 6], selector); w[32] = __byte_perm_S (w[ 4], w[ 5], selector); w[31] = __byte_perm_S (w[ 3], w[ 4], selector); w[30] = __byte_perm_S (w[ 2], w[ 3], selector); w[29] = __byte_perm_S (w[ 1], w[ 2], selector); w[28] = __byte_perm_S (w[ 0], w[ 1], selector); w[27] = __byte_perm_S ( 0, w[ 0], selector); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = __byte_perm_S (w[34], w[35], selector); w[62] = __byte_perm_S (w[33], w[34], selector); w[61] = __byte_perm_S (w[32], w[33], selector); w[60] = __byte_perm_S (w[31], w[32], selector); w[59] = __byte_perm_S (w[30], w[31], selector); w[58] = __byte_perm_S (w[29], w[30], selector); w[57] = __byte_perm_S (w[28], w[29], selector); w[56] = __byte_perm_S (w[27], 
w[28], selector); w[55] = __byte_perm_S (w[26], w[27], selector); w[54] = __byte_perm_S (w[25], w[26], selector); w[53] = __byte_perm_S (w[24], w[25], selector); w[52] = __byte_perm_S (w[23], w[24], selector); w[51] = __byte_perm_S (w[22], w[23], selector); w[50] = __byte_perm_S (w[21], w[22], selector); w[49] = __byte_perm_S (w[20], w[21], selector); w[48] = __byte_perm_S (w[19], w[20], selector); w[47] = __byte_perm_S (w[18], w[19], selector); w[46] = __byte_perm_S (w[17], w[18], selector); w[45] = __byte_perm_S (w[16], w[17], selector); w[44] = __byte_perm_S (w[15], w[16], selector); w[43] = __byte_perm_S (w[14], w[15], selector); w[42] = __byte_perm_S (w[13], w[14], selector); w[41] = __byte_perm_S (w[12], w[13], selector); w[40] = __byte_perm_S (w[11], w[12], selector); w[39] = __byte_perm_S (w[10], w[11], selector); w[38] = __byte_perm_S (w[ 9], w[10], selector); w[37] = __byte_perm_S (w[ 8], w[ 9], selector); w[36] = __byte_perm_S (w[ 7], w[ 8], selector); w[35] = __byte_perm_S (w[ 6], w[ 7], selector); w[34] = __byte_perm_S (w[ 5], w[ 6], selector); w[33] = __byte_perm_S (w[ 4], w[ 5], selector); w[32] = __byte_perm_S (w[ 3], w[ 4], selector); w[31] = __byte_perm_S (w[ 2], w[ 3], selector); w[30] = __byte_perm_S (w[ 1], w[ 2], selector); w[29] = __byte_perm_S (w[ 0], w[ 1], selector); w[28] = __byte_perm_S ( 0, w[ 0], selector); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = __byte_perm_S (w[33], w[34], selector); w[62] = __byte_perm_S (w[32], w[33], selector); w[61] = __byte_perm_S (w[31], w[32], selector); w[60] = __byte_perm_S (w[30], w[31], selector); w[59] = __byte_perm_S (w[29], w[30], selector); w[58] = __byte_perm_S (w[28], w[29], selector); w[57] = __byte_perm_S 
(w[27], w[28], selector); w[56] = __byte_perm_S (w[26], w[27], selector); w[55] = __byte_perm_S (w[25], w[26], selector); w[54] = __byte_perm_S (w[24], w[25], selector); w[53] = __byte_perm_S (w[23], w[24], selector); w[52] = __byte_perm_S (w[22], w[23], selector); w[51] = __byte_perm_S (w[21], w[22], selector); w[50] = __byte_perm_S (w[20], w[21], selector); w[49] = __byte_perm_S (w[19], w[20], selector); w[48] = __byte_perm_S (w[18], w[19], selector); w[47] = __byte_perm_S (w[17], w[18], selector); w[46] = __byte_perm_S (w[16], w[17], selector); w[45] = __byte_perm_S (w[15], w[16], selector); w[44] = __byte_perm_S (w[14], w[15], selector); w[43] = __byte_perm_S (w[13], w[14], selector); w[42] = __byte_perm_S (w[12], w[13], selector); w[41] = __byte_perm_S (w[11], w[12], selector); w[40] = __byte_perm_S (w[10], w[11], selector); w[39] = __byte_perm_S (w[ 9], w[10], selector); w[38] = __byte_perm_S (w[ 8], w[ 9], selector); w[37] = __byte_perm_S (w[ 7], w[ 8], selector); w[36] = __byte_perm_S (w[ 6], w[ 7], selector); w[35] = __byte_perm_S (w[ 5], w[ 6], selector); w[34] = __byte_perm_S (w[ 4], w[ 5], selector); w[33] = __byte_perm_S (w[ 3], w[ 4], selector); w[32] = __byte_perm_S (w[ 2], w[ 3], selector); w[31] = __byte_perm_S (w[ 1], w[ 2], selector); w[30] = __byte_perm_S (w[ 0], w[ 1], selector); w[29] = __byte_perm_S ( 0, w[ 0], selector); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = __byte_perm_S (w[32], w[33], selector); w[62] = __byte_perm_S (w[31], w[32], selector); w[61] = __byte_perm_S (w[30], w[31], selector); w[60] = __byte_perm_S (w[29], w[30], selector); w[59] = __byte_perm_S (w[28], w[29], selector); w[58] = __byte_perm_S (w[27], w[28], selector); w[57] = 
__byte_perm_S (w[26], w[27], selector); w[56] = __byte_perm_S (w[25], w[26], selector); w[55] = __byte_perm_S (w[24], w[25], selector); w[54] = __byte_perm_S (w[23], w[24], selector); w[53] = __byte_perm_S (w[22], w[23], selector); w[52] = __byte_perm_S (w[21], w[22], selector); w[51] = __byte_perm_S (w[20], w[21], selector); w[50] = __byte_perm_S (w[19], w[20], selector); w[49] = __byte_perm_S (w[18], w[19], selector); w[48] = __byte_perm_S (w[17], w[18], selector); w[47] = __byte_perm_S (w[16], w[17], selector); w[46] = __byte_perm_S (w[15], w[16], selector); w[45] = __byte_perm_S (w[14], w[15], selector); w[44] = __byte_perm_S (w[13], w[14], selector); w[43] = __byte_perm_S (w[12], w[13], selector); w[42] = __byte_perm_S (w[11], w[12], selector); w[41] = __byte_perm_S (w[10], w[11], selector); w[40] = __byte_perm_S (w[ 9], w[10], selector); w[39] = __byte_perm_S (w[ 8], w[ 9], selector); w[38] = __byte_perm_S (w[ 7], w[ 8], selector); w[37] = __byte_perm_S (w[ 6], w[ 7], selector); w[36] = __byte_perm_S (w[ 5], w[ 6], selector); w[35] = __byte_perm_S (w[ 4], w[ 5], selector); w[34] = __byte_perm_S (w[ 3], w[ 4], selector); w[33] = __byte_perm_S (w[ 2], w[ 3], selector); w[32] = __byte_perm_S (w[ 1], w[ 2], selector); w[31] = __byte_perm_S (w[ 0], w[ 1], selector); w[30] = __byte_perm_S ( 0, w[ 0], selector); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = __byte_perm_S (w[31], w[32], selector); w[62] = __byte_perm_S (w[30], w[31], selector); w[61] = __byte_perm_S (w[29], w[30], selector); w[60] = __byte_perm_S (w[28], w[29], selector); w[59] = __byte_perm_S (w[27], w[28], selector); w[58] = __byte_perm_S (w[26], w[27], selector); w[57] = __byte_perm_S (w[25], 
w[26], selector); w[56] = __byte_perm_S (w[24], w[25], selector); w[55] = __byte_perm_S (w[23], w[24], selector); w[54] = __byte_perm_S (w[22], w[23], selector); w[53] = __byte_perm_S (w[21], w[22], selector); w[52] = __byte_perm_S (w[20], w[21], selector); w[51] = __byte_perm_S (w[19], w[20], selector); w[50] = __byte_perm_S (w[18], w[19], selector); w[49] = __byte_perm_S (w[17], w[18], selector); w[48] = __byte_perm_S (w[16], w[17], selector); w[47] = __byte_perm_S (w[15], w[16], selector); w[46] = __byte_perm_S (w[14], w[15], selector); w[45] = __byte_perm_S (w[13], w[14], selector); w[44] = __byte_perm_S (w[12], w[13], selector); w[43] = __byte_perm_S (w[11], w[12], selector); w[42] = __byte_perm_S (w[10], w[11], selector); w[41] = __byte_perm_S (w[ 9], w[10], selector); w[40] = __byte_perm_S (w[ 8], w[ 9], selector); w[39] = __byte_perm_S (w[ 7], w[ 8], selector); w[38] = __byte_perm_S (w[ 6], w[ 7], selector); w[37] = __byte_perm_S (w[ 5], w[ 6], selector); w[36] = __byte_perm_S (w[ 4], w[ 5], selector); w[35] = __byte_perm_S (w[ 3], w[ 4], selector); w[34] = __byte_perm_S (w[ 2], w[ 3], selector); w[33] = __byte_perm_S (w[ 1], w[ 2], selector); w[32] = __byte_perm_S (w[ 0], w[ 1], selector); w[31] = __byte_perm_S ( 0, w[ 0], selector); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = __byte_perm_S (w[30], w[31], selector); w[62] = __byte_perm_S (w[29], w[30], selector); w[61] = __byte_perm_S (w[28], w[29], selector); w[60] = __byte_perm_S (w[27], w[28], selector); w[59] = __byte_perm_S (w[26], w[27], selector); w[58] = __byte_perm_S (w[25], w[26], selector); w[57] = __byte_perm_S (w[24], w[25], selector); w[56] = __byte_perm_S (w[23], w[24], 
selector); w[55] = __byte_perm_S (w[22], w[23], selector); w[54] = __byte_perm_S (w[21], w[22], selector); w[53] = __byte_perm_S (w[20], w[21], selector); w[52] = __byte_perm_S (w[19], w[20], selector); w[51] = __byte_perm_S (w[18], w[19], selector); w[50] = __byte_perm_S (w[17], w[18], selector); w[49] = __byte_perm_S (w[16], w[17], selector); w[48] = __byte_perm_S (w[15], w[16], selector); w[47] = __byte_perm_S (w[14], w[15], selector); w[46] = __byte_perm_S (w[13], w[14], selector); w[45] = __byte_perm_S (w[12], w[13], selector); w[44] = __byte_perm_S (w[11], w[12], selector); w[43] = __byte_perm_S (w[10], w[11], selector); w[42] = __byte_perm_S (w[ 9], w[10], selector); w[41] = __byte_perm_S (w[ 8], w[ 9], selector); w[40] = __byte_perm_S (w[ 7], w[ 8], selector); w[39] = __byte_perm_S (w[ 6], w[ 7], selector); w[38] = __byte_perm_S (w[ 5], w[ 6], selector); w[37] = __byte_perm_S (w[ 4], w[ 5], selector); w[36] = __byte_perm_S (w[ 3], w[ 4], selector); w[35] = __byte_perm_S (w[ 2], w[ 3], selector); w[34] = __byte_perm_S (w[ 1], w[ 2], selector); w[33] = __byte_perm_S (w[ 0], w[ 1], selector); w[32] = __byte_perm_S ( 0, w[ 0], selector); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = __byte_perm_S (w[29], w[30], selector); w[62] = __byte_perm_S (w[28], w[29], selector); w[61] = __byte_perm_S (w[27], w[28], selector); w[60] = __byte_perm_S (w[26], w[27], selector); w[59] = __byte_perm_S (w[25], w[26], selector); w[58] = __byte_perm_S (w[24], w[25], selector); w[57] = __byte_perm_S (w[23], w[24], selector); w[56] = __byte_perm_S (w[22], w[23], selector); w[55] = __byte_perm_S (w[21], w[22], selector); w[54] = __byte_perm_S (w[20], w[21], 
selector); w[53] = __byte_perm_S (w[19], w[20], selector); w[52] = __byte_perm_S (w[18], w[19], selector); w[51] = __byte_perm_S (w[17], w[18], selector); w[50] = __byte_perm_S (w[16], w[17], selector); w[49] = __byte_perm_S (w[15], w[16], selector); w[48] = __byte_perm_S (w[14], w[15], selector); w[47] = __byte_perm_S (w[13], w[14], selector); w[46] = __byte_perm_S (w[12], w[13], selector); w[45] = __byte_perm_S (w[11], w[12], selector); w[44] = __byte_perm_S (w[10], w[11], selector); w[43] = __byte_perm_S (w[ 9], w[10], selector); w[42] = __byte_perm_S (w[ 8], w[ 9], selector); w[41] = __byte_perm_S (w[ 7], w[ 8], selector); w[40] = __byte_perm_S (w[ 6], w[ 7], selector); w[39] = __byte_perm_S (w[ 5], w[ 6], selector); w[38] = __byte_perm_S (w[ 4], w[ 5], selector); w[37] = __byte_perm_S (w[ 3], w[ 4], selector); w[36] = __byte_perm_S (w[ 2], w[ 3], selector); w[35] = __byte_perm_S (w[ 1], w[ 2], selector); w[34] = __byte_perm_S (w[ 0], w[ 1], selector); w[33] = __byte_perm_S ( 0, w[ 0], selector); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = __byte_perm_S (w[28], w[29], selector); w[62] = __byte_perm_S (w[27], w[28], selector); w[61] = __byte_perm_S (w[26], w[27], selector); w[60] = __byte_perm_S (w[25], w[26], selector); w[59] = __byte_perm_S (w[24], w[25], selector); w[58] = __byte_perm_S (w[23], w[24], selector); w[57] = __byte_perm_S (w[22], w[23], selector); w[56] = __byte_perm_S (w[21], w[22], selector); w[55] = __byte_perm_S (w[20], w[21], selector); w[54] = __byte_perm_S (w[19], w[20], selector); w[53] = __byte_perm_S (w[18], w[19], selector); w[52] = __byte_perm_S (w[17], w[18], selector); w[51] = __byte_perm_S 
(w[16], w[17], selector); w[50] = __byte_perm_S (w[15], w[16], selector); w[49] = __byte_perm_S (w[14], w[15], selector); w[48] = __byte_perm_S (w[13], w[14], selector); w[47] = __byte_perm_S (w[12], w[13], selector); w[46] = __byte_perm_S (w[11], w[12], selector); w[45] = __byte_perm_S (w[10], w[11], selector); w[44] = __byte_perm_S (w[ 9], w[10], selector); w[43] = __byte_perm_S (w[ 8], w[ 9], selector); w[42] = __byte_perm_S (w[ 7], w[ 8], selector); w[41] = __byte_perm_S (w[ 6], w[ 7], selector); w[40] = __byte_perm_S (w[ 5], w[ 6], selector); w[39] = __byte_perm_S (w[ 4], w[ 5], selector); w[38] = __byte_perm_S (w[ 3], w[ 4], selector); w[37] = __byte_perm_S (w[ 2], w[ 3], selector); w[36] = __byte_perm_S (w[ 1], w[ 2], selector); w[35] = __byte_perm_S (w[ 0], w[ 1], selector); w[34] = __byte_perm_S ( 0, w[ 0], selector); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = __byte_perm_S (w[27], w[28], selector); w[62] = __byte_perm_S (w[26], w[27], selector); w[61] = __byte_perm_S (w[25], w[26], selector); w[60] = __byte_perm_S (w[24], w[25], selector); w[59] = __byte_perm_S (w[23], w[24], selector); w[58] = __byte_perm_S (w[22], w[23], selector); w[57] = __byte_perm_S (w[21], w[22], selector); w[56] = __byte_perm_S (w[20], w[21], selector); w[55] = __byte_perm_S (w[19], w[20], selector); w[54] = __byte_perm_S (w[18], w[19], selector); w[53] = __byte_perm_S (w[17], w[18], selector); w[52] = __byte_perm_S (w[16], w[17], selector); w[51] = __byte_perm_S (w[15], w[16], selector); w[50] = __byte_perm_S (w[14], w[15], selector); w[49] = __byte_perm_S (w[13], w[14], selector); w[48] = __byte_perm_S (w[12], w[13], selector); 
w[47] = __byte_perm_S (w[11], w[12], selector); w[46] = __byte_perm_S (w[10], w[11], selector); w[45] = __byte_perm_S (w[ 9], w[10], selector); w[44] = __byte_perm_S (w[ 8], w[ 9], selector); w[43] = __byte_perm_S (w[ 7], w[ 8], selector); w[42] = __byte_perm_S (w[ 6], w[ 7], selector); w[41] = __byte_perm_S (w[ 5], w[ 6], selector); w[40] = __byte_perm_S (w[ 4], w[ 5], selector); w[39] = __byte_perm_S (w[ 3], w[ 4], selector); w[38] = __byte_perm_S (w[ 2], w[ 3], selector); w[37] = __byte_perm_S (w[ 1], w[ 2], selector); w[36] = __byte_perm_S (w[ 0], w[ 1], selector); w[35] = __byte_perm_S ( 0, w[ 0], selector); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = __byte_perm_S (w[26], w[27], selector); w[62] = __byte_perm_S (w[25], w[26], selector); w[61] = __byte_perm_S (w[24], w[25], selector); w[60] = __byte_perm_S (w[23], w[24], selector); w[59] = __byte_perm_S (w[22], w[23], selector); w[58] = __byte_perm_S (w[21], w[22], selector); w[57] = __byte_perm_S (w[20], w[21], selector); w[56] = __byte_perm_S (w[19], w[20], selector); w[55] = __byte_perm_S (w[18], w[19], selector); w[54] = __byte_perm_S (w[17], w[18], selector); w[53] = __byte_perm_S (w[16], w[17], selector); w[52] = __byte_perm_S (w[15], w[16], selector); w[51] = __byte_perm_S (w[14], w[15], selector); w[50] = __byte_perm_S (w[13], w[14], selector); w[49] = __byte_perm_S (w[12], w[13], selector); w[48] = __byte_perm_S (w[11], w[12], selector); w[47] = __byte_perm_S (w[10], w[11], selector); w[46] = __byte_perm_S (w[ 9], w[10], selector); w[45] = __byte_perm_S (w[ 8], w[ 9], selector); w[44] = __byte_perm_S (w[ 7], w[ 8], selector); w[43] = 
__byte_perm_S (w[ 6], w[ 7], selector); w[42] = __byte_perm_S (w[ 5], w[ 6], selector); w[41] = __byte_perm_S (w[ 4], w[ 5], selector); w[40] = __byte_perm_S (w[ 3], w[ 4], selector); w[39] = __byte_perm_S (w[ 2], w[ 3], selector); w[38] = __byte_perm_S (w[ 1], w[ 2], selector); w[37] = __byte_perm_S (w[ 0], w[ 1], selector); w[36] = __byte_perm_S ( 0, w[ 0], selector); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = __byte_perm_S (w[25], w[26], selector); w[62] = __byte_perm_S (w[24], w[25], selector); w[61] = __byte_perm_S (w[23], w[24], selector); w[60] = __byte_perm_S (w[22], w[23], selector); w[59] = __byte_perm_S (w[21], w[22], selector); w[58] = __byte_perm_S (w[20], w[21], selector); w[57] = __byte_perm_S (w[19], w[20], selector); w[56] = __byte_perm_S (w[18], w[19], selector); w[55] = __byte_perm_S (w[17], w[18], selector); w[54] = __byte_perm_S (w[16], w[17], selector); w[53] = __byte_perm_S (w[15], w[16], selector); w[52] = __byte_perm_S (w[14], w[15], selector); w[51] = __byte_perm_S (w[13], w[14], selector); w[50] = __byte_perm_S (w[12], w[13], selector); w[49] = __byte_perm_S (w[11], w[12], selector); w[48] = __byte_perm_S (w[10], w[11], selector); w[47] = __byte_perm_S (w[ 9], w[10], selector); w[46] = __byte_perm_S (w[ 8], w[ 9], selector); w[45] = __byte_perm_S (w[ 7], w[ 8], selector); w[44] = __byte_perm_S (w[ 6], w[ 7], selector); w[43] = __byte_perm_S (w[ 5], w[ 6], selector); w[42] = __byte_perm_S (w[ 4], w[ 5], selector); w[41] = __byte_perm_S (w[ 3], w[ 4], selector); w[40] = __byte_perm_S (w[ 2], w[ 3], selector); w[39] = __byte_perm_S (w[ 1], w[ 2], selector); w[38] = 
__byte_perm_S (w[ 0], w[ 1], selector); w[37] = __byte_perm_S ( 0, w[ 0], selector); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = __byte_perm_S (w[24], w[25], selector); w[62] = __byte_perm_S (w[23], w[24], selector); w[61] = __byte_perm_S (w[22], w[23], selector); w[60] = __byte_perm_S (w[21], w[22], selector); w[59] = __byte_perm_S (w[20], w[21], selector); w[58] = __byte_perm_S (w[19], w[20], selector); w[57] = __byte_perm_S (w[18], w[19], selector); w[56] = __byte_perm_S (w[17], w[18], selector); w[55] = __byte_perm_S (w[16], w[17], selector); w[54] = __byte_perm_S (w[15], w[16], selector); w[53] = __byte_perm_S (w[14], w[15], selector); w[52] = __byte_perm_S (w[13], w[14], selector); w[51] = __byte_perm_S (w[12], w[13], selector); w[50] = __byte_perm_S (w[11], w[12], selector); w[49] = __byte_perm_S (w[10], w[11], selector); w[48] = __byte_perm_S (w[ 9], w[10], selector); w[47] = __byte_perm_S (w[ 8], w[ 9], selector); w[46] = __byte_perm_S (w[ 7], w[ 8], selector); w[45] = __byte_perm_S (w[ 6], w[ 7], selector); w[44] = __byte_perm_S (w[ 5], w[ 6], selector); w[43] = __byte_perm_S (w[ 4], w[ 5], selector); w[42] = __byte_perm_S (w[ 3], w[ 4], selector); w[41] = __byte_perm_S (w[ 2], w[ 3], selector); w[40] = __byte_perm_S (w[ 1], w[ 2], selector); w[39] = __byte_perm_S (w[ 0], w[ 1], selector); w[38] = __byte_perm_S ( 0, w[ 0], selector); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; 
w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = __byte_perm_S (w[23], w[24], selector); w[62] = __byte_perm_S (w[22], w[23], selector); w[61] = __byte_perm_S (w[21], w[22], selector); w[60] = __byte_perm_S (w[20], w[21], selector); w[59] = __byte_perm_S (w[19], w[20], selector); w[58] = __byte_perm_S (w[18], w[19], selector); w[57] = __byte_perm_S (w[17], w[18], selector); w[56] = __byte_perm_S (w[16], w[17], selector); w[55] = __byte_perm_S (w[15], w[16], selector); w[54] = __byte_perm_S (w[14], w[15], selector); w[53] = __byte_perm_S (w[13], w[14], selector); w[52] = __byte_perm_S (w[12], w[13], selector); w[51] = __byte_perm_S (w[11], w[12], selector); w[50] = __byte_perm_S (w[10], w[11], selector); w[49] = __byte_perm_S (w[ 9], w[10], selector); w[48] = __byte_perm_S (w[ 8], w[ 9], selector); w[47] = __byte_perm_S (w[ 7], w[ 8], selector); w[46] = __byte_perm_S (w[ 6], w[ 7], selector); w[45] = __byte_perm_S (w[ 5], w[ 6], selector); w[44] = __byte_perm_S (w[ 4], w[ 5], selector); w[43] = __byte_perm_S (w[ 3], w[ 4], selector); w[42] = __byte_perm_S (w[ 2], w[ 3], selector); w[41] = __byte_perm_S (w[ 1], w[ 2], selector); w[40] = __byte_perm_S (w[ 0], w[ 1], selector); w[39] = __byte_perm_S ( 0, w[ 0], selector); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = __byte_perm_S (w[22], w[23], selector); w[62] = __byte_perm_S (w[21], w[22], selector); w[61] = __byte_perm_S (w[20], w[21], selector); w[60] = __byte_perm_S 
(w[19], w[20], selector); w[59] = __byte_perm_S (w[18], w[19], selector); w[58] = __byte_perm_S (w[17], w[18], selector); w[57] = __byte_perm_S (w[16], w[17], selector); w[56] = __byte_perm_S (w[15], w[16], selector); w[55] = __byte_perm_S (w[14], w[15], selector); w[54] = __byte_perm_S (w[13], w[14], selector); w[53] = __byte_perm_S (w[12], w[13], selector); w[52] = __byte_perm_S (w[11], w[12], selector); w[51] = __byte_perm_S (w[10], w[11], selector); w[50] = __byte_perm_S (w[ 9], w[10], selector); w[49] = __byte_perm_S (w[ 8], w[ 9], selector); w[48] = __byte_perm_S (w[ 7], w[ 8], selector); w[47] = __byte_perm_S (w[ 6], w[ 7], selector); w[46] = __byte_perm_S (w[ 5], w[ 6], selector); w[45] = __byte_perm_S (w[ 4], w[ 5], selector); w[44] = __byte_perm_S (w[ 3], w[ 4], selector); w[43] = __byte_perm_S (w[ 2], w[ 3], selector); w[42] = __byte_perm_S (w[ 1], w[ 2], selector); w[41] = __byte_perm_S (w[ 0], w[ 1], selector); w[40] = __byte_perm_S ( 0, w[ 0], selector); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = __byte_perm_S (w[21], w[22], selector); w[62] = __byte_perm_S (w[20], w[21], selector); w[61] = __byte_perm_S (w[19], w[20], selector); w[60] = __byte_perm_S (w[18], w[19], selector); w[59] = __byte_perm_S (w[17], w[18], selector); w[58] = __byte_perm_S (w[16], w[17], selector); w[57] = __byte_perm_S (w[15], w[16], selector); w[56] = __byte_perm_S (w[14], w[15], selector); w[55] = __byte_perm_S (w[13], w[14], selector); w[54] = __byte_perm_S (w[12], w[13], selector); w[53] = __byte_perm_S (w[11], w[12], selector); w[52] = __byte_perm_S (w[10], 
w[11], selector); w[51] = __byte_perm_S (w[ 9], w[10], selector); w[50] = __byte_perm_S (w[ 8], w[ 9], selector); w[49] = __byte_perm_S (w[ 7], w[ 8], selector); w[48] = __byte_perm_S (w[ 6], w[ 7], selector); w[47] = __byte_perm_S (w[ 5], w[ 6], selector); w[46] = __byte_perm_S (w[ 4], w[ 5], selector); w[45] = __byte_perm_S (w[ 3], w[ 4], selector); w[44] = __byte_perm_S (w[ 2], w[ 3], selector); w[43] = __byte_perm_S (w[ 1], w[ 2], selector); w[42] = __byte_perm_S (w[ 0], w[ 1], selector); w[41] = __byte_perm_S ( 0, w[ 0], selector); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = __byte_perm_S (w[20], w[21], selector); w[62] = __byte_perm_S (w[19], w[20], selector); w[61] = __byte_perm_S (w[18], w[19], selector); w[60] = __byte_perm_S (w[17], w[18], selector); w[59] = __byte_perm_S (w[16], w[17], selector); w[58] = __byte_perm_S (w[15], w[16], selector); w[57] = __byte_perm_S (w[14], w[15], selector); w[56] = __byte_perm_S (w[13], w[14], selector); w[55] = __byte_perm_S (w[12], w[13], selector); w[54] = __byte_perm_S (w[11], w[12], selector); w[53] = __byte_perm_S (w[10], w[11], selector); w[52] = __byte_perm_S (w[ 9], w[10], selector); w[51] = __byte_perm_S (w[ 8], w[ 9], selector); w[50] = __byte_perm_S (w[ 7], w[ 8], selector); w[49] = __byte_perm_S (w[ 6], w[ 7], selector); w[48] = __byte_perm_S (w[ 5], w[ 6], selector); w[47] = __byte_perm_S (w[ 4], w[ 5], selector); w[46] = __byte_perm_S (w[ 3], w[ 4], selector); w[45] = __byte_perm_S (w[ 2], w[ 3], selector); w[44] = __byte_perm_S (w[ 1], w[ 2], selector); w[43] = __byte_perm_S (w[ 0], 
w[ 1], selector); w[42] = __byte_perm_S ( 0, w[ 0], selector); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = __byte_perm_S (w[19], w[20], selector); w[62] = __byte_perm_S (w[18], w[19], selector); w[61] = __byte_perm_S (w[17], w[18], selector); w[60] = __byte_perm_S (w[16], w[17], selector); w[59] = __byte_perm_S (w[15], w[16], selector); w[58] = __byte_perm_S (w[14], w[15], selector); w[57] = __byte_perm_S (w[13], w[14], selector); w[56] = __byte_perm_S (w[12], w[13], selector); w[55] = __byte_perm_S (w[11], w[12], selector); w[54] = __byte_perm_S (w[10], w[11], selector); w[53] = __byte_perm_S (w[ 9], w[10], selector); w[52] = __byte_perm_S (w[ 8], w[ 9], selector); w[51] = __byte_perm_S (w[ 7], w[ 8], selector); w[50] = __byte_perm_S (w[ 6], w[ 7], selector); w[49] = __byte_perm_S (w[ 5], w[ 6], selector); w[48] = __byte_perm_S (w[ 4], w[ 5], selector); w[47] = __byte_perm_S (w[ 3], w[ 4], selector); w[46] = __byte_perm_S (w[ 2], w[ 3], selector); w[45] = __byte_perm_S (w[ 1], w[ 2], selector); w[44] = __byte_perm_S (w[ 0], w[ 1], selector); w[43] = __byte_perm_S ( 0, w[ 0], selector); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 
1] = 0; w[ 0] = 0; break; case 44: w[63] = __byte_perm_S (w[18], w[19], selector); w[62] = __byte_perm_S (w[17], w[18], selector); w[61] = __byte_perm_S (w[16], w[17], selector); w[60] = __byte_perm_S (w[15], w[16], selector); w[59] = __byte_perm_S (w[14], w[15], selector); w[58] = __byte_perm_S (w[13], w[14], selector); w[57] = __byte_perm_S (w[12], w[13], selector); w[56] = __byte_perm_S (w[11], w[12], selector); w[55] = __byte_perm_S (w[10], w[11], selector); w[54] = __byte_perm_S (w[ 9], w[10], selector); w[53] = __byte_perm_S (w[ 8], w[ 9], selector); w[52] = __byte_perm_S (w[ 7], w[ 8], selector); w[51] = __byte_perm_S (w[ 6], w[ 7], selector); w[50] = __byte_perm_S (w[ 5], w[ 6], selector); w[49] = __byte_perm_S (w[ 4], w[ 5], selector); w[48] = __byte_perm_S (w[ 3], w[ 4], selector); w[47] = __byte_perm_S (w[ 2], w[ 3], selector); w[46] = __byte_perm_S (w[ 1], w[ 2], selector); w[45] = __byte_perm_S (w[ 0], w[ 1], selector); w[44] = __byte_perm_S ( 0, w[ 0], selector); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = __byte_perm_S (w[17], w[18], selector); w[62] = __byte_perm_S (w[16], w[17], selector); w[61] = __byte_perm_S (w[15], w[16], selector); w[60] = __byte_perm_S (w[14], w[15], selector); w[59] = __byte_perm_S (w[13], w[14], selector); w[58] = __byte_perm_S (w[12], w[13], selector); w[57] = __byte_perm_S (w[11], w[12], selector); w[56] = __byte_perm_S (w[10], w[11], selector); w[55] = __byte_perm_S (w[ 9], w[10], selector); w[54] = __byte_perm_S (w[ 8], w[ 9], selector); w[53] = __byte_perm_S (w[ 
7], w[ 8], selector); w[52] = __byte_perm_S (w[ 6], w[ 7], selector); w[51] = __byte_perm_S (w[ 5], w[ 6], selector); w[50] = __byte_perm_S (w[ 4], w[ 5], selector); w[49] = __byte_perm_S (w[ 3], w[ 4], selector); w[48] = __byte_perm_S (w[ 2], w[ 3], selector); w[47] = __byte_perm_S (w[ 1], w[ 2], selector); w[46] = __byte_perm_S (w[ 0], w[ 1], selector); w[45] = __byte_perm_S ( 0, w[ 0], selector); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = __byte_perm_S (w[16], w[17], selector); w[62] = __byte_perm_S (w[15], w[16], selector); w[61] = __byte_perm_S (w[14], w[15], selector); w[60] = __byte_perm_S (w[13], w[14], selector); w[59] = __byte_perm_S (w[12], w[13], selector); w[58] = __byte_perm_S (w[11], w[12], selector); w[57] = __byte_perm_S (w[10], w[11], selector); w[56] = __byte_perm_S (w[ 9], w[10], selector); w[55] = __byte_perm_S (w[ 8], w[ 9], selector); w[54] = __byte_perm_S (w[ 7], w[ 8], selector); w[53] = __byte_perm_S (w[ 6], w[ 7], selector); w[52] = __byte_perm_S (w[ 5], w[ 6], selector); w[51] = __byte_perm_S (w[ 4], w[ 5], selector); w[50] = __byte_perm_S (w[ 3], w[ 4], selector); w[49] = __byte_perm_S (w[ 2], w[ 3], selector); w[48] = __byte_perm_S (w[ 1], w[ 2], selector); w[47] = __byte_perm_S (w[ 0], w[ 1], selector); w[46] = __byte_perm_S ( 0, w[ 0], selector); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; 
w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = __byte_perm_S (w[15], w[16], selector); w[62] = __byte_perm_S (w[14], w[15], selector); w[61] = __byte_perm_S (w[13], w[14], selector); w[60] = __byte_perm_S (w[12], w[13], selector); w[59] = __byte_perm_S (w[11], w[12], selector); w[58] = __byte_perm_S (w[10], w[11], selector); w[57] = __byte_perm_S (w[ 9], w[10], selector); w[56] = __byte_perm_S (w[ 8], w[ 9], selector); w[55] = __byte_perm_S (w[ 7], w[ 8], selector); w[54] = __byte_perm_S (w[ 6], w[ 7], selector); w[53] = __byte_perm_S (w[ 5], w[ 6], selector); w[52] = __byte_perm_S (w[ 4], w[ 5], selector); w[51] = __byte_perm_S (w[ 3], w[ 4], selector); w[50] = __byte_perm_S (w[ 2], w[ 3], selector); w[49] = __byte_perm_S (w[ 1], w[ 2], selector); w[48] = __byte_perm_S (w[ 0], w[ 1], selector); w[47] = __byte_perm_S ( 0, w[ 0], selector); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = __byte_perm_S (w[14], w[15], selector); w[62] = __byte_perm_S (w[13], w[14], selector); w[61] = __byte_perm_S (w[12], w[13], selector); w[60] = __byte_perm_S (w[11], w[12], selector); w[59] = __byte_perm_S (w[10], w[11], selector); w[58] = __byte_perm_S (w[ 9], w[10], selector); w[57] = __byte_perm_S (w[ 8], w[ 9], selector); w[56] = 
__byte_perm_S (w[ 7], w[ 8], selector); w[55] = __byte_perm_S (w[ 6], w[ 7], selector); w[54] = __byte_perm_S (w[ 5], w[ 6], selector); w[53] = __byte_perm_S (w[ 4], w[ 5], selector); w[52] = __byte_perm_S (w[ 3], w[ 4], selector); w[51] = __byte_perm_S (w[ 2], w[ 3], selector); w[50] = __byte_perm_S (w[ 1], w[ 2], selector); w[49] = __byte_perm_S (w[ 0], w[ 1], selector); w[48] = __byte_perm_S ( 0, w[ 0], selector); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = __byte_perm_S (w[13], w[14], selector); w[62] = __byte_perm_S (w[12], w[13], selector); w[61] = __byte_perm_S (w[11], w[12], selector); w[60] = __byte_perm_S (w[10], w[11], selector); w[59] = __byte_perm_S (w[ 9], w[10], selector); w[58] = __byte_perm_S (w[ 8], w[ 9], selector); w[57] = __byte_perm_S (w[ 7], w[ 8], selector); w[56] = __byte_perm_S (w[ 6], w[ 7], selector); w[55] = __byte_perm_S (w[ 5], w[ 6], selector); w[54] = __byte_perm_S (w[ 4], w[ 5], selector); w[53] = __byte_perm_S (w[ 3], w[ 4], selector); w[52] = __byte_perm_S (w[ 2], w[ 3], selector); w[51] = __byte_perm_S (w[ 1], w[ 2], selector); w[50] = __byte_perm_S (w[ 0], w[ 1], selector); w[49] = __byte_perm_S ( 0, w[ 0], selector); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 
0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = __byte_perm_S (w[12], w[13], selector); w[62] = __byte_perm_S (w[11], w[12], selector); w[61] = __byte_perm_S (w[10], w[11], selector); w[60] = __byte_perm_S (w[ 9], w[10], selector); w[59] = __byte_perm_S (w[ 8], w[ 9], selector); w[58] = __byte_perm_S (w[ 7], w[ 8], selector); w[57] = __byte_perm_S (w[ 6], w[ 7], selector); w[56] = __byte_perm_S (w[ 5], w[ 6], selector); w[55] = __byte_perm_S (w[ 4], w[ 5], selector); w[54] = __byte_perm_S (w[ 3], w[ 4], selector); w[53] = __byte_perm_S (w[ 2], w[ 3], selector); w[52] = __byte_perm_S (w[ 1], w[ 2], selector); w[51] = __byte_perm_S (w[ 0], w[ 1], selector); w[50] = __byte_perm_S ( 0, w[ 0], selector); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = __byte_perm_S (w[11], w[12], selector); w[62] = __byte_perm_S (w[10], w[11], selector); w[61] = __byte_perm_S (w[ 9], w[10], selector); w[60] = __byte_perm_S (w[ 8], w[ 9], selector); w[59] = __byte_perm_S (w[ 7], w[ 8], selector); w[58] = __byte_perm_S (w[ 6], w[ 7], selector); w[57] = __byte_perm_S (w[ 5], w[ 6], selector); w[56] = __byte_perm_S (w[ 4], w[ 5], selector); w[55] = __byte_perm_S (w[ 3], w[ 4], selector); w[54] = __byte_perm_S (w[ 2], w[ 3], selector); w[53] = __byte_perm_S (w[ 1], w[ 2], 
selector); w[52] = __byte_perm_S (w[ 0], w[ 1], selector); w[51] = __byte_perm_S ( 0, w[ 0], selector); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = __byte_perm_S (w[10], w[11], selector); w[62] = __byte_perm_S (w[ 9], w[10], selector); w[61] = __byte_perm_S (w[ 8], w[ 9], selector); w[60] = __byte_perm_S (w[ 7], w[ 8], selector); w[59] = __byte_perm_S (w[ 6], w[ 7], selector); w[58] = __byte_perm_S (w[ 5], w[ 6], selector); w[57] = __byte_perm_S (w[ 4], w[ 5], selector); w[56] = __byte_perm_S (w[ 3], w[ 4], selector); w[55] = __byte_perm_S (w[ 2], w[ 3], selector); w[54] = __byte_perm_S (w[ 1], w[ 2], selector); w[53] = __byte_perm_S (w[ 0], w[ 1], selector); w[52] = __byte_perm_S ( 0, w[ 0], selector); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = __byte_perm_S (w[ 9], w[10], selector); w[62] = __byte_perm_S (w[ 8], w[ 9], selector); w[61] = __byte_perm_S (w[ 7], w[ 8], selector); w[60] = 
__byte_perm_S (w[ 6], w[ 7], selector); w[59] = __byte_perm_S (w[ 5], w[ 6], selector); w[58] = __byte_perm_S (w[ 4], w[ 5], selector); w[57] = __byte_perm_S (w[ 3], w[ 4], selector); w[56] = __byte_perm_S (w[ 2], w[ 3], selector); w[55] = __byte_perm_S (w[ 1], w[ 2], selector); w[54] = __byte_perm_S (w[ 0], w[ 1], selector); w[53] = __byte_perm_S ( 0, w[ 0], selector); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = __byte_perm_S (w[ 8], w[ 9], selector); w[62] = __byte_perm_S (w[ 7], w[ 8], selector); w[61] = __byte_perm_S (w[ 6], w[ 7], selector); w[60] = __byte_perm_S (w[ 5], w[ 6], selector); w[59] = __byte_perm_S (w[ 4], w[ 5], selector); w[58] = __byte_perm_S (w[ 3], w[ 4], selector); w[57] = __byte_perm_S (w[ 2], w[ 3], selector); w[56] = __byte_perm_S (w[ 1], w[ 2], selector); w[55] = __byte_perm_S (w[ 0], w[ 1], selector); w[54] = __byte_perm_S ( 0, w[ 0], selector); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; 
w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = __byte_perm_S (w[ 7], w[ 8], selector); w[62] = __byte_perm_S (w[ 6], w[ 7], selector); w[61] = __byte_perm_S (w[ 5], w[ 6], selector); w[60] = __byte_perm_S (w[ 4], w[ 5], selector); w[59] = __byte_perm_S (w[ 3], w[ 4], selector); w[58] = __byte_perm_S (w[ 2], w[ 3], selector); w[57] = __byte_perm_S (w[ 1], w[ 2], selector); w[56] = __byte_perm_S (w[ 0], w[ 1], selector); w[55] = __byte_perm_S ( 0, w[ 0], selector); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = __byte_perm_S (w[ 6], w[ 7], selector); w[62] = __byte_perm_S (w[ 5], w[ 6], selector); w[61] = __byte_perm_S (w[ 4], w[ 5], selector); w[60] = __byte_perm_S (w[ 3], w[ 4], selector); w[59] = __byte_perm_S (w[ 2], w[ 3], selector); w[58] = __byte_perm_S (w[ 1], w[ 2], selector); w[57] = __byte_perm_S (w[ 0], w[ 1], selector); w[56] = __byte_perm_S ( 0, w[ 0], selector); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 
9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = __byte_perm_S (w[ 5], w[ 6], selector); w[62] = __byte_perm_S (w[ 4], w[ 5], selector); w[61] = __byte_perm_S (w[ 3], w[ 4], selector); w[60] = __byte_perm_S (w[ 2], w[ 3], selector); w[59] = __byte_perm_S (w[ 1], w[ 2], selector); w[58] = __byte_perm_S (w[ 0], w[ 1], selector); w[57] = __byte_perm_S ( 0, w[ 0], selector); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = __byte_perm_S (w[ 4], w[ 5], selector); w[62] = __byte_perm_S (w[ 3], w[ 4], selector); w[61] = __byte_perm_S (w[ 2], w[ 3], selector); w[60] = __byte_perm_S (w[ 1], w[ 2], selector); w[59] = __byte_perm_S (w[ 0], w[ 1], selector); w[58] = __byte_perm_S ( 0, w[ 0], selector); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; 
w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = __byte_perm_S (w[ 3], w[ 4], selector); w[62] = __byte_perm_S (w[ 2], w[ 3], selector); w[61] = __byte_perm_S (w[ 1], w[ 2], selector); w[60] = __byte_perm_S (w[ 0], w[ 1], selector); w[59] = __byte_perm_S ( 0, w[ 0], selector); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = __byte_perm_S (w[ 2], w[ 3], selector); w[62] = __byte_perm_S (w[ 1], w[ 2], selector); w[61] = __byte_perm_S (w[ 0], w[ 1], selector); w[60] = __byte_perm_S ( 0, w[ 0], selector); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = __byte_perm_S (w[ 1], w[ 2], selector); w[62] = __byte_perm_S (w[ 0], w[ 1], selector); w[61] = __byte_perm_S ( 0, w[ 0], selector); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 
0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = __byte_perm_S (w[ 0], w[ 1], selector); w[62] = __byte_perm_S ( 0, w[ 0], selector); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = __byte_perm_S ( 0, w[ 0], selector); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; 
w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; }
#endif
}

/**
 * amp: merge one word from pws with the first entry of combs_buf.
 *
 * Each work-item below gid_max:
 *   1. loads pws[gid] and combs_buf[0] into private copies,
 *   2. depending on combs_mode, moves one of the two 64-word buffers toward
 *      higher indices by the length of the other one (the vacated low words
 *      are zero-filled by switch_buffer_by_offset_1x64_le_S),
 *   3. ORs the two buffers together word by word,
 *   4. stores the result, with pw_len set to the sum of both lengths,
 *      into pws_amp[gid].
 *
 * combs_mode selects which buffer is moved:
 *   COMBINATOR_MODE_BASE_LEFT  - the combs_buf entry is moved by the word length
 *   COMBINATOR_MODE_BASE_RIGHT - the word is moved by the combs_buf entry length
 * For any other value neither buffer is moved before the OR.
 *
 * rules_buf and bfs_buf are not read by this kernel.
 *
 * NOTE(review): nothing here checks that the summed length still fits the
 * 64-word buffer; presumably the caller guarantees this - confirm.
 */
__kernel void amp (__global pw_t *pws, __global pw_t *pws_amp, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, const u32 combs_mode, const u32 gid_max)
{
  const u32 item = get_global_id (0);

  // Work-items at or past gid_max have nothing to do.
  if (item >= gid_max) return;

  // Private working copies of both inputs.
  pw_t word  = pws[item];
  pw_t extra = combs_buf[0];

  // Lengths are captured before either buffer is modified.
  const u32 word_len  = word.pw_len;
  const u32 extra_len = extra.pw_len;

  const bool move_extra = (combs_mode == COMBINATOR_MODE_BASE_LEFT);
  const bool move_word  = (combs_mode == COMBINATOR_MODE_BASE_RIGHT);

  if (move_extra)
  {
    switch_buffer_by_offset_1x64_le_S (extra.i, word_len);
  }

  if (move_word)
  {
    switch_buffer_by_offset_1x64_le_S (word.i, extra_len);
  }

  // Overlay the two buffers; the moved one has zeros where the other has data.
  #pragma unroll
  for (int k = 0; k < 64; k++)
  {
    word.i[k] = word.i[k] | extra.i[k];
  }

  word.pw_len = word_len + extra_len;

  pws_amp[item] = word;
}