From 06344910a48db8d477336810392f17bcf096e7ed Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 10 Jul 2025 13:31:00 +0200 Subject: [PATCH] Refactored HIP kernel code for improved performance and cleanup - Replaced inline asm in hc_byte_perm() with __builtin_amdgcn_perm() - Replaced inline asm in hc_bytealign() with __builtin_amdgcn_alignbyte() - Defined HC_INLINE as default for HIP, significantly boosting kernel performance of pure kernels - Removed IS_ROCM from inc_vendor.h as it's no longer needed - Removed backend-specific code from several hash-modes and inc_rp_optimized.cl, as hc_bytealign_S() is now available on all backends --- OpenCL/inc_common.cl | 31862 +------------------------------- OpenCL/inc_hash_blake2b.cl | 4 +- OpenCL/inc_hash_blake2s.cl | 2 +- OpenCL/inc_rp_optimized.cl | 118 - OpenCL/inc_vendor.h | 11 +- OpenCL/m00500-optimized.cl | 104 +- OpenCL/m01600-optimized.cl | 106 +- OpenCL/m05800-optimized.cl | 41 +- OpenCL/m06300-optimized.cl | 107 +- OpenCL/m07400-optimized.cl | 180 +- OpenCL/m10700-optimized.cl | 56 +- OpenCL/m11600-pure.cl | 20 +- OpenCL/m12500-pure.cl | 20 +- OpenCL/m13800_a0-optimized.cl | 31 - OpenCL/m13800_a1-optimized.cl | 31 - OpenCL/m13800_a3-optimized.cl | 31 - OpenCL/m17010-pure.cl | 39 +- OpenCL/m17020-pure.cl | 39 +- OpenCL/m17030-pure.cl | 39 +- OpenCL/m17040-pure.cl | 39 +- OpenCL/m23700-pure.cl | 20 +- OpenCL/m23800-pure.cl | 20 +- OpenCL/m31400_a0-optimized.cl | 51 - OpenCL/m31400_a0-pure.cl | 51 - OpenCL/m31400_a1-optimized.cl | 51 - OpenCL/m31400_a1-pure.cl | 51 - OpenCL/m31400_a3-optimized.cl | 51 - OpenCL/m31400_a3-pure.cl | 51 - 28 files changed, 324 insertions(+), 32902 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index b13dc4d5f..0bc1063a4 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1425,121 +1425,174 @@ DECLSPEC u64 hc_swap64_S (const u64 v) DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { - #define BIT(x) (make_u32x (1u) << (x)) - #define BIT_MASK(x) (BIT (x) - 1) - #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) + u32x r; - return BFE (a, b, c); + #if VECT_SIZE == 1 + r = __builtin_amdgcn_ubfe (a, b, c); + #endif - #undef BIT - #undef BIT_MASK - #undef BFE + #if VECT_SIZE >= 2 + r.s0 = __builtin_amdgcn_ubfe (a.s0, b.s0, c.s0); + r.s1 = __builtin_amdgcn_ubfe (a.s1, b.s1, c.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __builtin_amdgcn_ubfe (a.s2, b.s2, c.s2); + r.s3 = __builtin_amdgcn_ubfe (a.s3, b.s3, c.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __builtin_amdgcn_ubfe (a.s4, b.s4, c.s4); + r.s5 = __builtin_amdgcn_ubfe (a.s5, b.s5, c.s5); + r.s6 = __builtin_amdgcn_ubfe (a.s6, b.s6, c.s6); + r.s7 = __builtin_amdgcn_ubfe (a.s7, b.s7, c.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __builtin_amdgcn_ubfe (a.s8, b.s8, c.s8); + r.s9 = __builtin_amdgcn_ubfe (a.s9, b.s9, c.s9); + r.sa = __builtin_amdgcn_ubfe (a.sa, b.sa, c.sa); + r.sb = __builtin_amdgcn_ubfe (a.sb, b.sb, c.sb); + r.sc = __builtin_amdgcn_ubfe (a.sc, b.sc, c.sc); + r.sd = __builtin_amdgcn_ubfe (a.sd, b.sd, c.sd); + r.se = __builtin_amdgcn_ubfe (a.se, b.se, c.se); + r.sf = __builtin_amdgcn_ubfe (a.sf, b.sf, c.sf); + #endif + + return r; } DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) { - #define BIT(x) (1u << (x)) - #define BIT_MASK(x) (BIT (x) - 1) - #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) - - return BFE (a, b, c); - - #undef BIT - #undef BIT_MASK - #undef BFE + return __builtin_amdgcn_ubfe (a, b, c); } DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { - u32x r = 0; + u32x r; - const int cm = c & 3; + #if VECT_SIZE == 1 + r = __builtin_amdgcn_alignbyte (a, b, c); + #endif - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a << 24) | (b >> 8); } - else if (cm == 2) { r = (a << 16) | (b >> 16); } - else if (cm == 3) { r = (a << 8) | (b >> 24); } + #if VECT_SIZE >= 2 + r.s0 = __builtin_amdgcn_alignbyte (a.s0, b.s0, c); + r.s1 = __builtin_amdgcn_alignbyte (a.s1, b.s1, c); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __builtin_amdgcn_alignbyte (a.s2, b.s2, c); + r.s3 = __builtin_amdgcn_alignbyte (a.s3, b.s3, c); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __builtin_amdgcn_alignbyte (a.s4, b.s4, c); + r.s5 = __builtin_amdgcn_alignbyte (a.s5, b.s5, c); + r.s6 = __builtin_amdgcn_alignbyte (a.s6, b.s6, c); + r.s7 = __builtin_amdgcn_alignbyte (a.s7, b.s7, c); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __builtin_amdgcn_alignbyte (a.s8, b.s8, c); + r.s9 = __builtin_amdgcn_alignbyte (a.s9, b.s9, c); + r.sa = __builtin_amdgcn_alignbyte (a.sa, b.sa, c); + r.sb = __builtin_amdgcn_alignbyte (a.sb, b.sb, c); + r.sc = __builtin_amdgcn_alignbyte (a.sc, b.sc, c); + r.sd = __builtin_amdgcn_alignbyte (a.sd, b.sd, c); + r.se = __builtin_amdgcn_alignbyte (a.se, b.se, c); + r.sf = __builtin_amdgcn_alignbyte (a.sf, b.sf, c); + #endif return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { - u32 r = 0; - - const int cm = c & 3; - - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a << 24) | (b >> 8); } - else if (cm == 2) { r = (a << 16) | (b >> 16); } - else if (cm == 3) { r = (a << 8) | (b >> 24); } - - return r; + return __builtin_amdgcn_alignbyte (a, b, c); } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { - u32x r = 0; + const int c_mod_4 = c & 3; - const int cm = c & 3; + u32x r; - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a >> 24) | (b << 8); } - else if (cm == 2) { r = (a >> 16) | (b << 16); } - else if (cm == 3) { r = (a >> 8) | (b << 24); } + #if VECT_SIZE == 1 + r = (c_mod_4 == 0) ? b : __builtin_amdgcn_alignbyte (b, a, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 2 + r.s0 = (c_mod_4 == 0) ? b.s0 : __builtin_amdgcn_alignbyte (b.s0, a.s0, 4 - c_mod_4); + r.s1 = (c_mod_4 == 0) ? b.s1 : __builtin_amdgcn_alignbyte (b.s1, a.s1, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 4 + r.s2 = (c_mod_4 == 0) ? b.s2 : __builtin_amdgcn_alignbyte (b.s2, a.s2, 4 - c_mod_4); + r.s3 = (c_mod_4 == 0) ? b.s3 : __builtin_amdgcn_alignbyte (b.s3, a.s3, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 8 + r.s4 = (c_mod_4 == 0) ? b.s4 : __builtin_amdgcn_alignbyte (b.s4, a.s4, 4 - c_mod_4); + r.s5 = (c_mod_4 == 0) ? b.s5 : __builtin_amdgcn_alignbyte (b.s5, a.s5, 4 - c_mod_4); + r.s6 = (c_mod_4 == 0) ? b.s6 : __builtin_amdgcn_alignbyte (b.s6, a.s6, 4 - c_mod_4); + r.s7 = (c_mod_4 == 0) ? b.s7 : __builtin_amdgcn_alignbyte (b.s7, a.s7, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 16 + r.s8 = (c_mod_4 == 0) ? b.s8 : __builtin_amdgcn_alignbyte (b.s8, a.s8, 4 - c_mod_4); + r.s9 = (c_mod_4 == 0) ? b.s9 : __builtin_amdgcn_alignbyte (b.s9, a.s9, 4 - c_mod_4); + r.sa = (c_mod_4 == 0) ? b.sa : __builtin_amdgcn_alignbyte (b.sa, a.sa, 4 - c_mod_4); + r.sb = (c_mod_4 == 0) ? b.sb : __builtin_amdgcn_alignbyte (b.sb, a.sb, 4 - c_mod_4); + r.sc = (c_mod_4 == 0) ? b.sc : __builtin_amdgcn_alignbyte (b.sc, a.sc, 4 - c_mod_4); + r.sd = (c_mod_4 == 0) ? b.sd : __builtin_amdgcn_alignbyte (b.sd, a.sd, 4 - c_mod_4); + r.se = (c_mod_4 == 0) ? b.se : __builtin_amdgcn_alignbyte (b.se, a.se, 4 - c_mod_4); + r.sf = (c_mod_4 == 0) ? b.sf : __builtin_amdgcn_alignbyte (b.sf, a.sf, 4 - c_mod_4); + #endif return r; } DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { - u32 r = 0; + const int c_mod_4 = c & 3; - const int cm = c & 3; - - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a >> 24) | (b << 8); } - else if (cm == 2) { r = (a >> 16) | (b << 16); } - else if (cm == 3) { r = (a >> 8) | (b << 24); } - - return r; + return (c_mod_4 == 0) ? b : __builtin_amdgcn_alignbyte (b, a, 4 - c_mod_4); } -#if HAS_VPERM == 1 DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) { u32x r = 0; #if VECT_SIZE == 1 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); + r = __builtin_amdgcn_perm (b, a, c); #endif #if VECT_SIZE >= 2 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s0) : "v"(b.s0), "v"(a.s0), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s1) : "v"(b.s1), "v"(a.s1), "v"(c)); + r.s0 = __builtin_amdgcn_perm (b.s0, a.s0, c); + r.s1 = __builtin_amdgcn_perm (b.s1, a.s1, c); #endif #if VECT_SIZE >= 4 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s2) : "v"(b.s2), "v"(a.s2), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s3) : "v"(b.s3), "v"(a.s3), "v"(c)); + r.s2 = __builtin_amdgcn_perm (b.s2, a.s2, c); + r.s3 = __builtin_amdgcn_perm (b.s3, a.s3, c); #endif #if VECT_SIZE >= 8 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s4) : "v"(b.s4), "v"(a.s4), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s5) : "v"(b.s5), "v"(a.s5), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s6) : "v"(b.s6), "v"(a.s6), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s7) : "v"(b.s7), "v"(a.s7), "v"(c)); + r.s4 = __builtin_amdgcn_perm (b.s4, a.s4, c); + r.s5 = __builtin_amdgcn_perm (b.s5, a.s5, c); + r.s6 = __builtin_amdgcn_perm (b.s6, a.s6, c); + r.s7 = __builtin_amdgcn_perm (b.s7, a.s7, c); #endif #if VECT_SIZE >= 16 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s8) : "v"(b.s8), "v"(a.s8), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s9) : "v"(b.s9), "v"(a.s9), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sa) : "v"(b.sa), "v"(a.sa), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sb) : "v"(b.sb), "v"(a.sb), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sc) : "v"(b.sc), "v"(a.sc), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sd) : "v"(b.sd), "v"(a.sd), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.se) : "v"(b.se), "v"(a.se), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sf) : "v"(b.sf), "v"(a.sf), "v"(c)); + r.s8 = __builtin_amdgcn_perm (b.s8, a.s8, c); + r.s9 = __builtin_amdgcn_perm (b.s9, a.s9, c); + r.sa = __builtin_amdgcn_perm (b.sa, a.sa, c); + r.sb = __builtin_amdgcn_perm (b.sb, a.sb, c); + r.sc = __builtin_amdgcn_perm (b.sc, a.sc, c); + r.sd = __builtin_amdgcn_perm (b.sd, a.sd, c); + r.se = __builtin_amdgcn_perm (b.se, a.se, c); + r.sf = __builtin_amdgcn_perm (b.sf, a.sf, c); #endif return r; @@ -1547,13 +1600,8 @@ DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c) { - u32 r = 0; - - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); - - return r; + return __builtin_amdgcn_perm (b, a, c); } -#endif #if HAS_VADD3 == 1 DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) @@ -1733,11 +1781,52 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) return r; } +DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) +{ + const int c_mod_4 = c & 3; + + u32x r; + + #if VECT_SIZE == 1 + r = __funnelshift_r (b, a, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 2 + r.s0 = __funnelshift_r (b.s0, a.s0, c_mod_4 * 8); + r.s1 = __funnelshift_r (b.s1, a.s1, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __funnelshift_r (b.s2, a.s2, c_mod_4 * 8); + r.s3 = __funnelshift_r (b.s3, a.s3, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __funnelshift_r (b.s4, a.s4, c_mod_4 * 8); + r.s5 = __funnelshift_r (b.s5, a.s5, c_mod_4 * 8); + r.s6 = __funnelshift_r (b.s6, a.s6, c_mod_4 * 8); + r.s7 = __funnelshift_r (b.s7, a.s7, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __funnelshift_r (b.s8, a.s8, c_mod_4 * 8); + r.s9 = __funnelshift_r (b.s9, a.s9, c_mod_4 * 8); + r.sa = __funnelshift_r (b.sa, a.sa, c_mod_4 * 8); + r.sb = __funnelshift_r (b.sb, a.sb, c_mod_4 * 8); + r.sc = __funnelshift_r (b.sc, a.sc, c_mod_4 * 8); + r.sd = __funnelshift_r (b.sd, a.sd, c_mod_4 * 8); + r.se = __funnelshift_r (b.se, a.se, c_mod_4 * 8); + r.sf = __funnelshift_r (b.sf, a.sf, c_mod_4 * 8); + #endif + + return r; +} + DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; - const u32 r = hc_byte_perm_S (b, a, (0x76543210 >> (c_mod_4 * 4)) & 0xffff); + const u32 r = __funnelshift_r (b, a, c_mod_4 * 8); return r; } @@ -1746,9 +1835,39 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { const int c_mod_4 = c & 3; - const int c_minus_4 = 4 - c_mod_4; + u32x r; - const u32x r = hc_byte_perm (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); + #if VECT_SIZE == 1 + r = __funnelshift_l (a, b, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 2 + r.s0 = __funnelshift_l (a.s0, b.s0, c_mod_4 * 8); + r.s1 = __funnelshift_l (a.s1, b.s1, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __funnelshift_l (a.s2, b.s2, c_mod_4 * 8); + r.s3 = __funnelshift_l (a.s3, b.s3, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __funnelshift_l (a.s4, b.s4, c_mod_4 * 8); + r.s5 = __funnelshift_l (a.s5, b.s5, c_mod_4 * 8); + r.s6 = __funnelshift_l (a.s6, b.s6, c_mod_4 * 8); + r.s7 = __funnelshift_l (a.s7, b.s7, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __funnelshift_l (a.s8, b.s8, c_mod_4 * 8); + r.s9 = __funnelshift_l (a.s9, b.s9, c_mod_4 * 8); + r.sa = __funnelshift_l (a.sa, b.sa, c_mod_4 * 8); + r.sb = __funnelshift_l (a.sb, b.sb, c_mod_4 * 8); + r.sc = __funnelshift_l (a.sc, b.sc, c_mod_4 * 8); + r.sd = __funnelshift_l (a.sd, b.sd, c_mod_4 * 8); + r.se = __funnelshift_l (a.se, b.se, c_mod_4 * 8); + r.sf = __funnelshift_l (a.sf, b.sf, c_mod_4 * 8); + #endif return r; } @@ -1757,9 +1876,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; - const int c_minus_4 = 4 - c_mod_4; - - const u32 r = hc_byte_perm_S (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); + const u32 r = __funnelshift_l (a, b, c_mod_4 * 8); return r; } @@ -3095,7 +3212,7 @@ DECLSPEC int count_bits_32 (const u32 v0, const u32 v1) DECLSPEC void make_utf16be (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2) { #if defined IS_NV - + out2[3] = hc_byte_perm (in[3], 0, 0x3727); out2[2] = hc_byte_perm (in[3], 0, 0x1707); out2[1] = hc_byte_perm (in[2], 0, 0x3727); @@ -3105,7 +3222,7 @@ DECLSPEC void make_utf16be (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PR out1[1] = hc_byte_perm (in[0], 0, 0x3727); out1[0] = hc_byte_perm (in[0], 0, 0x1707); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x03070207); out2[2] = hc_byte_perm (in[3], 0, 0x01070007); @@ -3143,7 +3260,7 @@ DECLSPEC void make_utf16beN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, P out1[1] = hc_byte_perm (in[0], 0, 0x1707); out1[0] = hc_byte_perm (in[0], 0, 0x3727); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x01070007); out2[2] = hc_byte_perm (in[3], 0, 0x03070207); @@ -3181,7 +3298,7 @@ DECLSPEC void make_utf16le (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PR out1[1] = hc_byte_perm (in[0], 0, 0x7372); out1[0] = hc_byte_perm (in[0], 0, 0x7170); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x07030702); out2[2] = hc_byte_perm (in[3], 0, 0x07010700); @@ -3219,7 +3336,7 @@ DECLSPEC void make_utf16leN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, P out1[1] = hc_byte_perm (in[0], 0, 0x7170); out1[0] = hc_byte_perm (in[0], 0, 0x7372); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x07010700); out2[2] = hc_byte_perm (in[3], 0, 0x07030702); @@ -3253,7 +3370,7 @@ DECLSPEC void undo_utf16be (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *i out[2] = hc_byte_perm (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm (in2[2], in2[3], 0x4602); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002); @@ -3283,7 +3400,7 @@ DECLSPEC void undo_utf16le (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *i out[2] = hc_byte_perm (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm (in2[2], in2[3], 0x6420); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200); @@ -3407,7 +3524,6 @@ DECLSPEC void switch_buffer_by_offset_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -3730,352 +3846,12 @@ DECLSPEC void switch_buffer_by_offset_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm (w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4534,476 +4310,12 @@ DECLSPEC void switch_buffer_by_offset_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS break; } - #endif - - #ifdef IS_NV - // atm only same code as for AMD, but could be improved - switch (offset_switch) - { - case 0: - c0[0] = hc_bytealign (w3[3], 0, offset); - w3[3] = hc_bytealign (w3[2], w3[3], offset); - w3[2] = hc_bytealign (w3[1], w3[2], offset); - w3[1] = hc_bytealign (w3[0], w3[1], offset); - w3[0] = hc_bytealign (w2[3], w3[0], offset); - w2[3] = hc_bytealign (w2[2], w2[3], offset); - w2[2] = hc_bytealign (w2[1], w2[2], offset); - w2[1] = hc_bytealign (w2[0], w2[1], offset); - w2[0] = hc_bytealign (w1[3], w2[0], offset); - w1[3] = hc_bytealign (w1[2], w1[3], offset); - w1[2] = hc_bytealign (w1[1], w1[2], offset); - w1[1] = hc_bytealign (w1[0], w1[1], offset); - w1[0] = hc_bytealign (w0[3], w1[0], offset); - w0[3] = hc_bytealign (w0[2], w0[3], offset); - w0[2] = hc_bytealign (w0[1], w0[2], offset); - w0[1] = hc_bytealign (w0[0], w0[1], offset); - w0[0] = hc_bytealign ( 0, w0[0], offset); - - break; - - case 1: - c0[1] = hc_bytealign (w3[3], 0, offset); - c0[0] = hc_bytealign (w3[2], w3[3], offset); - w3[3] = hc_bytealign (w3[1], w3[2], offset); - w3[2] = hc_bytealign (w3[0], w3[1], offset); - w3[1] = hc_bytealign (w2[3], w3[0], offset); - w3[0] = hc_bytealign (w2[2], w2[3], offset); - w2[3] = hc_bytealign (w2[1], w2[2], offset); - w2[2] = hc_bytealign (w2[0], w2[1], offset); - w2[1] = hc_bytealign (w1[3], w2[0], offset); - w2[0] = hc_bytealign (w1[2], w1[3], offset); - w1[3] = hc_bytealign (w1[1], w1[2], offset); - w1[2] = hc_bytealign (w1[0], w1[1], offset); - w1[1] = hc_bytealign (w0[3], w1[0], offset); - w1[0] = hc_bytealign (w0[2], w0[3], offset); - w0[3] = hc_bytealign (w0[1], w0[2], offset); - w0[2] = hc_bytealign (w0[0], w0[1], offset); - w0[1] = hc_bytealign ( 0, w0[0], offset); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_bytealign (w3[3], 0, offset); - c0[1] = hc_bytealign (w3[2], w3[3], offset); - c0[0] = hc_bytealign (w3[1], w3[2], offset); - w3[3] = hc_bytealign (w3[0], w3[1], offset); - w3[2] = hc_bytealign (w2[3], w3[0], offset); - w3[1] = hc_bytealign (w2[2], w2[3], offset); - w3[0] = hc_bytealign (w2[1], w2[2], offset); - w2[3] = hc_bytealign (w2[0], w2[1], offset); - w2[2] = hc_bytealign (w1[3], w2[0], offset); - w2[1] = hc_bytealign (w1[2], w1[3], offset); - w2[0] = hc_bytealign (w1[1], w1[2], offset); - w1[3] = hc_bytealign (w1[0], w1[1], offset); - w1[2] = hc_bytealign (w0[3], w1[0], offset); - w1[1] = hc_bytealign (w0[2], w0[3], offset); - w1[0] = hc_bytealign (w0[1], w0[2], offset); - w0[3] = hc_bytealign (w0[0], w0[1], offset); - w0[2] = hc_bytealign ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_bytealign (w3[3], 0, offset); - c0[2] = hc_bytealign (w3[2], w3[3], offset); - c0[1] = hc_bytealign (w3[1], w3[2], offset); - c0[0] = hc_bytealign (w3[0], w3[1], offset); - w3[3] = hc_bytealign (w2[3], w3[0], offset); - w3[2] = hc_bytealign (w2[2], w2[3], offset); - w3[1] = hc_bytealign (w2[1], w2[2], offset); - w3[0] = hc_bytealign (w2[0], w2[1], offset); - w2[3] = hc_bytealign (w1[3], w2[0], offset); - w2[2] = hc_bytealign (w1[2], w1[3], offset); - w2[1] = hc_bytealign (w1[1], w1[2], offset); - w2[0] = hc_bytealign (w1[0], w1[1], offset); - w1[3] = hc_bytealign (w0[3], w1[0], offset); - w1[2] = hc_bytealign (w0[2], w0[3], offset); - w1[1] = hc_bytealign (w0[1], w0[2], offset); - w1[0] = hc_bytealign (w0[0], w0[1], offset); - w0[3] = hc_bytealign ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_bytealign (w3[3], 0, offset); - c0[3] = hc_bytealign (w3[2], w3[3], offset); - c0[2] = hc_bytealign (w3[1], w3[2], offset); - c0[1] = hc_bytealign (w3[0], w3[1], offset); - c0[0] = hc_bytealign (w2[3], w3[0], offset); - w3[3] = hc_bytealign (w2[2], w2[3], offset); - w3[2] = hc_bytealign (w2[1], w2[2], offset); - w3[1] = hc_bytealign (w2[0], w2[1], offset); - w3[0] = hc_bytealign (w1[3], w2[0], offset); - w2[3] = hc_bytealign (w1[2], w1[3], offset); - w2[2] = hc_bytealign (w1[1], w1[2], offset); - w2[1] = hc_bytealign (w1[0], w1[1], offset); - w2[0] = hc_bytealign (w0[3], w1[0], offset); - w1[3] = hc_bytealign (w0[2], w0[3], offset); - w1[2] = hc_bytealign (w0[1], w0[2], offset); - w1[1] = hc_bytealign (w0[0], w0[1], offset); - w1[0] = hc_bytealign ( 0, w0[0], offset); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_bytealign (w3[3], 0, offset); - c1[0] = hc_bytealign (w3[2], w3[3], offset); - c0[3] = hc_bytealign (w3[1], w3[2], offset); - c0[2] = hc_bytealign (w3[0], w3[1], offset); - c0[1] = hc_bytealign (w2[3], w3[0], offset); - c0[0] = hc_bytealign (w2[2], w2[3], offset); - w3[3] = hc_bytealign (w2[1], w2[2], offset); - w3[2] = hc_bytealign (w2[0], w2[1], offset); - w3[1] = hc_bytealign (w1[3], w2[0], offset); - w3[0] = hc_bytealign (w1[2], w1[3], offset); - w2[3] = hc_bytealign (w1[1], w1[2], offset); - w2[2] = hc_bytealign (w1[0], w1[1], offset); - w2[1] = hc_bytealign (w0[3], w1[0], offset); - w2[0] = hc_bytealign (w0[2], w0[3], offset); - w1[3] = hc_bytealign (w0[1], w0[2], offset); - w1[2] = hc_bytealign (w0[0], w0[1], offset); - w1[1] = hc_bytealign ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_bytealign (w3[3], 0, offset); - c1[1] = hc_bytealign (w3[2], w3[3], offset); - c1[0] = hc_bytealign (w3[1], w3[2], offset); - c0[3] = hc_bytealign (w3[0], w3[1], offset); - c0[2] = hc_bytealign (w2[3], w3[0], offset); - c0[1] = hc_bytealign (w2[2], w2[3], offset); - c0[0] = hc_bytealign (w2[1], w2[2], offset); - w3[3] = hc_bytealign (w2[0], w2[1], offset); - w3[2] = hc_bytealign (w1[3], w2[0], offset); - w3[1] = hc_bytealign (w1[2], w1[3], offset); - w3[0] = hc_bytealign (w1[1], w1[2], offset); - w2[3] = hc_bytealign (w1[0], w1[1], offset); - w2[2] = hc_bytealign (w0[3], w1[0], offset); - w2[1] = hc_bytealign (w0[2], w0[3], offset); - w2[0] = hc_bytealign (w0[1], w0[2], offset); - w1[3] = hc_bytealign (w0[0], w0[1], offset); - w1[2] = hc_bytealign ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_bytealign (w3[3], 0, offset); - c1[2] = hc_bytealign (w3[2], w3[3], offset); - c1[1] = hc_bytealign (w3[1], w3[2], offset); - c1[0] = hc_bytealign (w3[0], w3[1], offset); - c0[3] = hc_bytealign (w2[3], w3[0], offset); - c0[2] = hc_bytealign (w2[2], w2[3], offset); - c0[1] = hc_bytealign (w2[1], w2[2], offset); - c0[0] = hc_bytealign (w2[0], w2[1], offset); - w3[3] = hc_bytealign (w1[3], w2[0], offset); - w3[2] = hc_bytealign (w1[2], w1[3], offset); - w3[1] = hc_bytealign (w1[1], w1[2], offset); - w3[0] = hc_bytealign (w1[0], w1[1], offset); - w2[3] = hc_bytealign (w0[3], w1[0], offset); - w2[2] = hc_bytealign (w0[2], w0[3], offset); - w2[1] = hc_bytealign (w0[1], w0[2], offset); - w2[0] = hc_bytealign (w0[0], w0[1], offset); - w1[3] = hc_bytealign ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_bytealign (w3[3], 0, offset); - c1[3] = hc_bytealign (w3[2], w3[3], offset); - c1[2] = hc_bytealign (w3[1], w3[2], offset); - c1[1] = hc_bytealign (w3[0], w3[1], offset); - c1[0] = hc_bytealign (w2[3], w3[0], offset); - c0[3] = hc_bytealign (w2[2], w2[3], offset); - c0[2] = hc_bytealign (w2[1], w2[2], offset); - c0[1] = hc_bytealign (w2[0], w2[1], offset); - c0[0] = hc_bytealign (w1[3], w2[0], offset); - w3[3] = hc_bytealign (w1[2], w1[3], offset); - w3[2] = hc_bytealign (w1[1], w1[2], offset); - w3[1] = hc_bytealign (w1[0], w1[1], offset); - w3[0] = hc_bytealign (w0[3], w1[0], offset); - w2[3] = hc_bytealign (w0[2], w0[3], offset); - w2[2] = hc_bytealign (w0[1], w0[2], offset); - w2[1] = hc_bytealign (w0[0], w0[1], offset); - w2[0] = hc_bytealign ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_bytealign (w3[3], 0, offset); - c2[0] = hc_bytealign (w3[2], w3[3], offset); - c1[3] = hc_bytealign (w3[1], w3[2], offset); - c1[2] = hc_bytealign (w3[0], w3[1], offset); - c1[1] = hc_bytealign (w2[3], w3[0], offset); - c1[0] = hc_bytealign (w2[2], w2[3], offset); - c0[3] = hc_bytealign (w2[1], w2[2], offset); - c0[2] = hc_bytealign (w2[0], w2[1], offset); - c0[1] = hc_bytealign (w1[3], w2[0], offset); - c0[0] = hc_bytealign (w1[2], w1[3], offset); - w3[3] = hc_bytealign (w1[1], w1[2], offset); - w3[2] = hc_bytealign (w1[0], w1[1], offset); - w3[1] = hc_bytealign (w0[3], w1[0], offset); - w3[0] = hc_bytealign (w0[2], w0[3], offset); - w2[3] = hc_bytealign (w0[1], w0[2], offset); - w2[2] = hc_bytealign (w0[0], w0[1], offset); - w2[1] = hc_bytealign ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_bytealign (w3[3], 0, offset); - c2[1] = hc_bytealign (w3[2], w3[3], offset); - c2[0] = hc_bytealign (w3[1], w3[2], offset); - c1[3] = hc_bytealign (w3[0], w3[1], offset); - c1[2] = hc_bytealign (w2[3], w3[0], offset); - c1[1] = hc_bytealign (w2[2], w2[3], offset); - c1[0] = hc_bytealign (w2[1], w2[2], offset); - c0[3] = hc_bytealign (w2[0], w2[1], offset); - c0[2] = hc_bytealign (w1[3], w2[0], offset); - c0[1] = hc_bytealign (w1[2], w1[3], offset); - c0[0] = hc_bytealign (w1[1], w1[2], offset); - w3[3] = hc_bytealign (w1[0], w1[1], offset); - w3[2] = hc_bytealign (w0[3], w1[0], offset); - w3[1] = hc_bytealign (w0[2], w0[3], offset); - w3[0] = hc_bytealign (w0[1], w0[2], offset); - w2[3] = hc_bytealign (w0[0], w0[1], offset); - w2[2] = hc_bytealign ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_bytealign (w3[3], 0, offset); - c2[2] = hc_bytealign (w3[2], w3[3], offset); - c2[1] = hc_bytealign (w3[1], w3[2], offset); - c2[0] = hc_bytealign (w3[0], w3[1], offset); - c1[3] = hc_bytealign (w2[3], w3[0], offset); - c1[2] = hc_bytealign (w2[2], w2[3], offset); - c1[1] = hc_bytealign (w2[1], w2[2], offset); - c1[0] = hc_bytealign (w2[0], w2[1], offset); - c0[3] = hc_bytealign (w1[3], w2[0], offset); - c0[2] = hc_bytealign (w1[2], w1[3], offset); - c0[1] = hc_bytealign (w1[1], w1[2], offset); - c0[0] = hc_bytealign (w1[0], w1[1], offset); - w3[3] = hc_bytealign (w0[3], w1[0], offset); - w3[2] = hc_bytealign (w0[2], w0[3], offset); - w3[1] = hc_bytealign (w0[1], w0[2], offset); - w3[0] = hc_bytealign (w0[0], w0[1], offset); - w2[3] = hc_bytealign ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_bytealign (w3[3], 0, offset); - c2[3] = hc_bytealign (w3[2], w3[3], offset); - c2[2] = hc_bytealign (w3[1], w3[2], offset); - c2[1] = hc_bytealign (w3[0], w3[1], offset); - c2[0] = hc_bytealign (w2[3], w3[0], offset); - c1[3] = hc_bytealign (w2[2], w2[3], offset); - c1[2] = hc_bytealign (w2[1], w2[2], offset); - c1[1] = hc_bytealign (w2[0], w2[1], offset); - c1[0] = hc_bytealign (w1[3], w2[0], offset); - c0[3] = hc_bytealign (w1[2], w1[3], offset); - c0[2] = hc_bytealign (w1[1], w1[2], offset); - c0[1] = hc_bytealign (w1[0], w1[1], offset); - c0[0] = hc_bytealign (w0[3], w1[0], offset); - w3[3] = hc_bytealign (w0[2], w0[3], offset); - w3[2] = hc_bytealign (w0[1], w0[2], offset); - w3[1] = hc_bytealign (w0[0], w0[1], offset); - w3[0] = hc_bytealign ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_bytealign (w3[3], 0, offset); - c3[0] = hc_bytealign (w3[2], w3[3], offset); - c2[3] = hc_bytealign (w3[1], w3[2], offset); - c2[2] = hc_bytealign (w3[0], w3[1], offset); - c2[1] = hc_bytealign (w2[3], w3[0], offset); - c2[0] = hc_bytealign (w2[2], w2[3], offset); - c1[3] = hc_bytealign (w2[1], w2[2], offset); - c1[2] = hc_bytealign (w2[0], w2[1], offset); - c1[1] = hc_bytealign (w1[3], w2[0], offset); - c1[0] = hc_bytealign (w1[2], w1[3], offset); - c0[3] = hc_bytealign (w1[1], w1[2], offset); - c0[2] = hc_bytealign (w1[0], w1[1], offset); - c0[1] = hc_bytealign (w0[3], w1[0], offset); - c0[0] = hc_bytealign (w0[2], w0[3], offset); - w3[3] = hc_bytealign (w0[1], w0[2], offset); - w3[2] = hc_bytealign (w0[0], w0[1], offset); - w3[1] = hc_bytealign ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_bytealign (w3[3], 0, offset); - c3[1] = hc_bytealign (w3[2], w3[3], offset); - c3[0] = hc_bytealign (w3[1], w3[2], offset); - c2[3] = hc_bytealign (w3[0], w3[1], offset); - c2[2] = hc_bytealign (w2[3], w3[0], offset); - c2[1] = hc_bytealign (w2[2], w2[3], offset); - c2[0] = hc_bytealign (w2[1], w2[2], offset); - c1[3] = hc_bytealign (w2[0], w2[1], offset); - c1[2] = hc_bytealign (w1[3], w2[0], offset); - c1[1] = hc_bytealign (w1[2], w1[3], offset); - c1[0] = hc_bytealign (w1[1], w1[2], offset); - c0[3] = hc_bytealign (w1[0], w1[1], offset); - c0[2] = hc_bytealign (w0[3], w1[0], offset); - c0[1] = hc_bytealign (w0[2], w0[3], offset); - c0[0] = hc_bytealign (w0[1], w0[2], offset); - w3[3] = hc_bytealign (w0[0], w0[1], offset); - w3[2] = hc_bytealign ( 0, w0[0], offset); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_bytealign (w3[3], 0, offset); - c3[2] = hc_bytealign (w3[2], w3[3], offset); - c3[1] = hc_bytealign (w3[1], w3[2], offset); - c3[0] = hc_bytealign (w3[0], w3[1], offset); - c2[3] = hc_bytealign (w2[3], w3[0], offset); - c2[2] = hc_bytealign (w2[2], w2[3], offset); - c2[1] = hc_bytealign (w2[1], w2[2], offset); - c2[0] = hc_bytealign (w2[0], w2[1], offset); - c1[3] = hc_bytealign (w1[3], w2[0], offset); - c1[2] = hc_bytealign (w1[2], w1[3], offset); - c1[1] = hc_bytealign (w1[1], w1[2], offset); - c1[0] = hc_bytealign (w1[0], w1[1], offset); - c0[3] = hc_bytealign (w0[3], w1[0], offset); - c0[2] = hc_bytealign (w0[2], w0[3], offset); - c0[1] = hc_bytealign (w0[1], w0[2], offset); - c0[0] = hc_bytealign (w0[0], w0[1], offset); - w3[3] = hc_bytealign ( 0, w0[0], offset); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -5326,348 +4638,12 @@ DECLSPEC void switch_buffer_by_offset_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -6126,484 +5102,12 @@ DECLSPEC void switch_buffer_by_offset_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm ( 0, w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm ( 0, w3[3], selector); - c0[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm ( 0, w3[3], selector); - c0[1] = hc_byte_perm (w3[3], w3[2], selector); - c0[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm ( 0, w3[3], selector); - c0[2] = hc_byte_perm (w3[3], w3[2], selector); - c0[1] = hc_byte_perm (w3[2], w3[1], selector); - c0[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm ( 0, w3[3], selector); - c0[3] = hc_byte_perm (w3[3], w3[2], selector); - c0[2] = hc_byte_perm (w3[2], w3[1], selector); - c0[1] = hc_byte_perm (w3[1], w3[0], selector); - c0[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm ( 0, w3[3], selector); - c1[0] = hc_byte_perm (w3[3], w3[2], selector); - c0[3] = hc_byte_perm (w3[2], w3[1], selector); - c0[2] = hc_byte_perm (w3[1], w3[0], selector); - c0[1] = hc_byte_perm (w3[0], w2[3], selector); - c0[0] = hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm ( 0, w3[3], selector); - c1[1] = hc_byte_perm (w3[3], w3[2], selector); - c1[0] = hc_byte_perm (w3[2], w3[1], selector); - c0[3] = hc_byte_perm (w3[1], w3[0], selector); - c0[2] = hc_byte_perm (w3[0], w2[3], selector); - c0[1] = hc_byte_perm (w2[3], w2[2], selector); - c0[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm ( 0, w3[3], selector); - c1[2] = hc_byte_perm (w3[3], w3[2], selector); - c1[1] = hc_byte_perm (w3[2], w3[1], selector); - c1[0] = hc_byte_perm (w3[1], w3[0], selector); - c0[3] = hc_byte_perm (w3[0], w2[3], selector); - c0[2] = hc_byte_perm (w2[3], w2[2], selector); - c0[1] = hc_byte_perm (w2[2], w2[1], selector); - c0[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm ( 0, w3[3], selector); - c1[3] = hc_byte_perm (w3[3], w3[2], selector); - c1[2] = hc_byte_perm (w3[2], w3[1], selector); - c1[1] = hc_byte_perm (w3[1], w3[0], selector); - c1[0] = hc_byte_perm (w3[0], w2[3], selector); - c0[3] = hc_byte_perm (w2[3], w2[2], selector); - c0[2] = hc_byte_perm (w2[2], w2[1], selector); - c0[1] = hc_byte_perm (w2[1], w2[0], selector); - c0[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm ( 0, w3[3], selector); - c2[0] = hc_byte_perm (w3[3], w3[2], selector); - c1[3] = hc_byte_perm (w3[2], w3[1], selector); - c1[2] = hc_byte_perm (w3[1], w3[0], selector); - c1[1] = hc_byte_perm (w3[0], w2[3], selector); - c1[0] = hc_byte_perm (w2[3], w2[2], selector); - c0[3] = hc_byte_perm (w2[2], w2[1], selector); - c0[2] = hc_byte_perm (w2[1], w2[0], selector); - c0[1] = hc_byte_perm (w2[0], w1[3], selector); - c0[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm ( 0, w3[3], selector); - c2[1] = hc_byte_perm (w3[3], w3[2], selector); - c2[0] = hc_byte_perm (w3[2], w3[1], selector); - c1[3] = hc_byte_perm (w3[1], w3[0], selector); - c1[2] = hc_byte_perm (w3[0], w2[3], selector); - c1[1] = hc_byte_perm (w2[3], w2[2], selector); - c1[0] = hc_byte_perm (w2[2], w2[1], selector); - c0[3] = hc_byte_perm (w2[1], w2[0], selector); - c0[2] = hc_byte_perm (w2[0], w1[3], selector); - c0[1] = hc_byte_perm (w1[3], w1[2], selector); - c0[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm ( 0, w3[3], selector); - c2[2] = hc_byte_perm (w3[3], w3[2], selector); - c2[1] = hc_byte_perm (w3[2], w3[1], selector); - c2[0] = hc_byte_perm (w3[1], w3[0], selector); - c1[3] = hc_byte_perm (w3[0], w2[3], selector); - c1[2] = hc_byte_perm (w2[3], w2[2], selector); - c1[1] = hc_byte_perm (w2[2], w2[1], selector); - c1[0] = hc_byte_perm (w2[1], w2[0], selector); - c0[3] = hc_byte_perm (w2[0], w1[3], selector); - c0[2] = hc_byte_perm (w1[3], w1[2], selector); - c0[1] = hc_byte_perm (w1[2], w1[1], selector); - c0[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm ( 0, w3[3], selector); - c2[3] = hc_byte_perm (w3[3], w3[2], selector); - c2[2] = hc_byte_perm (w3[2], w3[1], selector); - c2[1] = hc_byte_perm (w3[1], w3[0], selector); - c2[0] = hc_byte_perm (w3[0], w2[3], selector); - c1[3] = hc_byte_perm (w2[3], w2[2], selector); - c1[2] = hc_byte_perm (w2[2], w2[1], selector); - c1[1] = hc_byte_perm (w2[1], w2[0], selector); - c1[0] = hc_byte_perm (w2[0], w1[3], selector); - c0[3] = hc_byte_perm (w1[3], w1[2], selector); - c0[2] = hc_byte_perm (w1[2], w1[1], selector); - c0[1] = hc_byte_perm (w1[1], w1[0], selector); - c0[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm ( 0, w3[3], selector); - c3[0] = hc_byte_perm (w3[3], w3[2], selector); - c2[3] = hc_byte_perm (w3[2], w3[1], selector); - c2[2] = hc_byte_perm (w3[1], w3[0], selector); - c2[1] = hc_byte_perm (w3[0], w2[3], selector); - c2[0] = hc_byte_perm (w2[3], w2[2], selector); - c1[3] = hc_byte_perm (w2[2], w2[1], selector); - c1[2] = hc_byte_perm (w2[1], w2[0], selector); - c1[1] = hc_byte_perm (w2[0], w1[3], selector); - c1[0] = hc_byte_perm (w1[3], w1[2], selector); - c0[3] = hc_byte_perm (w1[2], w1[1], selector); - c0[2] = hc_byte_perm (w1[1], w1[0], selector); - c0[1] = hc_byte_perm (w1[0], w0[3], selector); - c0[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm ( 0, w3[3], selector); - c3[1] = hc_byte_perm (w3[3], w3[2], selector); - c3[0] = hc_byte_perm (w3[2], w3[1], selector); - c2[3] = hc_byte_perm (w3[1], w3[0], selector); - c2[2] = hc_byte_perm (w3[0], w2[3], selector); - c2[1] = hc_byte_perm (w2[3], w2[2], selector); - c2[0] = hc_byte_perm (w2[2], w2[1], selector); - c1[3] = hc_byte_perm (w2[1], w2[0], selector); - c1[2] = hc_byte_perm (w2[0], w1[3], selector); - c1[1] = hc_byte_perm (w1[3], w1[2], selector); - c1[0] = hc_byte_perm (w1[2], w1[1], selector); - c0[3] = hc_byte_perm (w1[1], w1[0], selector); - c0[2] = hc_byte_perm (w1[0], w0[3], selector); - c0[1] = hc_byte_perm (w0[3], w0[2], selector); - c0[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm ( 0, w3[3], selector); - c3[2] = hc_byte_perm (w3[3], w3[2], selector); - c3[1] = hc_byte_perm (w3[2], w3[1], selector); - c3[0] = hc_byte_perm (w3[1], w3[0], selector); - c2[3] = hc_byte_perm (w3[0], w2[3], selector); - c2[2] = hc_byte_perm (w2[3], w2[2], selector); - c2[1] = hc_byte_perm (w2[2], w2[1], selector); - c2[0] = hc_byte_perm (w2[1], w2[0], selector); - c1[3] = hc_byte_perm (w2[0], w1[3], selector); - c1[2] = hc_byte_perm (w1[3], w1[2], selector); - c1[1] = hc_byte_perm (w1[2], w1[1], selector); - c1[0] = hc_byte_perm (w1[1], w1[0], selector); - c0[3] = hc_byte_perm (w1[0], w0[3], selector); - c0[2] = hc_byte_perm (w0[3], w0[2], selector); - c0[1] = hc_byte_perm (w0[2], w0[1], selector); - c0[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -7758,592 +6262,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (PRIVATE_AS u32x *w0, PRIVATE_AS u3 break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm (w7[2], w7[3], selector); - w7[2] = hc_byte_perm (w7[1], w7[2], selector); - w7[1] = hc_byte_perm (w7[0], w7[1], selector); - w7[0] = hc_byte_perm (w6[3], w7[0], selector); - w6[3] = hc_byte_perm (w6[2], w6[3], selector); - w6[2] = hc_byte_perm (w6[1], w6[2], selector); - w6[1] = hc_byte_perm (w6[0], w6[1], selector); - w6[0] = hc_byte_perm (w5[3], w6[0], selector); - w5[3] = hc_byte_perm (w5[2], w5[3], selector); - w5[2] = hc_byte_perm (w5[1], w5[2], selector); - w5[1] = hc_byte_perm (w5[0], w5[1], selector); - w5[0] = hc_byte_perm (w4[3], w5[0], selector); - w4[3] = hc_byte_perm (w4[2], w4[3], selector); - w4[2] = hc_byte_perm (w4[1], w4[2], selector); - w4[1] = hc_byte_perm (w4[0], w4[1], selector); - w4[0] = hc_byte_perm (w3[3], w4[0], selector); - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - break; - - case 1: - w7[3] = hc_byte_perm (w7[1], w7[2], selector); - w7[2] = hc_byte_perm (w7[0], w7[1], selector); - w7[1] = hc_byte_perm (w6[3], w7[0], selector); - w7[0] = hc_byte_perm (w6[2], w6[3], selector); - w6[3] = hc_byte_perm (w6[1], w6[2], selector); - w6[2] = hc_byte_perm (w6[0], w6[1], selector); - w6[1] = hc_byte_perm (w5[3], w6[0], selector); - w6[0] = hc_byte_perm (w5[2], w5[3], selector); - w5[3] = hc_byte_perm (w5[1], w5[2], selector); - w5[2] = hc_byte_perm (w5[0], w5[1], selector); - w5[1] = hc_byte_perm (w4[3], w5[0], selector); - w5[0] = hc_byte_perm (w4[2], w4[3], selector); - w4[3] = hc_byte_perm (w4[1], w4[2], selector); - w4[2] = hc_byte_perm (w4[0], w4[1], selector); - w4[1] = hc_byte_perm (w3[3], w4[0], selector); - w4[0] = hc_byte_perm (w3[2], w3[3], selector); - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm (w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] = 0; - break; - - case 2: - w7[3] = hc_byte_perm (w7[0], w7[1], selector); - w7[2] = hc_byte_perm (w6[3], w7[0], selector); - w7[1] = hc_byte_perm (w6[2], w6[3], selector); - w7[0] = hc_byte_perm (w6[1], w6[2], selector); - w6[3] = hc_byte_perm (w6[0], w6[1], selector); - w6[2] = hc_byte_perm (w5[3], w6[0], selector); - w6[1] = hc_byte_perm (w5[2], w5[3], selector); - w6[0] = hc_byte_perm (w5[1], w5[2], selector); - w5[3] = hc_byte_perm (w5[0], w5[1], selector); - w5[2] = hc_byte_perm (w4[3], w5[0], selector); - w5[1] = hc_byte_perm (w4[2], w4[3], selector); - w5[0] = hc_byte_perm (w4[1], w4[2], selector); - w4[3] = hc_byte_perm (w4[0], w4[1], selector); - w4[2] = hc_byte_perm (w3[3], w4[0], selector); - w4[1] = hc_byte_perm (w3[2], w3[3], selector); - w4[0] = hc_byte_perm (w3[1], w3[2], selector); - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w7[3] = hc_byte_perm (w6[3], w7[0], selector); - w7[2] = hc_byte_perm (w6[2], w6[3], selector); - w7[1] = hc_byte_perm (w6[1], w6[2], selector); - w7[0] = hc_byte_perm (w6[0], w6[1], selector); - w6[3] = hc_byte_perm (w5[3], w6[0], selector); - w6[2] = hc_byte_perm (w5[2], w5[3], selector); - w6[1] = hc_byte_perm (w5[1], w5[2], selector); - w6[0] = hc_byte_perm (w5[0], w5[1], selector); - w5[3] = hc_byte_perm (w4[3], w5[0], selector); - w5[2] = hc_byte_perm (w4[2], w4[3], selector); - w5[1] = hc_byte_perm (w4[1], w4[2], selector); - w5[0] = hc_byte_perm (w4[0], w4[1], selector); - w4[3] = hc_byte_perm (w3[3], w4[0], selector); - w4[2] = hc_byte_perm (w3[2], w3[3], selector); - w4[1] = hc_byte_perm (w3[1], w3[2], selector); - w4[0] = hc_byte_perm (w3[0], w3[1], selector); - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w7[3] = hc_byte_perm (w6[2], w6[3], selector); - w7[2] = hc_byte_perm (w6[1], w6[2], selector); - w7[1] = hc_byte_perm (w6[0], w6[1], selector); - w7[0] = hc_byte_perm (w5[3], w6[0], selector); - w6[3] = hc_byte_perm (w5[2], w5[3], selector); - w6[2] = hc_byte_perm (w5[1], w5[2], selector); - w6[1] = hc_byte_perm (w5[0], w5[1], selector); - w6[0] = hc_byte_perm (w4[3], w5[0], selector); - w5[3] = hc_byte_perm (w4[2], w4[3], selector); - w5[2] = hc_byte_perm (w4[1], w4[2], selector); - w5[1] = hc_byte_perm (w4[0], w4[1], selector); - w5[0] = hc_byte_perm (w3[3], w4[0], selector); - w4[3] = hc_byte_perm (w3[2], w3[3], selector); - w4[2] = hc_byte_perm (w3[1], w3[2], selector); - w4[1] = hc_byte_perm (w3[0], w3[1], selector); - w4[0] = hc_byte_perm (w2[3], w3[0], selector); - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w7[3] = hc_byte_perm (w6[1], w6[2], selector); - w7[2] = hc_byte_perm (w6[0], w6[1], selector); - w7[1] = hc_byte_perm (w5[3], w6[0], selector); - w7[0] = hc_byte_perm (w5[2], w5[3], selector); - w6[3] = hc_byte_perm (w5[1], w5[2], selector); - w6[2] = hc_byte_perm (w5[0], w5[1], selector); - w6[1] = hc_byte_perm (w4[3], w5[0], selector); - w6[0] = hc_byte_perm (w4[2], w4[3], selector); - w5[3] = hc_byte_perm (w4[1], w4[2], selector); - w5[2] = hc_byte_perm (w4[0], w4[1], selector); - w5[1] = hc_byte_perm (w3[3], w4[0], selector); - w5[0] = hc_byte_perm (w3[2], w3[3], selector); - w4[3] = hc_byte_perm (w3[1], w3[2], selector); - w4[2] = hc_byte_perm (w3[0], w3[1], selector); - w4[1] = hc_byte_perm (w2[3], w3[0], selector); - w4[0] = hc_byte_perm (w2[2], w2[3], selector); - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w7[3] = hc_byte_perm (w6[0], w6[1], selector); - w7[2] = hc_byte_perm (w5[3], w6[0], selector); - w7[1] = hc_byte_perm (w5[2], w5[3], selector); - w7[0] = hc_byte_perm (w5[1], w5[2], selector); - w6[3] = hc_byte_perm (w5[0], w5[1], selector); - w6[2] = hc_byte_perm (w4[3], w5[0], selector); - w6[1] = hc_byte_perm (w4[2], w4[3], selector); - w6[0] = hc_byte_perm (w4[1], w4[2], selector); - w5[3] = hc_byte_perm (w4[0], w4[1], selector); - w5[2] = hc_byte_perm (w3[3], w4[0], selector); - w5[1] = hc_byte_perm (w3[2], w3[3], selector); - w5[0] = hc_byte_perm (w3[1], w3[2], selector); - w4[3] = hc_byte_perm (w3[0], w3[1], selector); - w4[2] = hc_byte_perm (w2[3], w3[0], selector); - w4[1] = hc_byte_perm (w2[2], w2[3], selector); - w4[0] = hc_byte_perm (w2[1], w2[2], selector); - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w7[3] = hc_byte_perm (w5[3], w6[0], selector); - w7[2] = hc_byte_perm (w5[2], w5[3], selector); - w7[1] = hc_byte_perm (w5[1], w5[2], selector); - w7[0] = hc_byte_perm (w5[0], w5[1], selector); - w6[3] = hc_byte_perm (w4[3], w5[0], selector); - w6[2] = hc_byte_perm (w4[2], w4[3], selector); - w6[1] = hc_byte_perm (w4[1], w4[2], selector); - w6[0] = hc_byte_perm (w4[0], w4[1], selector); - w5[3] = hc_byte_perm (w3[3], w4[0], selector); - w5[2] = hc_byte_perm (w3[2], w3[3], selector); - w5[1] = hc_byte_perm (w3[1], w3[2], selector); - w5[0] = hc_byte_perm (w3[0], w3[1], selector); - w4[3] = hc_byte_perm (w2[3], w3[0], selector); - w4[2] = hc_byte_perm (w2[2], w2[3], selector); - w4[1] = hc_byte_perm (w2[1], w2[2], selector); - w4[0] = hc_byte_perm (w2[0], w2[1], selector); - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w7[3] = hc_byte_perm (w5[2], w5[3], selector); - w7[2] = hc_byte_perm (w5[1], w5[2], selector); - w7[1] = hc_byte_perm (w5[0], w5[1], selector); - w7[0] = hc_byte_perm (w4[3], w5[0], selector); - w6[3] = hc_byte_perm (w4[2], w4[3], selector); - w6[2] = hc_byte_perm (w4[1], w4[2], selector); - w6[1] = hc_byte_perm (w4[0], w4[1], selector); - w6[0] = hc_byte_perm (w3[3], w4[0], selector); - w5[3] = hc_byte_perm (w3[2], w3[3], selector); - w5[2] = hc_byte_perm (w3[1], w3[2], selector); - w5[1] = hc_byte_perm (w3[0], w3[1], selector); - w5[0] = hc_byte_perm (w2[3], w3[0], selector); - w4[3] = hc_byte_perm (w2[2], w2[3], selector); - w4[2] = hc_byte_perm (w2[1], w2[2], selector); - w4[1] = hc_byte_perm (w2[0], w2[1], selector); - w4[0] = hc_byte_perm (w1[3], w2[0], selector); - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w7[3] = hc_byte_perm (w5[1], w5[2], selector); - w7[2] = hc_byte_perm (w5[0], w5[1], selector); - w7[1] = hc_byte_perm (w4[3], w5[0], selector); - w7[0] = hc_byte_perm (w4[2], w4[3], selector); - w6[3] = hc_byte_perm (w4[1], w4[2], selector); - w6[2] = hc_byte_perm (w4[0], w4[1], selector); - w6[1] = hc_byte_perm (w3[3], w4[0], selector); - w6[0] = hc_byte_perm (w3[2], w3[3], selector); - w5[3] = hc_byte_perm (w3[1], w3[2], selector); - w5[2] = hc_byte_perm (w3[0], w3[1], selector); - w5[1] = hc_byte_perm (w2[3], w3[0], selector); - w5[0] = hc_byte_perm (w2[2], w2[3], selector); - w4[3] = hc_byte_perm (w2[1], w2[2], selector); - w4[2] = hc_byte_perm (w2[0], w2[1], selector); - w4[1] = hc_byte_perm (w1[3], w2[0], selector); - w4[0] = hc_byte_perm (w1[2], w1[3], selector); - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w7[3] = hc_byte_perm (w5[0], w5[1], selector); - w7[2] = hc_byte_perm (w4[3], w5[0], selector); - w7[1] = hc_byte_perm (w4[2], w4[3], selector); - w7[0] = hc_byte_perm (w4[1], w4[2], selector); - w6[3] = hc_byte_perm (w4[0], w4[1], selector); - w6[2] = hc_byte_perm (w3[3], w4[0], selector); - w6[1] = hc_byte_perm (w3[2], w3[3], selector); - w6[0] = hc_byte_perm (w3[1], w3[2], selector); - w5[3] = hc_byte_perm (w3[0], w3[1], selector); - w5[2] = hc_byte_perm (w2[3], w3[0], selector); - w5[1] = hc_byte_perm (w2[2], w2[3], selector); - w5[0] = hc_byte_perm (w2[1], w2[2], selector); - w4[3] = hc_byte_perm (w2[0], w2[1], selector); - w4[2] = hc_byte_perm (w1[3], w2[0], selector); - w4[1] = hc_byte_perm (w1[2], w1[3], selector); - w4[0] = hc_byte_perm (w1[1], w1[2], selector); - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w7[3] = hc_byte_perm (w4[3], w5[0], selector); - w7[2] = hc_byte_perm (w4[2], w4[3], selector); - w7[1] = hc_byte_perm (w4[1], w4[2], selector); - w7[0] = hc_byte_perm (w4[0], w4[1], selector); - w6[3] = hc_byte_perm (w3[3], w4[0], selector); - w6[2] = hc_byte_perm (w3[2], w3[3], selector); - w6[1] = hc_byte_perm (w3[1], w3[2], selector); - w6[0] = hc_byte_perm (w3[0], w3[1], selector); - w5[3] = hc_byte_perm (w2[3], w3[0], selector); - w5[2] = hc_byte_perm (w2[2], w2[3], selector); - w5[1] = hc_byte_perm (w2[1], w2[2], selector); - w5[0] = hc_byte_perm (w2[0], w2[1], selector); - w4[3] = hc_byte_perm (w1[3], w2[0], selector); - w4[2] = hc_byte_perm (w1[2], w1[3], selector); - w4[1] = hc_byte_perm (w1[1], w1[2], selector); - w4[0] = hc_byte_perm (w1[0], w1[1], selector); - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w7[3] = hc_byte_perm (w4[2], w4[3], selector); - w7[2] = hc_byte_perm (w4[1], w4[2], selector); - w7[1] = hc_byte_perm (w4[0], w4[1], selector); - w7[0] = hc_byte_perm (w3[3], w4[0], selector); - w6[3] = hc_byte_perm (w3[2], w3[3], selector); - w6[2] = hc_byte_perm (w3[1], w3[2], selector); - w6[1] = hc_byte_perm (w3[0], w3[1], selector); - w6[0] = hc_byte_perm (w2[3], w3[0], selector); - w5[3] = hc_byte_perm (w2[2], w2[3], selector); - w5[2] = hc_byte_perm (w2[1], w2[2], selector); - w5[1] = hc_byte_perm (w2[0], w2[1], selector); - w5[0] = hc_byte_perm (w1[3], w2[0], selector); - w4[3] = hc_byte_perm (w1[2], w1[3], selector); - w4[2] = hc_byte_perm (w1[1], w1[2], selector); - w4[1] = hc_byte_perm (w1[0], w1[1], selector); - w4[0] = hc_byte_perm (w0[3], w1[0], selector); - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w7[3] = hc_byte_perm (w4[1], w4[2], selector); - w7[2] = hc_byte_perm (w4[0], w4[1], selector); - w7[1] = hc_byte_perm (w3[3], w4[0], selector); - w7[0] = hc_byte_perm (w3[2], w3[3], selector); - w6[3] = hc_byte_perm (w3[1], w3[2], selector); - w6[2] = hc_byte_perm (w3[0], w3[1], selector); - w6[1] = hc_byte_perm (w2[3], w3[0], selector); - w6[0] = hc_byte_perm (w2[2], w2[3], selector); - w5[3] = hc_byte_perm (w2[1], w2[2], selector); - w5[2] = hc_byte_perm (w2[0], w2[1], selector); - w5[1] = hc_byte_perm (w1[3], w2[0], selector); - w5[0] = hc_byte_perm (w1[2], w1[3], selector); - w4[3] = hc_byte_perm (w1[1], w1[2], selector); - w4[2] = hc_byte_perm (w1[0], w1[1], selector); - w4[1] = hc_byte_perm (w0[3], w1[0], selector); - w4[0] = hc_byte_perm (w0[2], w0[3], selector); - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 14: - w7[3] = hc_byte_perm (w4[0], w4[1], selector); - w7[2] = hc_byte_perm (w3[3], w4[0], selector); - w7[1] = hc_byte_perm (w3[2], w3[3], selector); - w7[0] = hc_byte_perm (w3[1], w3[2], selector); - w6[3] = hc_byte_perm (w3[0], w3[1], selector); - w6[2] = hc_byte_perm (w2[3], w3[0], selector); - w6[1] = hc_byte_perm (w2[2], w2[3], selector); - w6[0] = hc_byte_perm (w2[1], w2[2], selector); - w5[3] = hc_byte_perm (w2[0], w2[1], selector); - w5[2] = hc_byte_perm (w1[3], w2[0], selector); - w5[1] = hc_byte_perm (w1[2], w1[3], selector); - w5[0] = hc_byte_perm (w1[1], w1[2], selector); - w4[3] = hc_byte_perm (w1[0], w1[1], selector); - w4[2] = hc_byte_perm (w0[3], w1[0], selector); - w4[1] = hc_byte_perm (w0[2], w0[3], selector); - w4[0] = hc_byte_perm (w0[1], w0[2], selector); - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 15: - w7[3] = hc_byte_perm (w3[3], w4[0], selector); - w7[2] = hc_byte_perm (w3[2], w3[3], selector); - w7[1] = hc_byte_perm (w3[1], w3[2], selector); - w7[0] = hc_byte_perm (w3[0], w3[1], selector); - w6[3] = hc_byte_perm (w2[3], w3[0], selector); - w6[2] = hc_byte_perm (w2[2], w2[3], selector); - w6[1] = hc_byte_perm (w2[1], w2[2], selector); - w6[0] = hc_byte_perm (w2[0], w2[1], selector); - w5[3] = hc_byte_perm (w1[3], w2[0], selector); - w5[2] = hc_byte_perm (w1[2], w1[3], selector); - w5[1] = hc_byte_perm (w1[1], w1[2], selector); - w5[0] = hc_byte_perm (w1[0], w1[1], selector); - w4[3] = hc_byte_perm (w0[3], w1[0], selector); - w4[2] = hc_byte_perm (w0[2], w0[3], selector); - w4[1] = hc_byte_perm (w0[1], w0[2], selector); - w4[0] = hc_byte_perm (w0[0], w0[1], selector); - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -10026,1712 +7950,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (PRIVATE_AS u32x *w0, PRIVATE break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm (w7[3], 0, selector); - w7[3] = hc_byte_perm (w7[2], w7[3], selector); - w7[2] = hc_byte_perm (w7[1], w7[2], selector); - w7[1] = hc_byte_perm (w7[0], w7[1], selector); - w7[0] = hc_byte_perm (w6[3], w7[0], selector); - w6[3] = hc_byte_perm (w6[2], w6[3], selector); - w6[2] = hc_byte_perm (w6[1], w6[2], selector); - w6[1] = hc_byte_perm (w6[0], w6[1], selector); - w6[0] = hc_byte_perm (w5[3], w6[0], selector); - w5[3] = hc_byte_perm (w5[2], w5[3], selector); - w5[2] = hc_byte_perm (w5[1], w5[2], selector); - w5[1] = hc_byte_perm (w5[0], w5[1], selector); - w5[0] = hc_byte_perm (w4[3], w5[0], selector); - w4[3] = hc_byte_perm (w4[2], w4[3], selector); - w4[2] = hc_byte_perm (w4[1], w4[2], selector); - w4[1] = hc_byte_perm (w4[0], w4[1], selector); - w4[0] = hc_byte_perm (w3[3], w4[0], selector); - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - - break; - - case 1: - c0[1] = hc_byte_perm (w7[3], 0, selector); - c0[0] = hc_byte_perm (w7[2], w7[3], selector); - w7[3] = hc_byte_perm (w7[1], w7[2], selector); - w7[2] = hc_byte_perm (w7[0], w7[1], selector); - w7[1] = hc_byte_perm (w6[3], w7[0], selector); - w7[0] = hc_byte_perm (w6[2], w6[3], selector); - w6[3] = hc_byte_perm (w6[1], w6[2], selector); - w6[2] = hc_byte_perm (w6[0], w6[1], selector); - w6[1] = hc_byte_perm (w5[3], w6[0], selector); - w6[0] = hc_byte_perm (w5[2], w5[3], selector); - w5[3] = hc_byte_perm (w5[1], w5[2], selector); - w5[2] = hc_byte_perm (w5[0], w5[1], selector); - w5[1] = hc_byte_perm (w4[3], w5[0], selector); - w5[0] = hc_byte_perm (w4[2], w4[3], selector); - w4[3] = hc_byte_perm (w4[1], w4[2], selector); - w4[2] = hc_byte_perm (w4[0], w4[1], selector); - w4[1] = hc_byte_perm (w3[3], w4[0], selector); - w4[0] = hc_byte_perm (w3[2], w3[3], selector); - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm (w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm (w7[3], 0, selector); - c0[1] = hc_byte_perm (w7[2], w7[3], selector); - c0[0] = hc_byte_perm (w7[1], w7[2], selector); - w7[3] = hc_byte_perm (w7[0], w7[1], selector); - w7[2] = hc_byte_perm (w6[3], w7[0], selector); - w7[1] = hc_byte_perm (w6[2], w6[3], selector); - w7[0] = hc_byte_perm (w6[1], w6[2], selector); - w6[3] = hc_byte_perm (w6[0], w6[1], selector); - w6[2] = hc_byte_perm (w5[3], w6[0], selector); - w6[1] = hc_byte_perm (w5[2], w5[3], selector); - w6[0] = hc_byte_perm (w5[1], w5[2], selector); - w5[3] = hc_byte_perm (w5[0], w5[1], selector); - w5[2] = hc_byte_perm (w4[3], w5[0], selector); - w5[1] = hc_byte_perm (w4[2], w4[3], selector); - w5[0] = hc_byte_perm (w4[1], w4[2], selector); - w4[3] = hc_byte_perm (w4[0], w4[1], selector); - w4[2] = hc_byte_perm (w3[3], w4[0], selector); - w4[1] = hc_byte_perm (w3[2], w3[3], selector); - w4[0] = hc_byte_perm (w3[1], w3[2], selector); - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm (w7[3], 0, selector); - c0[2] = hc_byte_perm (w7[2], w7[3], selector); - c0[1] = hc_byte_perm (w7[1], w7[2], selector); - c0[0] = hc_byte_perm (w7[0], w7[1], selector); - w7[3] = hc_byte_perm (w6[3], w7[0], selector); - w7[2] = hc_byte_perm (w6[2], w6[3], selector); - w7[1] = hc_byte_perm (w6[1], w6[2], selector); - w7[0] = hc_byte_perm (w6[0], w6[1], selector); - w6[3] = hc_byte_perm (w5[3], w6[0], selector); - w6[2] = hc_byte_perm (w5[2], w5[3], selector); - w6[1] = hc_byte_perm (w5[1], w5[2], selector); - w6[0] = hc_byte_perm (w5[0], w5[1], selector); - w5[3] = hc_byte_perm (w4[3], w5[0], selector); - w5[2] = hc_byte_perm (w4[2], w4[3], selector); - w5[1] = hc_byte_perm (w4[1], w4[2], selector); - w5[0] = hc_byte_perm (w4[0], w4[1], selector); - w4[3] = hc_byte_perm (w3[3], w4[0], selector); - w4[2] = hc_byte_perm (w3[2], w3[3], selector); - w4[1] = hc_byte_perm (w3[1], w3[2], selector); - w4[0] = hc_byte_perm (w3[0], w3[1], selector); - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm (w7[3], 0, selector); - c0[3] = hc_byte_perm (w7[2], w7[3], selector); - c0[2] = hc_byte_perm (w7[1], w7[2], selector); - c0[1] = hc_byte_perm (w7[0], w7[1], selector); - c0[0] = hc_byte_perm (w6[3], w7[0], selector); - w7[3] = hc_byte_perm (w6[2], w6[3], selector); - w7[2] = hc_byte_perm (w6[1], w6[2], selector); - w7[1] = hc_byte_perm (w6[0], w6[1], selector); - w7[0] = hc_byte_perm (w5[3], w6[0], selector); - w6[3] = hc_byte_perm (w5[2], w5[3], selector); - w6[2] = hc_byte_perm (w5[1], w5[2], selector); - w6[1] = hc_byte_perm (w5[0], w5[1], selector); - w6[0] = hc_byte_perm (w4[3], w5[0], selector); - w5[3] = hc_byte_perm (w4[2], w4[3], selector); - w5[2] = hc_byte_perm (w4[1], w4[2], selector); - w5[1] = hc_byte_perm (w4[0], w4[1], selector); - w5[0] = hc_byte_perm (w3[3], w4[0], selector); - w4[3] = hc_byte_perm (w3[2], w3[3], selector); - w4[2] = hc_byte_perm (w3[1], w3[2], selector); - w4[1] = hc_byte_perm (w3[0], w3[1], selector); - w4[0] = hc_byte_perm (w2[3], w3[0], selector); - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm (w7[3], 0, selector); - c1[0] = hc_byte_perm (w7[2], w7[3], selector); - c0[3] = hc_byte_perm (w7[1], w7[2], selector); - c0[2] = hc_byte_perm (w7[0], w7[1], selector); - c0[1] = hc_byte_perm (w6[3], w7[0], selector); - c0[0] = hc_byte_perm (w6[2], w6[3], selector); - w7[3] = hc_byte_perm (w6[1], w6[2], selector); - w7[2] = hc_byte_perm (w6[0], w6[1], selector); - w7[1] = hc_byte_perm (w5[3], w6[0], selector); - w7[0] = hc_byte_perm (w5[2], w5[3], selector); - w6[3] = hc_byte_perm (w5[1], w5[2], selector); - w6[2] = hc_byte_perm (w5[0], w5[1], selector); - w6[1] = hc_byte_perm (w4[3], w5[0], selector); - w6[0] = hc_byte_perm (w4[2], w4[3], selector); - w5[3] = hc_byte_perm (w4[1], w4[2], selector); - w5[2] = hc_byte_perm (w4[0], w4[1], selector); - w5[1] = hc_byte_perm (w3[3], w4[0], selector); - w5[0] = hc_byte_perm (w3[2], w3[3], selector); - w4[3] = hc_byte_perm (w3[1], w3[2], selector); - w4[2] = hc_byte_perm (w3[0], w3[1], selector); - w4[1] = hc_byte_perm (w2[3], w3[0], selector); - w4[0] = hc_byte_perm (w2[2], w2[3], selector); - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm (w7[3], 0, selector); - c1[1] = hc_byte_perm (w7[2], w7[3], selector); - c1[0] = hc_byte_perm (w7[1], w7[2], selector); - c0[3] = hc_byte_perm (w7[0], w7[1], selector); - c0[2] = hc_byte_perm (w6[3], w7[0], selector); - c0[1] = hc_byte_perm (w6[2], w6[3], selector); - c0[0] = hc_byte_perm (w6[1], w6[2], selector); - w7[3] = hc_byte_perm (w6[0], w6[1], selector); - w7[2] = hc_byte_perm (w5[3], w6[0], selector); - w7[1] = hc_byte_perm (w5[2], w5[3], selector); - w7[0] = hc_byte_perm (w5[1], w5[2], selector); - w6[3] = hc_byte_perm (w5[0], w5[1], selector); - w6[2] = hc_byte_perm (w4[3], w5[0], selector); - w6[1] = hc_byte_perm (w4[2], w4[3], selector); - w6[0] = hc_byte_perm (w4[1], w4[2], selector); - w5[3] = hc_byte_perm (w4[0], w4[1], selector); - w5[2] = hc_byte_perm (w3[3], w4[0], selector); - w5[1] = hc_byte_perm (w3[2], w3[3], selector); - w5[0] = hc_byte_perm (w3[1], w3[2], selector); - w4[3] = hc_byte_perm (w3[0], w3[1], selector); - w4[2] = hc_byte_perm (w2[3], w3[0], selector); - w4[1] = hc_byte_perm (w2[2], w2[3], selector); - w4[0] = hc_byte_perm (w2[1], w2[2], selector); - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm (w7[3], 0, selector); - c1[2] = hc_byte_perm (w7[2], w7[3], selector); - c1[1] = hc_byte_perm (w7[1], w7[2], selector); - c1[0] = hc_byte_perm (w7[0], w7[1], selector); - c0[3] = hc_byte_perm (w6[3], w7[0], selector); - c0[2] = hc_byte_perm (w6[2], w6[3], selector); - c0[1] = hc_byte_perm (w6[1], w6[2], selector); - c0[0] = hc_byte_perm (w6[0], w6[1], selector); - w7[3] = hc_byte_perm (w5[3], w6[0], selector); - w7[2] = hc_byte_perm (w5[2], w5[3], selector); - w7[1] = hc_byte_perm (w5[1], w5[2], selector); - w7[0] = hc_byte_perm (w5[0], w5[1], selector); - w6[3] = hc_byte_perm (w4[3], w5[0], selector); - w6[2] = hc_byte_perm (w4[2], w4[3], selector); - w6[1] = hc_byte_perm (w4[1], w4[2], selector); - w6[0] = hc_byte_perm (w4[0], w4[1], selector); - w5[3] = hc_byte_perm (w3[3], w4[0], selector); - w5[2] = hc_byte_perm (w3[2], w3[3], selector); - w5[1] = hc_byte_perm (w3[1], w3[2], selector); - w5[0] = hc_byte_perm (w3[0], w3[1], selector); - w4[3] = hc_byte_perm (w2[3], w3[0], selector); - w4[2] = hc_byte_perm (w2[2], w2[3], selector); - w4[1] = hc_byte_perm (w2[1], w2[2], selector); - w4[0] = hc_byte_perm (w2[0], w2[1], selector); - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm (w7[3], 0, selector); - c1[3] = hc_byte_perm (w7[2], w7[3], selector); - c1[2] = hc_byte_perm (w7[1], w7[2], selector); - c1[1] = hc_byte_perm (w7[0], w7[1], selector); - c1[0] = hc_byte_perm (w6[3], w7[0], selector); - c0[3] = hc_byte_perm (w6[2], w6[3], selector); - c0[2] = hc_byte_perm (w6[1], w6[2], selector); - c0[1] = hc_byte_perm (w6[0], w6[1], selector); - c0[0] = hc_byte_perm (w5[3], w6[0], selector); - w7[3] = hc_byte_perm (w5[2], w5[3], selector); - w7[2] = hc_byte_perm (w5[1], w5[2], selector); - w7[1] = hc_byte_perm (w5[0], w5[1], selector); - w7[0] = hc_byte_perm (w4[3], w5[0], selector); - w6[3] = hc_byte_perm (w4[2], w4[3], selector); - w6[2] = hc_byte_perm (w4[1], w4[2], selector); - w6[1] = hc_byte_perm (w4[0], w4[1], selector); - w6[0] = hc_byte_perm (w3[3], w4[0], selector); - w5[3] = hc_byte_perm (w3[2], w3[3], selector); - w5[2] = hc_byte_perm (w3[1], w3[2], selector); - w5[1] = hc_byte_perm (w3[0], w3[1], selector); - w5[0] = hc_byte_perm (w2[3], w3[0], selector); - w4[3] = hc_byte_perm (w2[2], w2[3], selector); - w4[2] = hc_byte_perm (w2[1], w2[2], selector); - w4[1] = hc_byte_perm (w2[0], w2[1], selector); - w4[0] = hc_byte_perm (w1[3], w2[0], selector); - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm (w7[3], 0, selector); - c2[0] = hc_byte_perm (w7[2], w7[3], selector); - c1[3] = hc_byte_perm (w7[1], w7[2], selector); - c1[2] = hc_byte_perm (w7[0], w7[1], selector); - c1[1] = hc_byte_perm (w6[3], w7[0], selector); - c1[0] = hc_byte_perm (w6[2], w6[3], selector); - c0[3] = hc_byte_perm (w6[1], w6[2], selector); - c0[2] = hc_byte_perm (w6[0], w6[1], selector); - c0[1] = hc_byte_perm (w5[3], w6[0], selector); - c0[0] = hc_byte_perm (w5[2], w5[3], selector); - w7[3] = hc_byte_perm (w5[1], w5[2], selector); - w7[2] = hc_byte_perm (w5[0], w5[1], selector); - w7[1] = hc_byte_perm (w4[3], w5[0], selector); - w7[0] = hc_byte_perm (w4[2], w4[3], selector); - w6[3] = hc_byte_perm (w4[1], w4[2], selector); - w6[2] = hc_byte_perm (w4[0], w4[1], selector); - w6[1] = hc_byte_perm (w3[3], w4[0], selector); - w6[0] = hc_byte_perm (w3[2], w3[3], selector); - w5[3] = hc_byte_perm (w3[1], w3[2], selector); - w5[2] = hc_byte_perm (w3[0], w3[1], selector); - w5[1] = hc_byte_perm (w2[3], w3[0], selector); - w5[0] = hc_byte_perm (w2[2], w2[3], selector); - w4[3] = hc_byte_perm (w2[1], w2[2], selector); - w4[2] = hc_byte_perm (w2[0], w2[1], selector); - w4[1] = hc_byte_perm (w1[3], w2[0], selector); - w4[0] = hc_byte_perm (w1[2], w1[3], selector); - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm (w7[3], 0, selector); - c2[1] = hc_byte_perm (w7[2], w7[3], selector); - c2[0] = hc_byte_perm (w7[1], w7[2], selector); - c1[3] = hc_byte_perm (w7[0], w7[1], selector); - c1[2] = hc_byte_perm (w6[3], w7[0], selector); - c1[1] = hc_byte_perm (w6[2], w6[3], selector); - c1[0] = hc_byte_perm (w6[1], w6[2], selector); - c0[3] = hc_byte_perm (w6[0], w6[1], selector); - c0[2] = hc_byte_perm (w5[3], w6[0], selector); - c0[1] = hc_byte_perm (w5[2], w5[3], selector); - c0[0] = hc_byte_perm (w5[1], w5[2], selector); - w7[3] = hc_byte_perm (w5[0], w5[1], selector); - w7[2] = hc_byte_perm (w4[3], w5[0], selector); - w7[1] = hc_byte_perm (w4[2], w4[3], selector); - w7[0] = hc_byte_perm (w4[1], w4[2], selector); - w6[3] = hc_byte_perm (w4[0], w4[1], selector); - w6[2] = hc_byte_perm (w3[3], w4[0], selector); - w6[1] = hc_byte_perm (w3[2], w3[3], selector); - w6[0] = hc_byte_perm (w3[1], w3[2], selector); - w5[3] = hc_byte_perm (w3[0], w3[1], selector); - w5[2] = hc_byte_perm (w2[3], w3[0], selector); - w5[1] = hc_byte_perm (w2[2], w2[3], selector); - w5[0] = hc_byte_perm (w2[1], w2[2], selector); - w4[3] = hc_byte_perm (w2[0], w2[1], selector); - w4[2] = hc_byte_perm (w1[3], w2[0], selector); - w4[1] = hc_byte_perm (w1[2], w1[3], selector); - w4[0] = hc_byte_perm (w1[1], w1[2], selector); - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm (w7[3], 0, selector); - c2[2] = hc_byte_perm (w7[2], w7[3], selector); - c2[1] = hc_byte_perm (w7[1], w7[2], selector); - c2[0] = hc_byte_perm (w7[0], w7[1], selector); - c1[3] = hc_byte_perm (w6[3], w7[0], selector); - c1[2] = hc_byte_perm (w6[2], w6[3], selector); - c1[1] = hc_byte_perm (w6[1], w6[2], selector); - c1[0] = hc_byte_perm (w6[0], w6[1], selector); - c0[3] = hc_byte_perm (w5[3], w6[0], selector); - c0[2] = hc_byte_perm (w5[2], w5[3], selector); - c0[1] = hc_byte_perm (w5[1], w5[2], selector); - c0[0] = hc_byte_perm (w5[0], w5[1], selector); - w7[3] = hc_byte_perm (w4[3], w5[0], selector); - w7[2] = hc_byte_perm (w4[2], w4[3], selector); - w7[1] = hc_byte_perm (w4[1], w4[2], selector); - w7[0] = hc_byte_perm (w4[0], w4[1], selector); - w6[3] = hc_byte_perm (w3[3], w4[0], selector); - w6[2] = hc_byte_perm (w3[2], w3[3], selector); - w6[1] = hc_byte_perm (w3[1], w3[2], selector); - w6[0] = hc_byte_perm (w3[0], w3[1], selector); - w5[3] = hc_byte_perm (w2[3], w3[0], selector); - w5[2] = hc_byte_perm (w2[2], w2[3], selector); - w5[1] = hc_byte_perm (w2[1], w2[2], selector); - w5[0] = hc_byte_perm (w2[0], w2[1], selector); - w4[3] = hc_byte_perm (w1[3], w2[0], selector); - w4[2] = hc_byte_perm (w1[2], w1[3], selector); - w4[1] = hc_byte_perm (w1[1], w1[2], selector); - w4[0] = hc_byte_perm (w1[0], w1[1], selector); - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm (w7[3], 0, selector); - c2[3] = hc_byte_perm (w7[2], w7[3], selector); - c2[2] = hc_byte_perm (w7[1], w7[2], selector); - c2[1] = hc_byte_perm (w7[0], w7[1], selector); - c2[0] = hc_byte_perm (w6[3], w7[0], selector); - c1[3] = hc_byte_perm (w6[2], w6[3], selector); - c1[2] = hc_byte_perm (w6[1], w6[2], selector); - c1[1] = hc_byte_perm (w6[0], w6[1], selector); - c1[0] = hc_byte_perm (w5[3], w6[0], selector); - c0[3] = hc_byte_perm (w5[2], w5[3], selector); - c0[2] = hc_byte_perm (w5[1], w5[2], selector); - c0[1] = hc_byte_perm (w5[0], w5[1], selector); - c0[0] = hc_byte_perm (w4[3], w5[0], selector); - w7[3] = hc_byte_perm (w4[2], w4[3], selector); - w7[2] = hc_byte_perm (w4[1], w4[2], selector); - w7[1] = hc_byte_perm (w4[0], w4[1], selector); - w7[0] = hc_byte_perm (w3[3], w4[0], selector); - w6[3] = hc_byte_perm (w3[2], w3[3], selector); - w6[2] = hc_byte_perm (w3[1], w3[2], selector); - w6[1] = hc_byte_perm (w3[0], w3[1], selector); - w6[0] = hc_byte_perm (w2[3], w3[0], selector); - w5[3] = hc_byte_perm (w2[2], w2[3], selector); - w5[2] = hc_byte_perm (w2[1], w2[2], selector); - w5[1] = hc_byte_perm (w2[0], w2[1], selector); - w5[0] = hc_byte_perm (w1[3], w2[0], selector); - w4[3] = hc_byte_perm (w1[2], w1[3], selector); - w4[2] = hc_byte_perm (w1[1], w1[2], selector); - w4[1] = hc_byte_perm (w1[0], w1[1], selector); - w4[0] = hc_byte_perm (w0[3], w1[0], selector); - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm (w7[3], 0, selector); - c3[0] = hc_byte_perm (w7[2], w7[3], selector); - c2[3] = hc_byte_perm (w7[1], w7[2], selector); - c2[2] = hc_byte_perm (w7[0], w7[1], selector); - c2[1] = hc_byte_perm (w6[3], w7[0], selector); - c2[0] = hc_byte_perm (w6[2], w6[3], selector); - c1[3] = hc_byte_perm (w6[1], w6[2], selector); - c1[2] = hc_byte_perm (w6[0], w6[1], selector); - c1[1] = hc_byte_perm (w5[3], w6[0], selector); - c1[0] = hc_byte_perm (w5[2], w5[3], selector); - c0[3] = hc_byte_perm (w5[1], w5[2], selector); - c0[2] = hc_byte_perm (w5[0], w5[1], selector); - c0[1] = hc_byte_perm (w4[3], w5[0], selector); - c0[0] = hc_byte_perm (w4[2], w4[3], selector); - w7[3] = hc_byte_perm (w4[1], w4[2], selector); - w7[2] = hc_byte_perm (w4[0], w4[1], selector); - w7[1] = hc_byte_perm (w3[3], w4[0], selector); - w7[0] = hc_byte_perm (w3[2], w3[3], selector); - w6[3] = hc_byte_perm (w3[1], w3[2], selector); - w6[2] = hc_byte_perm (w3[0], w3[1], selector); - w6[1] = hc_byte_perm (w2[3], w3[0], selector); - w6[0] = hc_byte_perm (w2[2], w2[3], selector); - w5[3] = hc_byte_perm (w2[1], w2[2], selector); - w5[2] = hc_byte_perm (w2[0], w2[1], selector); - w5[1] = hc_byte_perm (w1[3], w2[0], selector); - w5[0] = hc_byte_perm (w1[2], w1[3], selector); - w4[3] = hc_byte_perm (w1[1], w1[2], selector); - w4[2] = hc_byte_perm (w1[0], w1[1], selector); - w4[1] = hc_byte_perm (w0[3], w1[0], selector); - w4[0] = hc_byte_perm (w0[2], w0[3], selector); - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm (w7[3], 0, selector); - c3[1] = hc_byte_perm (w7[2], w7[3], selector); - c3[0] = hc_byte_perm (w7[1], w7[2], selector); - c2[3] = hc_byte_perm (w7[0], w7[1], selector); - c2[2] = hc_byte_perm (w6[3], w7[0], selector); - c2[1] = hc_byte_perm (w6[2], w6[3], selector); - c2[0] = hc_byte_perm (w6[1], w6[2], selector); - c1[3] = hc_byte_perm (w6[0], w6[1], selector); - c1[2] = hc_byte_perm (w5[3], w6[0], selector); - c1[1] = hc_byte_perm (w5[2], w5[3], selector); - c1[0] = hc_byte_perm (w5[1], w5[2], selector); - c0[3] = hc_byte_perm (w5[0], w5[1], selector); - c0[2] = hc_byte_perm (w4[3], w5[0], selector); - c0[1] = hc_byte_perm (w4[2], w4[3], selector); - c0[0] = hc_byte_perm (w4[1], w4[2], selector); - w7[3] = hc_byte_perm (w4[0], w4[1], selector); - w7[2] = hc_byte_perm (w3[3], w4[0], selector); - w7[1] = hc_byte_perm (w3[2], w3[3], selector); - w7[0] = hc_byte_perm (w3[1], w3[2], selector); - w6[3] = hc_byte_perm (w3[0], w3[1], selector); - w6[2] = hc_byte_perm (w2[3], w3[0], selector); - w6[1] = hc_byte_perm (w2[2], w2[3], selector); - w6[0] = hc_byte_perm (w2[1], w2[2], selector); - w5[3] = hc_byte_perm (w2[0], w2[1], selector); - w5[2] = hc_byte_perm (w1[3], w2[0], selector); - w5[1] = hc_byte_perm (w1[2], w1[3], selector); - w5[0] = hc_byte_perm (w1[1], w1[2], selector); - w4[3] = hc_byte_perm (w1[0], w1[1], selector); - w4[2] = hc_byte_perm (w0[3], w1[0], selector); - w4[1] = hc_byte_perm (w0[2], w0[3], selector); - w4[0] = hc_byte_perm (w0[1], w0[2], selector); - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm (w7[3], 0, selector); - c3[2] = hc_byte_perm (w7[2], w7[3], selector); - c3[1] = hc_byte_perm (w7[1], w7[2], selector); - c3[0] = hc_byte_perm (w7[0], w7[1], selector); - c2[3] = hc_byte_perm (w6[3], w7[0], selector); - c2[2] = hc_byte_perm (w6[2], w6[3], selector); - c2[1] = hc_byte_perm (w6[1], w6[2], selector); - c2[0] = hc_byte_perm (w6[0], w6[1], selector); - c1[3] = hc_byte_perm (w5[3], w6[0], selector); - c1[2] = hc_byte_perm (w5[2], w5[3], selector); - c1[1] = hc_byte_perm (w5[1], w5[2], selector); - c1[0] = hc_byte_perm (w5[0], w5[1], selector); - c0[3] = hc_byte_perm (w4[3], w5[0], selector); - c0[2] = hc_byte_perm (w4[2], w4[3], selector); - c0[1] = hc_byte_perm (w4[1], w4[2], selector); - c0[0] = hc_byte_perm (w4[0], w4[1], selector); - w7[3] = hc_byte_perm (w3[3], w4[0], selector); - w7[2] = hc_byte_perm (w3[2], w3[3], selector); - w7[1] = hc_byte_perm (w3[1], w3[2], selector); - w7[0] = hc_byte_perm (w3[0], w3[1], selector); - w6[3] = hc_byte_perm (w2[3], w3[0], selector); - w6[2] = hc_byte_perm (w2[2], w2[3], selector); - w6[1] = hc_byte_perm (w2[1], w2[2], selector); - w6[0] = hc_byte_perm (w2[0], w2[1], selector); - w5[3] = hc_byte_perm (w1[3], w2[0], selector); - w5[2] = hc_byte_perm (w1[2], w1[3], selector); - w5[1] = hc_byte_perm (w1[1], w1[2], selector); - w5[0] = hc_byte_perm (w1[0], w1[1], selector); - w4[3] = hc_byte_perm (w0[3], w1[0], selector); - w4[2] = hc_byte_perm (w0[2], w0[3], selector); - w4[1] = hc_byte_perm (w0[1], w0[2], selector); - w4[0] = hc_byte_perm (w0[0], w0[1], selector); - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm (w7[3], 0, selector); - c3[3] = hc_byte_perm (w7[2], w7[3], selector); - c3[2] = hc_byte_perm (w7[1], w7[2], selector); - c3[1] = hc_byte_perm (w7[0], w7[1], selector); - c3[0] = hc_byte_perm (w6[3], w7[0], selector); - c2[3] = hc_byte_perm (w6[2], w6[3], selector); - c2[2] = hc_byte_perm (w6[1], w6[2], selector); - c2[1] = hc_byte_perm (w6[0], w6[1], selector); - c2[0] = hc_byte_perm (w5[3], w6[0], selector); - c1[3] = hc_byte_perm (w5[2], w5[3], selector); - c1[2] = hc_byte_perm (w5[1], w5[2], selector); - c1[1] = hc_byte_perm (w5[0], w5[1], selector); - c1[0] = hc_byte_perm (w4[3], w5[0], selector); - c0[3] = hc_byte_perm (w4[2], w4[3], selector); - c0[2] = hc_byte_perm (w4[1], w4[2], selector); - c0[1] = hc_byte_perm (w4[0], w4[1], selector); - c0[0] = hc_byte_perm (w3[3], w4[0], selector); - w7[3] = hc_byte_perm (w3[2], w3[3], selector); - w7[2] = hc_byte_perm (w3[1], w3[2], selector); - w7[1] = hc_byte_perm (w3[0], w3[1], selector); - w7[0] = hc_byte_perm (w2[3], w3[0], selector); - w6[3] = hc_byte_perm (w2[2], w2[3], selector); - w6[2] = hc_byte_perm (w2[1], w2[2], selector); - w6[1] = hc_byte_perm (w2[0], w2[1], selector); - w6[0] = hc_byte_perm (w1[3], w2[0], selector); - w5[3] = hc_byte_perm (w1[2], w1[3], selector); - w5[2] = hc_byte_perm (w1[1], w1[2], selector); - w5[1] = hc_byte_perm (w1[0], w1[1], selector); - w5[0] = hc_byte_perm (w0[3], w1[0], selector); - w4[3] = hc_byte_perm (w0[2], w0[3], selector); - w4[2] = hc_byte_perm (w0[1], w0[2], selector); - w4[1] = hc_byte_perm (w0[0], w0[1], selector); - w4[0] = hc_byte_perm ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm (w7[3], 0, selector); - c4[0] = hc_byte_perm (w7[2], w7[3], selector); - c3[3] = hc_byte_perm (w7[1], w7[2], selector); - c3[2] = hc_byte_perm (w7[0], w7[1], selector); - c3[1] = hc_byte_perm (w6[3], w7[0], selector); - c3[0] = hc_byte_perm (w6[2], w6[3], selector); - c2[3] = hc_byte_perm (w6[1], w6[2], selector); - c2[2] = hc_byte_perm (w6[0], w6[1], selector); - c2[1] = hc_byte_perm (w5[3], w6[0], selector); - c2[0] = hc_byte_perm (w5[2], w5[3], selector); - c1[3] = hc_byte_perm (w5[1], w5[2], selector); - c1[2] = hc_byte_perm (w5[0], w5[1], selector); - c1[1] = hc_byte_perm (w4[3], w5[0], selector); - c1[0] = hc_byte_perm (w4[2], w4[3], selector); - c0[3] = hc_byte_perm (w4[1], w4[2], selector); - c0[2] = hc_byte_perm (w4[0], w4[1], selector); - c0[1] = hc_byte_perm (w3[3], w4[0], selector); - c0[0] = hc_byte_perm (w3[2], w3[3], selector); - w7[3] = hc_byte_perm (w3[1], w3[2], selector); - w7[2] = hc_byte_perm (w3[0], w3[1], selector); - w7[1] = hc_byte_perm (w2[3], w3[0], selector); - w7[0] = hc_byte_perm (w2[2], w2[3], selector); - w6[3] = hc_byte_perm (w2[1], w2[2], selector); - w6[2] = hc_byte_perm (w2[0], w2[1], selector); - w6[1] = hc_byte_perm (w1[3], w2[0], selector); - w6[0] = hc_byte_perm (w1[2], w1[3], selector); - w5[3] = hc_byte_perm (w1[1], w1[2], selector); - w5[2] = hc_byte_perm (w1[0], w1[1], selector); - w5[1] = hc_byte_perm (w0[3], w1[0], selector); - w5[0] = hc_byte_perm (w0[2], w0[3], selector); - w4[3] = hc_byte_perm (w0[1], w0[2], selector); - w4[2] = hc_byte_perm (w0[0], w0[1], selector); - w4[1] = hc_byte_perm ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm (w7[3], 0, selector); - c4[1] = hc_byte_perm (w7[2], w7[3], selector); - c4[0] = hc_byte_perm (w7[1], w7[2], selector); - c3[3] = hc_byte_perm (w7[0], w7[1], selector); - c3[2] = hc_byte_perm (w6[3], w7[0], selector); - c3[1] = hc_byte_perm (w6[2], w6[3], selector); - c3[0] = hc_byte_perm (w6[1], w6[2], selector); - c2[3] = hc_byte_perm (w6[0], w6[1], selector); - c2[2] = hc_byte_perm (w5[3], w6[0], selector); - c2[1] = hc_byte_perm (w5[2], w5[3], selector); - c2[0] = hc_byte_perm (w5[1], w5[2], selector); - c1[3] = hc_byte_perm (w5[0], w5[1], selector); - c1[2] = hc_byte_perm (w4[3], w5[0], selector); - c1[1] = hc_byte_perm (w4[2], w4[3], selector); - c1[0] = hc_byte_perm (w4[1], w4[2], selector); - c0[3] = hc_byte_perm (w4[0], w4[1], selector); - c0[2] = hc_byte_perm (w3[3], w4[0], selector); - c0[1] = hc_byte_perm (w3[2], w3[3], selector); - c0[0] = hc_byte_perm (w3[1], w3[2], selector); - w7[3] = hc_byte_perm (w3[0], w3[1], selector); - w7[2] = hc_byte_perm (w2[3], w3[0], selector); - w7[1] = hc_byte_perm (w2[2], w2[3], selector); - w7[0] = hc_byte_perm (w2[1], w2[2], selector); - w6[3] = hc_byte_perm (w2[0], w2[1], selector); - w6[2] = hc_byte_perm (w1[3], w2[0], selector); - w6[1] = hc_byte_perm (w1[2], w1[3], selector); - w6[0] = hc_byte_perm (w1[1], w1[2], selector); - w5[3] = hc_byte_perm (w1[0], w1[1], selector); - w5[2] = hc_byte_perm (w0[3], w1[0], selector); - w5[1] = hc_byte_perm (w0[2], w0[3], selector); - w5[0] = hc_byte_perm (w0[1], w0[2], selector); - w4[3] = hc_byte_perm (w0[0], w0[1], selector); - w4[2] = hc_byte_perm ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm (w7[3], 0, selector); - c4[2] = hc_byte_perm (w7[2], w7[3], selector); - c4[1] = hc_byte_perm (w7[1], w7[2], selector); - c4[0] = hc_byte_perm (w7[0], w7[1], selector); - c3[3] = hc_byte_perm (w6[3], w7[0], selector); - c3[2] = hc_byte_perm (w6[2], w6[3], selector); - c3[1] = hc_byte_perm (w6[1], w6[2], selector); - c3[0] = hc_byte_perm (w6[0], w6[1], selector); - c2[3] = hc_byte_perm (w5[3], w6[0], selector); - c2[2] = hc_byte_perm (w5[2], w5[3], selector); - c2[1] = hc_byte_perm (w5[1], w5[2], selector); - c2[0] = hc_byte_perm (w5[0], w5[1], selector); - c1[3] = hc_byte_perm (w4[3], w5[0], selector); - c1[2] = hc_byte_perm (w4[2], w4[3], selector); - c1[1] = hc_byte_perm (w4[1], w4[2], selector); - c1[0] = hc_byte_perm (w4[0], w4[1], selector); - c0[3] = hc_byte_perm (w3[3], w4[0], selector); - c0[2] = hc_byte_perm (w3[2], w3[3], selector); - c0[1] = hc_byte_perm (w3[1], w3[2], selector); - c0[0] = hc_byte_perm (w3[0], w3[1], selector); - w7[3] = hc_byte_perm (w2[3], w3[0], selector); - w7[2] = hc_byte_perm (w2[2], w2[3], selector); - w7[1] = hc_byte_perm (w2[1], w2[2], selector); - w7[0] = hc_byte_perm (w2[0], w2[1], selector); - w6[3] = hc_byte_perm (w1[3], w2[0], selector); - w6[2] = hc_byte_perm (w1[2], w1[3], selector); - w6[1] = hc_byte_perm (w1[1], w1[2], selector); - w6[0] = hc_byte_perm (w1[0], w1[1], selector); - w5[3] = hc_byte_perm (w0[3], w1[0], selector); - w5[2] = hc_byte_perm (w0[2], w0[3], selector); - w5[1] = hc_byte_perm (w0[1], w0[2], selector); - w5[0] = hc_byte_perm (w0[0], w0[1], selector); - w4[3] = hc_byte_perm ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm (w7[3], 0, selector); - c4[3] = hc_byte_perm (w7[2], w7[3], selector); - c4[2] = hc_byte_perm (w7[1], w7[2], selector); - c4[1] = hc_byte_perm (w7[0], w7[1], selector); - c4[0] = hc_byte_perm (w6[3], w7[0], selector); - c3[3] = hc_byte_perm (w6[2], w6[3], selector); - c3[2] = hc_byte_perm (w6[1], w6[2], selector); - c3[1] = hc_byte_perm (w6[0], w6[1], selector); - c3[0] = hc_byte_perm (w5[3], w6[0], selector); - c2[3] = hc_byte_perm (w5[2], w5[3], selector); - c2[2] = hc_byte_perm (w5[1], w5[2], selector); - c2[1] = hc_byte_perm (w5[0], w5[1], selector); - c2[0] = hc_byte_perm (w4[3], w5[0], selector); - c1[3] = hc_byte_perm (w4[2], w4[3], selector); - c1[2] = hc_byte_perm (w4[1], w4[2], selector); - c1[1] = hc_byte_perm (w4[0], w4[1], selector); - c1[0] = hc_byte_perm (w3[3], w4[0], selector); - c0[3] = hc_byte_perm (w3[2], w3[3], selector); - c0[2] = hc_byte_perm (w3[1], w3[2], selector); - c0[1] = hc_byte_perm (w3[0], w3[1], selector); - c0[0] = hc_byte_perm (w2[3], w3[0], selector); - w7[3] = hc_byte_perm (w2[2], w2[3], selector); - w7[2] = hc_byte_perm (w2[1], w2[2], selector); - w7[1] = hc_byte_perm (w2[0], w2[1], selector); - w7[0] = hc_byte_perm (w1[3], w2[0], selector); - w6[3] = hc_byte_perm (w1[2], w1[3], selector); - w6[2] = hc_byte_perm (w1[1], w1[2], selector); - w6[1] = hc_byte_perm (w1[0], w1[1], selector); - w6[0] = hc_byte_perm (w0[3], w1[0], selector); - w5[3] = hc_byte_perm (w0[2], w0[3], selector); - w5[2] = hc_byte_perm (w0[1], w0[2], selector); - w5[1] = hc_byte_perm (w0[0], w0[1], selector); - w5[0] = hc_byte_perm ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm (w7[3], 0, selector); - c5[0] = hc_byte_perm (w7[2], w7[3], selector); - c4[3] = hc_byte_perm (w7[1], w7[2], selector); - c4[2] = hc_byte_perm (w7[0], w7[1], selector); - c4[1] = hc_byte_perm (w6[3], w7[0], selector); - c4[0] = hc_byte_perm (w6[2], w6[3], selector); - c3[3] = hc_byte_perm (w6[1], w6[2], selector); - c3[2] = hc_byte_perm (w6[0], w6[1], selector); - c3[1] = hc_byte_perm (w5[3], w6[0], selector); - c3[0] = hc_byte_perm (w5[2], w5[3], selector); - c2[3] = hc_byte_perm (w5[1], w5[2], selector); - c2[2] = hc_byte_perm (w5[0], w5[1], selector); - c2[1] = hc_byte_perm (w4[3], w5[0], selector); - c2[0] = hc_byte_perm (w4[2], w4[3], selector); - c1[3] = hc_byte_perm (w4[1], w4[2], selector); - c1[2] = hc_byte_perm (w4[0], w4[1], selector); - c1[1] = hc_byte_perm (w3[3], w4[0], selector); - c1[0] = hc_byte_perm (w3[2], w3[3], selector); - c0[3] = hc_byte_perm (w3[1], w3[2], selector); - c0[2] = hc_byte_perm (w3[0], w3[1], selector); - c0[1] = hc_byte_perm (w2[3], w3[0], selector); - c0[0] = hc_byte_perm (w2[2], w2[3], selector); - w7[3] = hc_byte_perm (w2[1], w2[2], selector); - w7[2] = hc_byte_perm (w2[0], w2[1], selector); - w7[1] = hc_byte_perm (w1[3], w2[0], selector); - w7[0] = hc_byte_perm (w1[2], w1[3], selector); - w6[3] = hc_byte_perm (w1[1], w1[2], selector); - w6[2] = hc_byte_perm (w1[0], w1[1], selector); - w6[1] = hc_byte_perm (w0[3], w1[0], selector); - w6[0] = hc_byte_perm (w0[2], w0[3], selector); - w5[3] = hc_byte_perm (w0[1], w0[2], selector); - w5[2] = hc_byte_perm (w0[0], w0[1], selector); - w5[1] = hc_byte_perm ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm (w7[3], 0, selector); - c5[1] = hc_byte_perm (w7[2], w7[3], selector); - c5[0] = hc_byte_perm (w7[1], w7[2], selector); - c4[3] = hc_byte_perm (w7[0], w7[1], selector); - c4[2] = hc_byte_perm (w6[3], w7[0], selector); - c4[1] = hc_byte_perm (w6[2], w6[3], selector); - c4[0] = hc_byte_perm (w6[1], w6[2], selector); - c3[3] = hc_byte_perm (w6[0], w6[1], selector); - c3[2] = hc_byte_perm (w5[3], w6[0], selector); - c3[1] = hc_byte_perm (w5[2], w5[3], selector); - c3[0] = hc_byte_perm (w5[1], w5[2], selector); - c2[3] = hc_byte_perm (w5[0], w5[1], selector); - c2[2] = hc_byte_perm (w4[3], w5[0], selector); - c2[1] = hc_byte_perm (w4[2], w4[3], selector); - c2[0] = hc_byte_perm (w4[1], w4[2], selector); - c1[3] = hc_byte_perm (w4[0], w4[1], selector); - c1[2] = hc_byte_perm (w3[3], w4[0], selector); - c1[1] = hc_byte_perm (w3[2], w3[3], selector); - c1[0] = hc_byte_perm (w3[1], w3[2], selector); - c0[3] = hc_byte_perm (w3[0], w3[1], selector); - c0[2] = hc_byte_perm (w2[3], w3[0], selector); - c0[1] = hc_byte_perm (w2[2], w2[3], selector); - c0[0] = hc_byte_perm (w2[1], w2[2], selector); - w7[3] = hc_byte_perm (w2[0], w2[1], selector); - w7[2] = hc_byte_perm (w1[3], w2[0], selector); - w7[1] = hc_byte_perm (w1[2], w1[3], selector); - w7[0] = hc_byte_perm (w1[1], w1[2], selector); - w6[3] = hc_byte_perm (w1[0], w1[1], selector); - w6[2] = hc_byte_perm (w0[3], w1[0], selector); - w6[1] = hc_byte_perm (w0[2], w0[3], selector); - w6[0] = hc_byte_perm (w0[1], w0[2], selector); - w5[3] = hc_byte_perm (w0[0], w0[1], selector); - w5[2] = hc_byte_perm ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm (w7[3], 0, selector); - c5[2] = hc_byte_perm (w7[2], w7[3], selector); - c5[1] = hc_byte_perm (w7[1], w7[2], selector); - c5[0] = hc_byte_perm (w7[0], w7[1], selector); - c4[3] = hc_byte_perm (w6[3], w7[0], selector); - c4[2] = hc_byte_perm (w6[2], w6[3], selector); - c4[1] = hc_byte_perm (w6[1], w6[2], selector); - c4[0] = hc_byte_perm (w6[0], w6[1], selector); - c3[3] = hc_byte_perm (w5[3], w6[0], selector); - c3[2] = hc_byte_perm (w5[2], w5[3], selector); - c3[1] = hc_byte_perm (w5[1], w5[2], selector); - c3[0] = hc_byte_perm (w5[0], w5[1], selector); - c2[3] = hc_byte_perm (w4[3], w5[0], selector); - c2[2] = hc_byte_perm (w4[2], w4[3], selector); - c2[1] = hc_byte_perm (w4[1], w4[2], selector); - c2[0] = hc_byte_perm (w4[0], w4[1], selector); - c1[3] = hc_byte_perm (w3[3], w4[0], selector); - c1[2] = hc_byte_perm (w3[2], w3[3], selector); - c1[1] = hc_byte_perm (w3[1], w3[2], selector); - c1[0] = hc_byte_perm (w3[0], w3[1], selector); - c0[3] = hc_byte_perm (w2[3], w3[0], selector); - c0[2] = hc_byte_perm (w2[2], w2[3], selector); - c0[1] = hc_byte_perm (w2[1], w2[2], selector); - c0[0] = hc_byte_perm (w2[0], w2[1], selector); - w7[3] = hc_byte_perm (w1[3], w2[0], selector); - w7[2] = hc_byte_perm (w1[2], w1[3], selector); - w7[1] = hc_byte_perm (w1[1], w1[2], selector); - w7[0] = hc_byte_perm (w1[0], w1[1], selector); - w6[3] = hc_byte_perm (w0[3], w1[0], selector); - w6[2] = hc_byte_perm (w0[2], w0[3], selector); - w6[1] = hc_byte_perm (w0[1], w0[2], selector); - w6[0] = hc_byte_perm (w0[0], w0[1], selector); - w5[3] = hc_byte_perm ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm (w7[3], 0, selector); - c5[3] = hc_byte_perm (w7[2], w7[3], selector); - c5[2] = hc_byte_perm (w7[1], w7[2], selector); - c5[1] = hc_byte_perm (w7[0], w7[1], selector); - c5[0] = hc_byte_perm (w6[3], w7[0], selector); - c4[3] = hc_byte_perm (w6[2], w6[3], selector); - c4[2] = hc_byte_perm (w6[1], w6[2], selector); - c4[1] = hc_byte_perm (w6[0], w6[1], selector); - c4[0] = hc_byte_perm (w5[3], w6[0], selector); - c3[3] = hc_byte_perm (w5[2], w5[3], selector); - c3[2] = hc_byte_perm (w5[1], w5[2], selector); - c3[1] = hc_byte_perm (w5[0], w5[1], selector); - c3[0] = hc_byte_perm (w4[3], w5[0], selector); - c2[3] = hc_byte_perm (w4[2], w4[3], selector); - c2[2] = hc_byte_perm (w4[1], w4[2], selector); - c2[1] = hc_byte_perm (w4[0], w4[1], selector); - c2[0] = hc_byte_perm (w3[3], w4[0], selector); - c1[3] = hc_byte_perm (w3[2], w3[3], selector); - c1[2] = hc_byte_perm (w3[1], w3[2], selector); - c1[1] = hc_byte_perm (w3[0], w3[1], selector); - c1[0] = hc_byte_perm (w2[3], w3[0], selector); - c0[3] = hc_byte_perm (w2[2], w2[3], selector); - c0[2] = hc_byte_perm (w2[1], w2[2], selector); - c0[1] = hc_byte_perm (w2[0], w2[1], selector); - c0[0] = hc_byte_perm (w1[3], w2[0], selector); - w7[3] = hc_byte_perm (w1[2], w1[3], selector); - w7[2] = hc_byte_perm (w1[1], w1[2], selector); - w7[1] = hc_byte_perm (w1[0], w1[1], selector); - w7[0] = hc_byte_perm (w0[3], w1[0], selector); - w6[3] = hc_byte_perm (w0[2], w0[3], selector); - w6[2] = hc_byte_perm (w0[1], w0[2], selector); - w6[1] = hc_byte_perm (w0[0], w0[1], selector); - w6[0] = hc_byte_perm ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm (w7[3], 0, selector); - c6[0] = hc_byte_perm (w7[2], w7[3], selector); - c5[3] = hc_byte_perm (w7[1], w7[2], selector); - c5[2] = hc_byte_perm (w7[0], w7[1], selector); - c5[1] = hc_byte_perm (w6[3], w7[0], selector); - c5[0] = hc_byte_perm (w6[2], w6[3], selector); - c4[3] = hc_byte_perm (w6[1], w6[2], selector); - c4[2] = hc_byte_perm (w6[0], w6[1], selector); - c4[1] = hc_byte_perm (w5[3], w6[0], selector); - c4[0] = hc_byte_perm (w5[2], w5[3], selector); - c3[3] = hc_byte_perm (w5[1], w5[2], selector); - c3[2] = hc_byte_perm (w5[0], w5[1], selector); - c3[1] = hc_byte_perm (w4[3], w5[0], selector); - c3[0] = hc_byte_perm (w4[2], w4[3], selector); - c2[3] = hc_byte_perm (w4[1], w4[2], selector); - c2[2] = hc_byte_perm (w4[0], w4[1], selector); - c2[1] = hc_byte_perm (w3[3], w4[0], selector); - c2[0] = hc_byte_perm (w3[2], w3[3], selector); - c1[3] = hc_byte_perm (w3[1], w3[2], selector); - c1[2] = hc_byte_perm (w3[0], w3[1], selector); - c1[1] = hc_byte_perm (w2[3], w3[0], selector); - c1[0] = hc_byte_perm (w2[2], w2[3], selector); - c0[3] = hc_byte_perm (w2[1], w2[2], selector); - c0[2] = hc_byte_perm (w2[0], w2[1], selector); - c0[1] = hc_byte_perm (w1[3], w2[0], selector); - c0[0] = hc_byte_perm (w1[2], w1[3], selector); - w7[3] = hc_byte_perm (w1[1], w1[2], selector); - w7[2] = hc_byte_perm (w1[0], w1[1], selector); - w7[1] = hc_byte_perm (w0[3], w1[0], selector); - w7[0] = hc_byte_perm (w0[2], w0[3], selector); - w6[3] = hc_byte_perm (w0[1], w0[2], selector); - w6[2] = hc_byte_perm (w0[0], w0[1], selector); - w6[1] = hc_byte_perm ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm (w7[3], 0, selector); - c6[1] = hc_byte_perm (w7[2], w7[3], selector); - c6[0] = hc_byte_perm (w7[1], w7[2], selector); - c5[3] = hc_byte_perm (w7[0], w7[1], selector); - c5[2] = hc_byte_perm (w6[3], w7[0], selector); - c5[1] = hc_byte_perm (w6[2], w6[3], selector); - c5[0] = hc_byte_perm (w6[1], w6[2], selector); - c4[3] = hc_byte_perm (w6[0], w6[1], selector); - c4[2] = hc_byte_perm (w5[3], w6[0], selector); - c4[1] = hc_byte_perm (w5[2], w5[3], selector); - c4[0] = hc_byte_perm (w5[1], w5[2], selector); - c3[3] = hc_byte_perm (w5[0], w5[1], selector); - c3[2] = hc_byte_perm (w4[3], w5[0], selector); - c3[1] = hc_byte_perm (w4[2], w4[3], selector); - c3[0] = hc_byte_perm (w4[1], w4[2], selector); - c2[3] = hc_byte_perm (w4[0], w4[1], selector); - c2[2] = hc_byte_perm (w3[3], w4[0], selector); - c2[1] = hc_byte_perm (w3[2], w3[3], selector); - c2[0] = hc_byte_perm (w3[1], w3[2], selector); - c1[3] = hc_byte_perm (w3[0], w3[1], selector); - c1[2] = hc_byte_perm (w2[3], w3[0], selector); - c1[1] = hc_byte_perm (w2[2], w2[3], selector); - c1[0] = hc_byte_perm (w2[1], w2[2], selector); - c0[3] = hc_byte_perm (w2[0], w2[1], selector); - c0[2] = hc_byte_perm (w1[3], w2[0], selector); - c0[1] = hc_byte_perm (w1[2], w1[3], selector); - c0[0] = hc_byte_perm (w1[1], w1[2], selector); - w7[3] = hc_byte_perm (w1[0], w1[1], selector); - w7[2] = hc_byte_perm (w0[3], w1[0], selector); - w7[1] = hc_byte_perm (w0[2], w0[3], selector); - w7[0] = hc_byte_perm (w0[1], w0[2], selector); - w6[3] = hc_byte_perm (w0[0], w0[1], selector); - w6[2] = hc_byte_perm ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm (w7[3], 0, selector); - c6[2] = hc_byte_perm (w7[2], w7[3], selector); - c6[1] = hc_byte_perm (w7[1], w7[2], selector); - c6[0] = hc_byte_perm (w7[0], w7[1], selector); - c5[3] = hc_byte_perm (w6[3], w7[0], selector); - c5[2] = hc_byte_perm (w6[2], w6[3], selector); - c5[1] = hc_byte_perm (w6[1], w6[2], selector); - c5[0] = hc_byte_perm (w6[0], w6[1], selector); - c4[3] = hc_byte_perm (w5[3], w6[0], selector); - c4[2] = hc_byte_perm (w5[2], w5[3], selector); - c4[1] = hc_byte_perm (w5[1], w5[2], selector); - c4[0] = hc_byte_perm (w5[0], w5[1], selector); - c3[3] = hc_byte_perm (w4[3], w5[0], selector); - c3[2] = hc_byte_perm (w4[2], w4[3], selector); - c3[1] = hc_byte_perm (w4[1], w4[2], selector); - c3[0] = hc_byte_perm (w4[0], w4[1], selector); - c2[3] = hc_byte_perm (w3[3], w4[0], selector); - c2[2] = hc_byte_perm (w3[2], w3[3], selector); - c2[1] = hc_byte_perm (w3[1], w3[2], selector); - c2[0] = hc_byte_perm (w3[0], w3[1], selector); - c1[3] = hc_byte_perm (w2[3], w3[0], selector); - c1[2] = hc_byte_perm (w2[2], w2[3], selector); - c1[1] = hc_byte_perm (w2[1], w2[2], selector); - c1[0] = hc_byte_perm (w2[0], w2[1], selector); - c0[3] = hc_byte_perm (w1[3], w2[0], selector); - c0[2] = hc_byte_perm (w1[2], w1[3], selector); - c0[1] = hc_byte_perm (w1[1], w1[2], selector); - c0[0] = hc_byte_perm (w1[0], w1[1], selector); - w7[3] = hc_byte_perm (w0[3], w1[0], selector); - w7[2] = hc_byte_perm (w0[2], w0[3], selector); - w7[1] = hc_byte_perm (w0[1], w0[2], selector); - w7[0] = hc_byte_perm (w0[0], w0[1], selector); - w6[3] = hc_byte_perm ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm (w7[3], 0, selector); - c6[3] = hc_byte_perm (w7[2], w7[3], selector); - c6[2] = hc_byte_perm (w7[1], w7[2], selector); - c6[1] = hc_byte_perm (w7[0], w7[1], selector); - c6[0] = hc_byte_perm (w6[3], w7[0], selector); - c5[3] = hc_byte_perm (w6[2], w6[3], selector); - c5[2] = hc_byte_perm (w6[1], w6[2], selector); - c5[1] = hc_byte_perm (w6[0], w6[1], selector); - c5[0] = hc_byte_perm (w5[3], w6[0], selector); - c4[3] = hc_byte_perm (w5[2], w5[3], selector); - c4[2] = hc_byte_perm (w5[1], w5[2], selector); - c4[1] = hc_byte_perm (w5[0], w5[1], selector); - c4[0] = hc_byte_perm (w4[3], w5[0], selector); - c3[3] = hc_byte_perm (w4[2], w4[3], selector); - c3[2] = hc_byte_perm (w4[1], w4[2], selector); - c3[1] = hc_byte_perm (w4[0], w4[1], selector); - c3[0] = hc_byte_perm (w3[3], w4[0], selector); - c2[3] = hc_byte_perm (w3[2], w3[3], selector); - c2[2] = hc_byte_perm (w3[1], w3[2], selector); - c2[1] = hc_byte_perm (w3[0], w3[1], selector); - c2[0] = hc_byte_perm (w2[3], w3[0], selector); - c1[3] = hc_byte_perm (w2[2], w2[3], selector); - c1[2] = hc_byte_perm (w2[1], w2[2], selector); - c1[1] = hc_byte_perm (w2[0], w2[1], selector); - c1[0] = hc_byte_perm (w1[3], w2[0], selector); - c0[3] = hc_byte_perm (w1[2], w1[3], selector); - c0[2] = hc_byte_perm (w1[1], w1[2], selector); - c0[1] = hc_byte_perm (w1[0], w1[1], selector); - c0[0] = hc_byte_perm (w0[3], w1[0], selector); - w7[3] = hc_byte_perm (w0[2], w0[3], selector); - w7[2] = hc_byte_perm (w0[1], w0[2], selector); - w7[1] = hc_byte_perm (w0[0], w0[1], selector); - w7[0] = hc_byte_perm ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm (w7[3], 0, selector); - c7[0] = hc_byte_perm (w7[2], w7[3], selector); - c6[3] = hc_byte_perm (w7[1], w7[2], selector); - c6[2] = hc_byte_perm (w7[0], w7[1], selector); - c6[1] = hc_byte_perm (w6[3], w7[0], selector); - c6[0] = hc_byte_perm (w6[2], w6[3], selector); - c5[3] = hc_byte_perm (w6[1], w6[2], selector); - c5[2] = hc_byte_perm (w6[0], w6[1], selector); - c5[1] = hc_byte_perm (w5[3], w6[0], selector); - c5[0] = hc_byte_perm (w5[2], w5[3], selector); - c4[3] = hc_byte_perm (w5[1], w5[2], selector); - c4[2] = hc_byte_perm (w5[0], w5[1], selector); - c4[1] = hc_byte_perm (w4[3], w5[0], selector); - c4[0] = hc_byte_perm (w4[2], w4[3], selector); - c3[3] = hc_byte_perm (w4[1], w4[2], selector); - c3[2] = hc_byte_perm (w4[0], w4[1], selector); - c3[1] = hc_byte_perm (w3[3], w4[0], selector); - c3[0] = hc_byte_perm (w3[2], w3[3], selector); - c2[3] = hc_byte_perm (w3[1], w3[2], selector); - c2[2] = hc_byte_perm (w3[0], w3[1], selector); - c2[1] = hc_byte_perm (w2[3], w3[0], selector); - c2[0] = hc_byte_perm (w2[2], w2[3], selector); - c1[3] = hc_byte_perm (w2[1], w2[2], selector); - c1[2] = hc_byte_perm (w2[0], w2[1], selector); - c1[1] = hc_byte_perm (w1[3], w2[0], selector); - c1[0] = hc_byte_perm (w1[2], w1[3], selector); - c0[3] = hc_byte_perm (w1[1], w1[2], selector); - c0[2] = hc_byte_perm (w1[0], w1[1], selector); - c0[1] = hc_byte_perm (w0[3], w1[0], selector); - c0[0] = hc_byte_perm (w0[2], w0[3], selector); - w7[3] = hc_byte_perm (w0[1], w0[2], selector); - w7[2] = hc_byte_perm (w0[0], w0[1], selector); - w7[1] = hc_byte_perm ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm (w7[3], 0, selector); - c7[1] = hc_byte_perm (w7[2], w7[3], selector); - c7[0] = hc_byte_perm (w7[1], w7[2], selector); - c6[3] = hc_byte_perm (w7[0], w7[1], selector); - c6[2] = hc_byte_perm (w6[3], w7[0], selector); - c6[1] = hc_byte_perm (w6[2], w6[3], selector); - c6[0] = hc_byte_perm (w6[1], w6[2], selector); - c5[3] = hc_byte_perm (w6[0], w6[1], selector); - c5[2] = hc_byte_perm (w5[3], w6[0], selector); - c5[1] = hc_byte_perm (w5[2], w5[3], selector); - c5[0] = hc_byte_perm (w5[1], w5[2], selector); - c4[3] = hc_byte_perm (w5[0], w5[1], selector); - c4[2] = hc_byte_perm (w4[3], w5[0], selector); - c4[1] = hc_byte_perm (w4[2], w4[3], selector); - c4[0] = hc_byte_perm (w4[1], w4[2], selector); - c3[3] = hc_byte_perm (w4[0], w4[1], selector); - c3[2] = hc_byte_perm (w3[3], w4[0], selector); - c3[1] = hc_byte_perm (w3[2], w3[3], selector); - c3[0] = hc_byte_perm (w3[1], w3[2], selector); - c2[3] = hc_byte_perm (w3[0], w3[1], selector); - c2[2] = hc_byte_perm (w2[3], w3[0], selector); - c2[1] = hc_byte_perm (w2[2], w2[3], selector); - c2[0] = hc_byte_perm (w2[1], w2[2], selector); - c1[3] = hc_byte_perm (w2[0], w2[1], selector); - c1[2] = hc_byte_perm (w1[3], w2[0], selector); - c1[1] = hc_byte_perm (w1[2], w1[3], selector); - c1[0] = hc_byte_perm (w1[1], w1[2], selector); - c0[3] = hc_byte_perm (w1[0], w1[1], selector); - c0[2] = hc_byte_perm (w0[3], w1[0], selector); - c0[1] = hc_byte_perm (w0[2], w0[3], selector); - c0[0] = hc_byte_perm (w0[1], w0[2], selector); - w7[3] = hc_byte_perm (w0[0], w0[1], selector); - w7[2] = hc_byte_perm ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm (w7[3], 0, selector); - c7[2] = hc_byte_perm (w7[2], w7[3], selector); - c7[1] = hc_byte_perm (w7[1], w7[2], selector); - c7[0] = hc_byte_perm (w7[0], w7[1], selector); - c6[3] = hc_byte_perm (w6[3], w7[0], selector); - c6[2] = hc_byte_perm (w6[2], w6[3], selector); - c6[1] = hc_byte_perm (w6[1], w6[2], selector); - c6[0] = hc_byte_perm (w6[0], w6[1], selector); - c5[3] = hc_byte_perm (w5[3], w6[0], selector); - c5[2] = hc_byte_perm (w5[2], w5[3], selector); - c5[1] = hc_byte_perm (w5[1], w5[2], selector); - c5[0] = hc_byte_perm (w5[0], w5[1], selector); - c4[3] = hc_byte_perm (w4[3], w5[0], selector); - c4[2] = hc_byte_perm (w4[2], w4[3], selector); - c4[1] = hc_byte_perm (w4[1], w4[2], selector); - c4[0] = hc_byte_perm (w4[0], w4[1], selector); - c3[3] = hc_byte_perm (w3[3], w4[0], selector); - c3[2] = hc_byte_perm (w3[2], w3[3], selector); - c3[1] = hc_byte_perm (w3[1], w3[2], selector); - c3[0] = hc_byte_perm (w3[0], w3[1], selector); - c2[3] = hc_byte_perm (w2[3], w3[0], selector); - c2[2] = hc_byte_perm (w2[2], w2[3], selector); - c2[1] = hc_byte_perm (w2[1], w2[2], selector); - c2[0] = hc_byte_perm (w2[0], w2[1], selector); - c1[3] = hc_byte_perm (w1[3], w2[0], selector); - c1[2] = hc_byte_perm (w1[2], w1[3], selector); - c1[1] = hc_byte_perm (w1[1], w1[2], selector); - c1[0] = hc_byte_perm (w1[0], w1[1], selector); - c0[3] = hc_byte_perm (w0[3], w1[0], selector); - c0[2] = hc_byte_perm (w0[2], w0[3], selector); - c0[1] = hc_byte_perm (w0[1], w0[2], selector); - c0[0] = hc_byte_perm (w0[0], w0[1], selector); - w7[3] = hc_byte_perm ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -12886,1180 +9110,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (PRIVATE_AS u32x *w0, PRIVATE_AS u3 break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm (w7[3], w7[2], selector); - w7[2] = hc_byte_perm (w7[2], w7[1], selector); - w7[1] = hc_byte_perm (w7[1], w7[0], selector); - w7[0] = hc_byte_perm (w7[0], w6[3], selector); - w6[3] = hc_byte_perm (w6[3], w6[2], selector); - w6[2] = hc_byte_perm (w6[2], w6[1], selector); - w6[1] = hc_byte_perm (w6[1], w6[0], selector); - w6[0] = hc_byte_perm (w6[0], w5[3], selector); - w5[3] = hc_byte_perm (w5[3], w5[2], selector); - w5[2] = hc_byte_perm (w5[2], w5[1], selector); - w5[1] = hc_byte_perm (w5[1], w5[0], selector); - w5[0] = hc_byte_perm (w5[0], w4[3], selector); - w4[3] = hc_byte_perm (w4[3], w4[2], selector); - w4[2] = hc_byte_perm (w4[2], w4[1], selector); - w4[1] = hc_byte_perm (w4[1], w4[0], selector); - w4[0] = hc_byte_perm (w4[0], w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - w7[3] = hc_byte_perm (w7[2], w7[1], selector); - w7[2] = hc_byte_perm (w7[1], w7[0], selector); - w7[1] = hc_byte_perm (w7[0], w6[3], selector); - w7[0] = hc_byte_perm (w6[3], w6[2], selector); - w6[3] = hc_byte_perm (w6[2], w6[1], selector); - w6[2] = hc_byte_perm (w6[1], w6[0], selector); - w6[1] = hc_byte_perm (w6[0], w5[3], selector); - w6[0] = hc_byte_perm (w5[3], w5[2], selector); - w5[3] = hc_byte_perm (w5[2], w5[1], selector); - w5[2] = hc_byte_perm (w5[1], w5[0], selector); - w5[1] = hc_byte_perm (w5[0], w4[3], selector); - w5[0] = hc_byte_perm (w4[3], w4[2], selector); - w4[3] = hc_byte_perm (w4[2], w4[1], selector); - w4[2] = hc_byte_perm (w4[1], w4[0], selector); - w4[1] = hc_byte_perm (w4[0], w3[3], selector); - w4[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w7[3] = hc_byte_perm (w7[1], w7[0], selector); - w7[2] = hc_byte_perm (w7[0], w6[3], selector); - w7[1] = hc_byte_perm (w6[3], w6[2], selector); - w7[0] = hc_byte_perm (w6[2], w6[1], selector); - w6[3] = hc_byte_perm (w6[1], w6[0], selector); - w6[2] = hc_byte_perm (w6[0], w5[3], selector); - w6[1] = hc_byte_perm (w5[3], w5[2], selector); - w6[0] = hc_byte_perm (w5[2], w5[1], selector); - w5[3] = hc_byte_perm (w5[1], w5[0], selector); - w5[2] = hc_byte_perm (w5[0], w4[3], selector); - w5[1] = hc_byte_perm (w4[3], w4[2], selector); - w5[0] = hc_byte_perm (w4[2], w4[1], selector); - w4[3] = hc_byte_perm (w4[1], w4[0], selector); - w4[2] = hc_byte_perm (w4[0], w3[3], selector); - w4[1] = hc_byte_perm (w3[3], w3[2], selector); - w4[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w7[3] = hc_byte_perm (w7[0], w6[3], selector); - w7[2] = hc_byte_perm (w6[3], w6[2], selector); - w7[1] = hc_byte_perm (w6[2], w6[1], selector); - w7[0] = hc_byte_perm (w6[1], w6[0], selector); - w6[3] = hc_byte_perm (w6[0], w5[3], selector); - w6[2] = hc_byte_perm (w5[3], w5[2], selector); - w6[1] = hc_byte_perm (w5[2], w5[1], selector); - w6[0] = hc_byte_perm (w5[1], w5[0], selector); - w5[3] = hc_byte_perm (w5[0], w4[3], selector); - w5[2] = hc_byte_perm (w4[3], w4[2], selector); - w5[1] = hc_byte_perm (w4[2], w4[1], selector); - w5[0] = hc_byte_perm (w4[1], w4[0], selector); - w4[3] = hc_byte_perm (w4[0], w3[3], selector); - w4[2] = hc_byte_perm (w3[3], w3[2], selector); - w4[1] = hc_byte_perm (w3[2], w3[1], selector); - w4[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w7[3] = hc_byte_perm (w6[3], w6[2], selector); - w7[2] = hc_byte_perm (w6[2], w6[1], selector); - w7[1] = hc_byte_perm (w6[1], w6[0], selector); - w7[0] = hc_byte_perm (w6[0], w5[3], selector); - w6[3] = hc_byte_perm (w5[3], w5[2], selector); - w6[2] = hc_byte_perm (w5[2], w5[1], selector); - w6[1] = hc_byte_perm (w5[1], w5[0], selector); - w6[0] = hc_byte_perm (w5[0], w4[3], selector); - w5[3] = hc_byte_perm (w4[3], w4[2], selector); - w5[2] = hc_byte_perm (w4[2], w4[1], selector); - w5[1] = hc_byte_perm (w4[1], w4[0], selector); - w5[0] = hc_byte_perm (w4[0], w3[3], selector); - w4[3] = hc_byte_perm (w3[3], w3[2], selector); - w4[2] = hc_byte_perm (w3[2], w3[1], selector); - w4[1] = hc_byte_perm (w3[1], w3[0], selector); - w4[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w7[3] = hc_byte_perm (w6[2], w6[1], selector); - w7[2] = hc_byte_perm (w6[1], w6[0], selector); - w7[1] = hc_byte_perm (w6[0], w5[3], selector); - w7[0] = hc_byte_perm (w5[3], w5[2], selector); - w6[3] = hc_byte_perm (w5[2], w5[1], selector); - w6[2] = hc_byte_perm (w5[1], w5[0], selector); - w6[1] = hc_byte_perm (w5[0], w4[3], selector); - w6[0] = hc_byte_perm (w4[3], w4[2], selector); - w5[3] = hc_byte_perm (w4[2], w4[1], selector); - w5[2] = hc_byte_perm (w4[1], w4[0], selector); - w5[1] = hc_byte_perm (w4[0], w3[3], selector); - w5[0] = hc_byte_perm (w3[3], w3[2], selector); - w4[3] = hc_byte_perm (w3[2], w3[1], selector); - w4[2] = hc_byte_perm (w3[1], w3[0], selector); - w4[1] = hc_byte_perm (w3[0], w2[3], selector); - w4[0] = hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w7[3] = hc_byte_perm (w6[1], w6[0], selector); - w7[2] = hc_byte_perm (w6[0], w5[3], selector); - w7[1] = hc_byte_perm (w5[3], w5[2], selector); - w7[0] = hc_byte_perm (w5[2], w5[1], selector); - w6[3] = hc_byte_perm (w5[1], w5[0], selector); - w6[2] = hc_byte_perm (w5[0], w4[3], selector); - w6[1] = hc_byte_perm (w4[3], w4[2], selector); - w6[0] = hc_byte_perm (w4[2], w4[1], selector); - w5[3] = hc_byte_perm (w4[1], w4[0], selector); - w5[2] = hc_byte_perm (w4[0], w3[3], selector); - w5[1] = hc_byte_perm (w3[3], w3[2], selector); - w5[0] = hc_byte_perm (w3[2], w3[1], selector); - w4[3] = hc_byte_perm (w3[1], w3[0], selector); - w4[2] = hc_byte_perm (w3[0], w2[3], selector); - w4[1] = hc_byte_perm (w2[3], w2[2], selector); - w4[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w7[3] = hc_byte_perm (w6[0], w5[3], selector); - w7[2] = hc_byte_perm (w5[3], w5[2], selector); - w7[1] = hc_byte_perm (w5[2], w5[1], selector); - w7[0] = hc_byte_perm (w5[1], w5[0], selector); - w6[3] = hc_byte_perm (w5[0], w4[3], selector); - w6[2] = hc_byte_perm (w4[3], w4[2], selector); - w6[1] = hc_byte_perm (w4[2], w4[1], selector); - w6[0] = hc_byte_perm (w4[1], w4[0], selector); - w5[3] = hc_byte_perm (w4[0], w3[3], selector); - w5[2] = hc_byte_perm (w3[3], w3[2], selector); - w5[1] = hc_byte_perm (w3[2], w3[1], selector); - w5[0] = hc_byte_perm (w3[1], w3[0], selector); - w4[3] = hc_byte_perm (w3[0], w2[3], selector); - w4[2] = hc_byte_perm (w2[3], w2[2], selector); - w4[1] = hc_byte_perm (w2[2], w2[1], selector); - w4[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w7[3] = hc_byte_perm (w5[3], w5[2], selector); - w7[2] = hc_byte_perm (w5[2], w5[1], selector); - w7[1] = hc_byte_perm (w5[1], w5[0], selector); - w7[0] = hc_byte_perm (w5[0], w4[3], selector); - w6[3] = hc_byte_perm (w4[3], w4[2], selector); - w6[2] = hc_byte_perm (w4[2], w4[1], selector); - w6[1] = hc_byte_perm (w4[1], w4[0], selector); - w6[0] = hc_byte_perm (w4[0], w3[3], selector); - w5[3] = hc_byte_perm (w3[3], w3[2], selector); - w5[2] = hc_byte_perm (w3[2], w3[1], selector); - w5[1] = hc_byte_perm (w3[1], w3[0], selector); - w5[0] = hc_byte_perm (w3[0], w2[3], selector); - w4[3] = hc_byte_perm (w2[3], w2[2], selector); - w4[2] = hc_byte_perm (w2[2], w2[1], selector); - w4[1] = hc_byte_perm (w2[1], w2[0], selector); - w4[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w7[3] = hc_byte_perm (w5[2], w5[1], selector); - w7[2] = hc_byte_perm (w5[1], w5[0], selector); - w7[1] = hc_byte_perm (w5[0], w4[3], selector); - w7[0] = hc_byte_perm (w4[3], w4[2], selector); - w6[3] = hc_byte_perm (w4[2], w4[1], selector); - w6[2] = hc_byte_perm (w4[1], w4[0], selector); - w6[1] = hc_byte_perm (w4[0], w3[3], selector); - w6[0] = hc_byte_perm (w3[3], w3[2], selector); - w5[3] = hc_byte_perm (w3[2], w3[1], selector); - w5[2] = hc_byte_perm (w3[1], w3[0], selector); - w5[1] = hc_byte_perm (w3[0], w2[3], selector); - w5[0] = hc_byte_perm (w2[3], w2[2], selector); - w4[3] = hc_byte_perm (w2[2], w2[1], selector); - w4[2] = hc_byte_perm (w2[1], w2[0], selector); - w4[1] = hc_byte_perm (w2[0], w1[3], selector); - w4[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w7[3] = hc_byte_perm (w5[1], w5[0], selector); - w7[2] = hc_byte_perm (w5[0], w4[3], selector); - w7[1] = hc_byte_perm (w4[3], w4[2], selector); - w7[0] = hc_byte_perm (w4[2], w4[1], selector); - w6[3] = hc_byte_perm (w4[1], w4[0], selector); - w6[2] = hc_byte_perm (w4[0], w3[3], selector); - w6[1] = hc_byte_perm (w3[3], w3[2], selector); - w6[0] = hc_byte_perm (w3[2], w3[1], selector); - w5[3] = hc_byte_perm (w3[1], w3[0], selector); - w5[2] = hc_byte_perm (w3[0], w2[3], selector); - w5[1] = hc_byte_perm (w2[3], w2[2], selector); - w5[0] = hc_byte_perm (w2[2], w2[1], selector); - w4[3] = hc_byte_perm (w2[1], w2[0], selector); - w4[2] = hc_byte_perm (w2[0], w1[3], selector); - w4[1] = hc_byte_perm (w1[3], w1[2], selector); - w4[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w7[3] = hc_byte_perm (w5[0], w4[3], selector); - w7[2] = hc_byte_perm (w4[3], w4[2], selector); - w7[1] = hc_byte_perm (w4[2], w4[1], selector); - w7[0] = hc_byte_perm (w4[1], w4[0], selector); - w6[3] = hc_byte_perm (w4[0], w3[3], selector); - w6[2] = hc_byte_perm (w3[3], w3[2], selector); - w6[1] = hc_byte_perm (w3[2], w3[1], selector); - w6[0] = hc_byte_perm (w3[1], w3[0], selector); - w5[3] = hc_byte_perm (w3[0], w2[3], selector); - w5[2] = hc_byte_perm (w2[3], w2[2], selector); - w5[1] = hc_byte_perm (w2[2], w2[1], selector); - w5[0] = hc_byte_perm (w2[1], w2[0], selector); - w4[3] = hc_byte_perm (w2[0], w1[3], selector); - w4[2] = hc_byte_perm (w1[3], w1[2], selector); - w4[1] = hc_byte_perm (w1[2], w1[1], selector); - w4[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w7[3] = hc_byte_perm (w4[3], w4[2], selector); - w7[2] = hc_byte_perm (w4[2], w4[1], selector); - w7[1] = hc_byte_perm (w4[1], w4[0], selector); - w7[0] = hc_byte_perm (w4[0], w3[3], selector); - w6[3] = hc_byte_perm (w3[3], w3[2], selector); - w6[2] = hc_byte_perm (w3[2], w3[1], selector); - w6[1] = hc_byte_perm (w3[1], w3[0], selector); - w6[0] = hc_byte_perm (w3[0], w2[3], selector); - w5[3] = hc_byte_perm (w2[3], w2[2], selector); - w5[2] = hc_byte_perm (w2[2], w2[1], selector); - w5[1] = hc_byte_perm (w2[1], w2[0], selector); - w5[0] = hc_byte_perm (w2[0], w1[3], selector); - w4[3] = hc_byte_perm (w1[3], w1[2], selector); - w4[2] = hc_byte_perm (w1[2], w1[1], selector); - w4[1] = hc_byte_perm (w1[1], w1[0], selector); - w4[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w7[3] = hc_byte_perm (w4[2], w4[1], selector); - w7[2] = hc_byte_perm (w4[1], w4[0], selector); - w7[1] = hc_byte_perm (w4[0], w3[3], selector); - w7[0] = hc_byte_perm (w3[3], w3[2], selector); - w6[3] = hc_byte_perm (w3[2], w3[1], selector); - w6[2] = hc_byte_perm (w3[1], w3[0], selector); - w6[1] = hc_byte_perm (w3[0], w2[3], selector); - w6[0] = hc_byte_perm (w2[3], w2[2], selector); - w5[3] = hc_byte_perm (w2[2], w2[1], selector); - w5[2] = hc_byte_perm (w2[1], w2[0], selector); - w5[1] = hc_byte_perm (w2[0], w1[3], selector); - w5[0] = hc_byte_perm (w1[3], w1[2], selector); - w4[3] = hc_byte_perm (w1[2], w1[1], selector); - w4[2] = hc_byte_perm (w1[1], w1[0], selector); - w4[1] = hc_byte_perm (w1[0], w0[3], selector); - w4[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w7[3] = hc_byte_perm (w4[1], w4[0], selector); - w7[2] = hc_byte_perm (w4[0], w3[3], selector); - w7[1] = hc_byte_perm (w3[3], w3[2], selector); - w7[0] = hc_byte_perm (w3[2], w3[1], selector); - w6[3] = hc_byte_perm (w3[1], w3[0], selector); - w6[2] = hc_byte_perm (w3[0], w2[3], selector); - w6[1] = hc_byte_perm (w2[3], w2[2], selector); - w6[0] = hc_byte_perm (w2[2], w2[1], selector); - w5[3] = hc_byte_perm (w2[1], w2[0], selector); - w5[2] = hc_byte_perm (w2[0], w1[3], selector); - w5[1] = hc_byte_perm (w1[3], w1[2], selector); - w5[0] = hc_byte_perm (w1[2], w1[1], selector); - w4[3] = hc_byte_perm (w1[1], w1[0], selector); - w4[2] = hc_byte_perm (w1[0], w0[3], selector); - w4[1] = hc_byte_perm (w0[3], w0[2], selector); - w4[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w7[3] = hc_byte_perm (w4[0], w3[3], selector); - w7[2] = hc_byte_perm (w3[3], w3[2], selector); - w7[1] = hc_byte_perm (w3[2], w3[1], selector); - w7[0] = hc_byte_perm (w3[1], w3[0], selector); - w6[3] = hc_byte_perm (w3[0], w2[3], selector); - w6[2] = hc_byte_perm (w2[3], w2[2], selector); - w6[1] = hc_byte_perm (w2[2], w2[1], selector); - w6[0] = hc_byte_perm (w2[1], w2[0], selector); - w5[3] = hc_byte_perm (w2[0], w1[3], selector); - w5[2] = hc_byte_perm (w1[3], w1[2], selector); - w5[1] = hc_byte_perm (w1[2], w1[1], selector); - w5[0] = hc_byte_perm (w1[1], w1[0], selector); - w4[3] = hc_byte_perm (w1[0], w0[3], selector); - w4[2] = hc_byte_perm (w0[3], w0[2], selector); - w4[1] = hc_byte_perm (w0[2], w0[1], selector); - w4[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - w7[3] = hc_byte_perm (w3[3], w3[2], selector); - w7[2] = hc_byte_perm (w3[2], w3[1], selector); - w7[1] = hc_byte_perm (w3[1], w3[0], selector); - w7[0] = hc_byte_perm (w3[0], w2[3], selector); - w6[3] = hc_byte_perm (w2[3], w2[2], selector); - w6[2] = hc_byte_perm (w2[2], w2[1], selector); - w6[1] = hc_byte_perm (w2[1], w2[0], selector); - w6[0] = hc_byte_perm (w2[0], w1[3], selector); - w5[3] = hc_byte_perm (w1[3], w1[2], selector); - w5[2] = hc_byte_perm (w1[2], w1[1], selector); - w5[1] = hc_byte_perm (w1[1], w1[0], selector); - w5[0] = hc_byte_perm (w1[0], w0[3], selector); - w4[3] = hc_byte_perm (w0[3], w0[2], selector); - w4[2] = hc_byte_perm (w0[2], w0[1], selector); - w4[1] = hc_byte_perm (w0[1], w0[0], selector); - w4[0] = hc_byte_perm (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm (w3[2], w3[1], selector); - w7[2] = hc_byte_perm (w3[1], w3[0], selector); - w7[1] = hc_byte_perm (w3[0], w2[3], selector); - w7[0] = hc_byte_perm (w2[3], w2[2], selector); - w6[3] = hc_byte_perm (w2[2], w2[1], selector); - w6[2] = hc_byte_perm (w2[1], w2[0], selector); - w6[1] = hc_byte_perm (w2[0], w1[3], selector); - w6[0] = hc_byte_perm (w1[3], w1[2], selector); - w5[3] = hc_byte_perm (w1[2], w1[1], selector); - w5[2] = hc_byte_perm (w1[1], w1[0], selector); - w5[1] = hc_byte_perm (w1[0], w0[3], selector); - w5[0] = hc_byte_perm (w0[3], w0[2], selector); - w4[3] = hc_byte_perm (w0[2], w0[1], selector); - w4[2] = hc_byte_perm (w0[1], w0[0], selector); - w4[1] = hc_byte_perm (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - w7[3] = hc_byte_perm (w3[1], w3[0], selector); - w7[2] = hc_byte_perm (w3[0], w2[3], selector); - w7[1] = hc_byte_perm (w2[3], w2[2], selector); - w7[0] = hc_byte_perm (w2[2], w2[1], selector); - w6[3] = hc_byte_perm (w2[1], w2[0], selector); - w6[2] = hc_byte_perm (w2[0], w1[3], selector); - w6[1] = hc_byte_perm (w1[3], w1[2], selector); - w6[0] = hc_byte_perm (w1[2], w1[1], selector); - w5[3] = hc_byte_perm (w1[1], w1[0], selector); - w5[2] = hc_byte_perm (w1[0], w0[3], selector); - w5[1] = hc_byte_perm (w0[3], w0[2], selector); - w5[0] = hc_byte_perm (w0[2], w0[1], selector); - w4[3] = hc_byte_perm (w0[1], w0[0], selector); - w4[2] = hc_byte_perm (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm (w3[0], w2[3], selector); - w7[2] = hc_byte_perm (w2[3], w2[2], selector); - w7[1] = hc_byte_perm (w2[2], w2[1], selector); - w7[0] = hc_byte_perm (w2[1], w2[0], selector); - w6[3] = hc_byte_perm (w2[0], w1[3], selector); - w6[2] = hc_byte_perm (w1[3], w1[2], selector); - w6[1] = hc_byte_perm (w1[2], w1[1], selector); - w6[0] = hc_byte_perm (w1[1], w1[0], selector); - w5[3] = hc_byte_perm (w1[0], w0[3], selector); - w5[2] = hc_byte_perm (w0[3], w0[2], selector); - w5[1] = hc_byte_perm (w0[2], w0[1], selector); - w5[0] = hc_byte_perm (w0[1], w0[0], selector); - w4[3] = hc_byte_perm (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm (w2[3], w2[2], selector); - w7[2] = hc_byte_perm (w2[2], w2[1], selector); - w7[1] = hc_byte_perm (w2[1], w2[0], selector); - w7[0] = hc_byte_perm (w2[0], w1[3], selector); - w6[3] = hc_byte_perm (w1[3], w1[2], selector); - w6[2] = hc_byte_perm (w1[2], w1[1], selector); - w6[1] = hc_byte_perm (w1[1], w1[0], selector); - w6[0] = hc_byte_perm (w1[0], w0[3], selector); - w5[3] = hc_byte_perm (w0[3], w0[2], selector); - w5[2] = hc_byte_perm (w0[2], w0[1], selector); - w5[1] = hc_byte_perm (w0[1], w0[0], selector); - w5[0] = hc_byte_perm (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm (w2[2], w2[1], selector); - w7[2] = hc_byte_perm (w2[1], w2[0], selector); - w7[1] = hc_byte_perm (w2[0], w1[3], selector); - w7[0] = hc_byte_perm (w1[3], w1[2], selector); - w6[3] = hc_byte_perm (w1[2], w1[1], selector); - w6[2] = hc_byte_perm (w1[1], w1[0], selector); - w6[1] = hc_byte_perm (w1[0], w0[3], selector); - w6[0] = hc_byte_perm (w0[3], w0[2], selector); - w5[3] = hc_byte_perm (w0[2], w0[1], selector); - w5[2] = hc_byte_perm (w0[1], w0[0], selector); - w5[1] = hc_byte_perm (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm (w2[1], w2[0], selector); - w7[2] = hc_byte_perm (w2[0], w1[3], selector); - w7[1] = hc_byte_perm (w1[3], w1[2], selector); - w7[0] = hc_byte_perm (w1[2], w1[1], selector); - w6[3] = hc_byte_perm (w1[1], w1[0], selector); - w6[2] = hc_byte_perm (w1[0], w0[3], selector); - w6[1] = hc_byte_perm (w0[3], w0[2], selector); - w6[0] = hc_byte_perm (w0[2], w0[1], selector); - w5[3] = hc_byte_perm (w0[1], w0[0], selector); - w5[2] = hc_byte_perm (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm (w2[0], w1[3], selector); - w7[2] = hc_byte_perm (w1[3], w1[2], selector); - w7[1] = hc_byte_perm (w1[2], w1[1], selector); - w7[0] = hc_byte_perm (w1[1], w1[0], selector); - w6[3] = hc_byte_perm (w1[0], w0[3], selector); - w6[2] = hc_byte_perm (w0[3], w0[2], selector); - w6[1] = hc_byte_perm (w0[2], w0[1], selector); - w6[0] = hc_byte_perm (w0[1], w0[0], selector); - w5[3] = hc_byte_perm (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm (w1[3], w1[2], selector); - w7[2] = hc_byte_perm (w1[2], w1[1], selector); - w7[1] = hc_byte_perm (w1[1], w1[0], selector); - w7[0] = hc_byte_perm (w1[0], w0[3], selector); - w6[3] = hc_byte_perm (w0[3], w0[2], selector); - w6[2] = hc_byte_perm (w0[2], w0[1], selector); - w6[1] = hc_byte_perm (w0[1], w0[0], selector); - w6[0] = hc_byte_perm (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - w7[3] = hc_byte_perm (w1[2], w1[1], selector); - w7[2] = hc_byte_perm (w1[1], w1[0], selector); - w7[1] = hc_byte_perm (w1[0], w0[3], selector); - w7[0] = hc_byte_perm (w0[3], w0[2], selector); - w6[3] = hc_byte_perm (w0[2], w0[1], selector); - w6[2] = hc_byte_perm (w0[1], w0[0], selector); - w6[1] = hc_byte_perm (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm (w1[1], w1[0], selector); - w7[2] = hc_byte_perm (w1[0], w0[3], selector); - w7[1] = hc_byte_perm (w0[3], w0[2], selector); - w7[0] = hc_byte_perm (w0[2], w0[1], selector); - w6[3] = hc_byte_perm (w0[1], w0[0], selector); - w6[2] = hc_byte_perm (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm (w1[0], w0[3], selector); - w7[2] = hc_byte_perm (w0[3], w0[2], selector); - w7[1] = hc_byte_perm (w0[2], w0[1], selector); - w7[0] = hc_byte_perm (w0[1], w0[0], selector); - w6[3] = hc_byte_perm (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = hc_byte_perm (w0[3], w0[2], selector); - w7[2] = hc_byte_perm (w0[2], w0[1], selector); - w7[1] = hc_byte_perm (w0[1], w0[0], selector); - w7[0] = hc_byte_perm (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm (w0[2], w0[1], selector); - w7[2] = hc_byte_perm (w0[1], w0[0], selector); - w7[1] = hc_byte_perm (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm (w0[1], w0[0], selector); - w7[2] = hc_byte_perm (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -15742,1708 +10798,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (PRIVATE_AS u32x *w0, PRIVATE break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm ( 0, w7[3], selector); - w7[3] = hc_byte_perm (w7[3], w7[2], selector); - w7[2] = hc_byte_perm (w7[2], w7[1], selector); - w7[1] = hc_byte_perm (w7[1], w7[0], selector); - w7[0] = hc_byte_perm (w7[0], w6[3], selector); - w6[3] = hc_byte_perm (w6[3], w6[2], selector); - w6[2] = hc_byte_perm (w6[2], w6[1], selector); - w6[1] = hc_byte_perm (w6[1], w6[0], selector); - w6[0] = hc_byte_perm (w6[0], w5[3], selector); - w5[3] = hc_byte_perm (w5[3], w5[2], selector); - w5[2] = hc_byte_perm (w5[2], w5[1], selector); - w5[1] = hc_byte_perm (w5[1], w5[0], selector); - w5[0] = hc_byte_perm (w5[0], w4[3], selector); - w4[3] = hc_byte_perm (w4[3], w4[2], selector); - w4[2] = hc_byte_perm (w4[2], w4[1], selector); - w4[1] = hc_byte_perm (w4[1], w4[0], selector); - w4[0] = hc_byte_perm (w4[0], w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm ( 0, w7[3], selector); - c0[0] = hc_byte_perm (w7[3], w7[2], selector); - w7[3] = hc_byte_perm (w7[2], w7[1], selector); - w7[2] = hc_byte_perm (w7[1], w7[0], selector); - w7[1] = hc_byte_perm (w7[0], w6[3], selector); - w7[0] = hc_byte_perm (w6[3], w6[2], selector); - w6[3] = hc_byte_perm (w6[2], w6[1], selector); - w6[2] = hc_byte_perm (w6[1], w6[0], selector); - w6[1] = hc_byte_perm (w6[0], w5[3], selector); - w6[0] = hc_byte_perm (w5[3], w5[2], selector); - w5[3] = hc_byte_perm (w5[2], w5[1], selector); - w5[2] = hc_byte_perm (w5[1], w5[0], selector); - w5[1] = hc_byte_perm (w5[0], w4[3], selector); - w5[0] = hc_byte_perm (w4[3], w4[2], selector); - w4[3] = hc_byte_perm (w4[2], w4[1], selector); - w4[2] = hc_byte_perm (w4[1], w4[0], selector); - w4[1] = hc_byte_perm (w4[0], w3[3], selector); - w4[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm ( 0, w7[3], selector); - c0[1] = hc_byte_perm (w7[3], w7[2], selector); - c0[0] = hc_byte_perm (w7[2], w7[1], selector); - w7[3] = hc_byte_perm (w7[1], w7[0], selector); - w7[2] = hc_byte_perm (w7[0], w6[3], selector); - w7[1] = hc_byte_perm (w6[3], w6[2], selector); - w7[0] = hc_byte_perm (w6[2], w6[1], selector); - w6[3] = hc_byte_perm (w6[1], w6[0], selector); - w6[2] = hc_byte_perm (w6[0], w5[3], selector); - w6[1] = hc_byte_perm (w5[3], w5[2], selector); - w6[0] = hc_byte_perm (w5[2], w5[1], selector); - w5[3] = hc_byte_perm (w5[1], w5[0], selector); - w5[2] = hc_byte_perm (w5[0], w4[3], selector); - w5[1] = hc_byte_perm (w4[3], w4[2], selector); - w5[0] = hc_byte_perm (w4[2], w4[1], selector); - w4[3] = hc_byte_perm (w4[1], w4[0], selector); - w4[2] = hc_byte_perm (w4[0], w3[3], selector); - w4[1] = hc_byte_perm (w3[3], w3[2], selector); - w4[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm ( 0, w7[3], selector); - c0[2] = hc_byte_perm (w7[3], w7[2], selector); - c0[1] = hc_byte_perm (w7[2], w7[1], selector); - c0[0] = hc_byte_perm (w7[1], w7[0], selector); - w7[3] = hc_byte_perm (w7[0], w6[3], selector); - w7[2] = hc_byte_perm (w6[3], w6[2], selector); - w7[1] = hc_byte_perm (w6[2], w6[1], selector); - w7[0] = hc_byte_perm (w6[1], w6[0], selector); - w6[3] = hc_byte_perm (w6[0], w5[3], selector); - w6[2] = hc_byte_perm (w5[3], w5[2], selector); - w6[1] = hc_byte_perm (w5[2], w5[1], selector); - w6[0] = hc_byte_perm (w5[1], w5[0], selector); - w5[3] = hc_byte_perm (w5[0], w4[3], selector); - w5[2] = hc_byte_perm (w4[3], w4[2], selector); - w5[1] = hc_byte_perm (w4[2], w4[1], selector); - w5[0] = hc_byte_perm (w4[1], w4[0], selector); - w4[3] = hc_byte_perm (w4[0], w3[3], selector); - w4[2] = hc_byte_perm (w3[3], w3[2], selector); - w4[1] = hc_byte_perm (w3[2], w3[1], selector); - w4[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm ( 0, w7[3], selector); - c0[3] = hc_byte_perm (w7[3], w7[2], selector); - c0[2] = hc_byte_perm (w7[2], w7[1], selector); - c0[1] = hc_byte_perm (w7[1], w7[0], selector); - c0[0] = hc_byte_perm (w7[0], w6[3], selector); - w7[3] = hc_byte_perm (w6[3], w6[2], selector); - w7[2] = hc_byte_perm (w6[2], w6[1], selector); - w7[1] = hc_byte_perm (w6[1], w6[0], selector); - w7[0] = hc_byte_perm (w6[0], w5[3], selector); - w6[3] = hc_byte_perm (w5[3], w5[2], selector); - w6[2] = hc_byte_perm (w5[2], w5[1], selector); - w6[1] = hc_byte_perm (w5[1], w5[0], selector); - w6[0] = hc_byte_perm (w5[0], w4[3], selector); - w5[3] = hc_byte_perm (w4[3], w4[2], selector); - w5[2] = hc_byte_perm (w4[2], w4[1], selector); - w5[1] = hc_byte_perm (w4[1], w4[0], selector); - w5[0] = hc_byte_perm (w4[0], w3[3], selector); - w4[3] = hc_byte_perm (w3[3], w3[2], selector); - w4[2] = hc_byte_perm (w3[2], w3[1], selector); - w4[1] = hc_byte_perm (w3[1], w3[0], selector); - w4[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm ( 0, w7[3], selector); - c1[0] = hc_byte_perm (w7[3], w7[2], selector); - c0[3] = hc_byte_perm (w7[2], w7[1], selector); - c0[2] = hc_byte_perm (w7[1], w7[0], selector); - c0[1] = hc_byte_perm (w7[0], w6[3], selector); - c0[0] = hc_byte_perm (w6[3], w6[2], selector); - w7[3] = hc_byte_perm (w6[2], w6[1], selector); - w7[2] = hc_byte_perm (w6[1], w6[0], selector); - w7[1] = hc_byte_perm (w6[0], w5[3], selector); - w7[0] = hc_byte_perm (w5[3], w5[2], selector); - w6[3] = hc_byte_perm (w5[2], w5[1], selector); - w6[2] = hc_byte_perm (w5[1], w5[0], selector); - w6[1] = hc_byte_perm (w5[0], w4[3], selector); - w6[0] = hc_byte_perm (w4[3], w4[2], selector); - w5[3] = hc_byte_perm (w4[2], w4[1], selector); - w5[2] = hc_byte_perm (w4[1], w4[0], selector); - w5[1] = hc_byte_perm (w4[0], w3[3], selector); - w5[0] = hc_byte_perm (w3[3], w3[2], selector); - w4[3] = hc_byte_perm (w3[2], w3[1], selector); - w4[2] = hc_byte_perm (w3[1], w3[0], selector); - w4[1] = hc_byte_perm (w3[0], w2[3], selector); - w4[0] = hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm ( 0, w7[3], selector); - c1[1] = hc_byte_perm (w7[3], w7[2], selector); - c1[0] = hc_byte_perm (w7[2], w7[1], selector); - c0[3] = hc_byte_perm (w7[1], w7[0], selector); - c0[2] = hc_byte_perm (w7[0], w6[3], selector); - c0[1] = hc_byte_perm (w6[3], w6[2], selector); - c0[0] = hc_byte_perm (w6[2], w6[1], selector); - w7[3] = hc_byte_perm (w6[1], w6[0], selector); - w7[2] = hc_byte_perm (w6[0], w5[3], selector); - w7[1] = hc_byte_perm (w5[3], w5[2], selector); - w7[0] = hc_byte_perm (w5[2], w5[1], selector); - w6[3] = hc_byte_perm (w5[1], w5[0], selector); - w6[2] = hc_byte_perm (w5[0], w4[3], selector); - w6[1] = hc_byte_perm (w4[3], w4[2], selector); - w6[0] = hc_byte_perm (w4[2], w4[1], selector); - w5[3] = hc_byte_perm (w4[1], w4[0], selector); - w5[2] = hc_byte_perm (w4[0], w3[3], selector); - w5[1] = hc_byte_perm (w3[3], w3[2], selector); - w5[0] = hc_byte_perm (w3[2], w3[1], selector); - w4[3] = hc_byte_perm (w3[1], w3[0], selector); - w4[2] = hc_byte_perm (w3[0], w2[3], selector); - w4[1] = hc_byte_perm (w2[3], w2[2], selector); - w4[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm ( 0, w7[3], selector); - c1[2] = hc_byte_perm (w7[3], w7[2], selector); - c1[1] = hc_byte_perm (w7[2], w7[1], selector); - c1[0] = hc_byte_perm (w7[1], w7[0], selector); - c0[3] = hc_byte_perm (w7[0], w6[3], selector); - c0[2] = hc_byte_perm (w6[3], w6[2], selector); - c0[1] = hc_byte_perm (w6[2], w6[1], selector); - c0[0] = hc_byte_perm (w6[1], w6[0], selector); - w7[3] = hc_byte_perm (w6[0], w5[3], selector); - w7[2] = hc_byte_perm (w5[3], w5[2], selector); - w7[1] = hc_byte_perm (w5[2], w5[1], selector); - w7[0] = hc_byte_perm (w5[1], w5[0], selector); - w6[3] = hc_byte_perm (w5[0], w4[3], selector); - w6[2] = hc_byte_perm (w4[3], w4[2], selector); - w6[1] = hc_byte_perm (w4[2], w4[1], selector); - w6[0] = hc_byte_perm (w4[1], w4[0], selector); - w5[3] = hc_byte_perm (w4[0], w3[3], selector); - w5[2] = hc_byte_perm (w3[3], w3[2], selector); - w5[1] = hc_byte_perm (w3[2], w3[1], selector); - w5[0] = hc_byte_perm (w3[1], w3[0], selector); - w4[3] = hc_byte_perm (w3[0], w2[3], selector); - w4[2] = hc_byte_perm (w2[3], w2[2], selector); - w4[1] = hc_byte_perm (w2[2], w2[1], selector); - w4[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm ( 0, w7[3], selector); - c1[3] = hc_byte_perm (w7[3], w7[2], selector); - c1[2] = hc_byte_perm (w7[2], w7[1], selector); - c1[1] = hc_byte_perm (w7[1], w7[0], selector); - c1[0] = hc_byte_perm (w7[0], w6[3], selector); - c0[3] = hc_byte_perm (w6[3], w6[2], selector); - c0[2] = hc_byte_perm (w6[2], w6[1], selector); - c0[1] = hc_byte_perm (w6[1], w6[0], selector); - c0[0] = hc_byte_perm (w6[0], w5[3], selector); - w7[3] = hc_byte_perm (w5[3], w5[2], selector); - w7[2] = hc_byte_perm (w5[2], w5[1], selector); - w7[1] = hc_byte_perm (w5[1], w5[0], selector); - w7[0] = hc_byte_perm (w5[0], w4[3], selector); - w6[3] = hc_byte_perm (w4[3], w4[2], selector); - w6[2] = hc_byte_perm (w4[2], w4[1], selector); - w6[1] = hc_byte_perm (w4[1], w4[0], selector); - w6[0] = hc_byte_perm (w4[0], w3[3], selector); - w5[3] = hc_byte_perm (w3[3], w3[2], selector); - w5[2] = hc_byte_perm (w3[2], w3[1], selector); - w5[1] = hc_byte_perm (w3[1], w3[0], selector); - w5[0] = hc_byte_perm (w3[0], w2[3], selector); - w4[3] = hc_byte_perm (w2[3], w2[2], selector); - w4[2] = hc_byte_perm (w2[2], w2[1], selector); - w4[1] = hc_byte_perm (w2[1], w2[0], selector); - w4[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm ( 0, w7[3], selector); - c2[0] = hc_byte_perm (w7[3], w7[2], selector); - c1[3] = hc_byte_perm (w7[2], w7[1], selector); - c1[2] = hc_byte_perm (w7[1], w7[0], selector); - c1[1] = hc_byte_perm (w7[0], w6[3], selector); - c1[0] = hc_byte_perm (w6[3], w6[2], selector); - c0[3] = hc_byte_perm (w6[2], w6[1], selector); - c0[2] = hc_byte_perm (w6[1], w6[0], selector); - c0[1] = hc_byte_perm (w6[0], w5[3], selector); - c0[0] = hc_byte_perm (w5[3], w5[2], selector); - w7[3] = hc_byte_perm (w5[2], w5[1], selector); - w7[2] = hc_byte_perm (w5[1], w5[0], selector); - w7[1] = hc_byte_perm (w5[0], w4[3], selector); - w7[0] = hc_byte_perm (w4[3], w4[2], selector); - w6[3] = hc_byte_perm (w4[2], w4[1], selector); - w6[2] = hc_byte_perm (w4[1], w4[0], selector); - w6[1] = hc_byte_perm (w4[0], w3[3], selector); - w6[0] = hc_byte_perm (w3[3], w3[2], selector); - w5[3] = hc_byte_perm (w3[2], w3[1], selector); - w5[2] = hc_byte_perm (w3[1], w3[0], selector); - w5[1] = hc_byte_perm (w3[0], w2[3], selector); - w5[0] = hc_byte_perm (w2[3], w2[2], selector); - w4[3] = hc_byte_perm (w2[2], w2[1], selector); - w4[2] = hc_byte_perm (w2[1], w2[0], selector); - w4[1] = hc_byte_perm (w2[0], w1[3], selector); - w4[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm ( 0, w7[3], selector); - c2[1] = hc_byte_perm (w7[3], w7[2], selector); - c2[0] = hc_byte_perm (w7[2], w7[1], selector); - c1[3] = hc_byte_perm (w7[1], w7[0], selector); - c1[2] = hc_byte_perm (w7[0], w6[3], selector); - c1[1] = hc_byte_perm (w6[3], w6[2], selector); - c1[0] = hc_byte_perm (w6[2], w6[1], selector); - c0[3] = hc_byte_perm (w6[1], w6[0], selector); - c0[2] = hc_byte_perm (w6[0], w5[3], selector); - c0[1] = hc_byte_perm (w5[3], w5[2], selector); - c0[0] = hc_byte_perm (w5[2], w5[1], selector); - w7[3] = hc_byte_perm (w5[1], w5[0], selector); - w7[2] = hc_byte_perm (w5[0], w4[3], selector); - w7[1] = hc_byte_perm (w4[3], w4[2], selector); - w7[0] = hc_byte_perm (w4[2], w4[1], selector); - w6[3] = hc_byte_perm (w4[1], w4[0], selector); - w6[2] = hc_byte_perm (w4[0], w3[3], selector); - w6[1] = hc_byte_perm (w3[3], w3[2], selector); - w6[0] = hc_byte_perm (w3[2], w3[1], selector); - w5[3] = hc_byte_perm (w3[1], w3[0], selector); - w5[2] = hc_byte_perm (w3[0], w2[3], selector); - w5[1] = hc_byte_perm (w2[3], w2[2], selector); - w5[0] = hc_byte_perm (w2[2], w2[1], selector); - w4[3] = hc_byte_perm (w2[1], w2[0], selector); - w4[2] = hc_byte_perm (w2[0], w1[3], selector); - w4[1] = hc_byte_perm (w1[3], w1[2], selector); - w4[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm ( 0, w7[3], selector); - c2[2] = hc_byte_perm (w7[3], w7[2], selector); - c2[1] = hc_byte_perm (w7[2], w7[1], selector); - c2[0] = hc_byte_perm (w7[1], w7[0], selector); - c1[3] = hc_byte_perm (w7[0], w6[3], selector); - c1[2] = hc_byte_perm (w6[3], w6[2], selector); - c1[1] = hc_byte_perm (w6[2], w6[1], selector); - c1[0] = hc_byte_perm (w6[1], w6[0], selector); - c0[3] = hc_byte_perm (w6[0], w5[3], selector); - c0[2] = hc_byte_perm (w5[3], w5[2], selector); - c0[1] = hc_byte_perm (w5[2], w5[1], selector); - c0[0] = hc_byte_perm (w5[1], w5[0], selector); - w7[3] = hc_byte_perm (w5[0], w4[3], selector); - w7[2] = hc_byte_perm (w4[3], w4[2], selector); - w7[1] = hc_byte_perm (w4[2], w4[1], selector); - w7[0] = hc_byte_perm (w4[1], w4[0], selector); - w6[3] = hc_byte_perm (w4[0], w3[3], selector); - w6[2] = hc_byte_perm (w3[3], w3[2], selector); - w6[1] = hc_byte_perm (w3[2], w3[1], selector); - w6[0] = hc_byte_perm (w3[1], w3[0], selector); - w5[3] = hc_byte_perm (w3[0], w2[3], selector); - w5[2] = hc_byte_perm (w2[3], w2[2], selector); - w5[1] = hc_byte_perm (w2[2], w2[1], selector); - w5[0] = hc_byte_perm (w2[1], w2[0], selector); - w4[3] = hc_byte_perm (w2[0], w1[3], selector); - w4[2] = hc_byte_perm (w1[3], w1[2], selector); - w4[1] = hc_byte_perm (w1[2], w1[1], selector); - w4[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm ( 0, w7[3], selector); - c2[3] = hc_byte_perm (w7[3], w7[2], selector); - c2[2] = hc_byte_perm (w7[2], w7[1], selector); - c2[1] = hc_byte_perm (w7[1], w7[0], selector); - c2[0] = hc_byte_perm (w7[0], w6[3], selector); - c1[3] = hc_byte_perm (w6[3], w6[2], selector); - c1[2] = hc_byte_perm (w6[2], w6[1], selector); - c1[1] = hc_byte_perm (w6[1], w6[0], selector); - c1[0] = hc_byte_perm (w6[0], w5[3], selector); - c0[3] = hc_byte_perm (w5[3], w5[2], selector); - c0[2] = hc_byte_perm (w5[2], w5[1], selector); - c0[1] = hc_byte_perm (w5[1], w5[0], selector); - c0[0] = hc_byte_perm (w5[0], w4[3], selector); - w7[3] = hc_byte_perm (w4[3], w4[2], selector); - w7[2] = hc_byte_perm (w4[2], w4[1], selector); - w7[1] = hc_byte_perm (w4[1], w4[0], selector); - w7[0] = hc_byte_perm (w4[0], w3[3], selector); - w6[3] = hc_byte_perm (w3[3], w3[2], selector); - w6[2] = hc_byte_perm (w3[2], w3[1], selector); - w6[1] = hc_byte_perm (w3[1], w3[0], selector); - w6[0] = hc_byte_perm (w3[0], w2[3], selector); - w5[3] = hc_byte_perm (w2[3], w2[2], selector); - w5[2] = hc_byte_perm (w2[2], w2[1], selector); - w5[1] = hc_byte_perm (w2[1], w2[0], selector); - w5[0] = hc_byte_perm (w2[0], w1[3], selector); - w4[3] = hc_byte_perm (w1[3], w1[2], selector); - w4[2] = hc_byte_perm (w1[2], w1[1], selector); - w4[1] = hc_byte_perm (w1[1], w1[0], selector); - w4[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm ( 0, w7[3], selector); - c3[0] = hc_byte_perm (w7[3], w7[2], selector); - c2[3] = hc_byte_perm (w7[2], w7[1], selector); - c2[2] = hc_byte_perm (w7[1], w7[0], selector); - c2[1] = hc_byte_perm (w7[0], w6[3], selector); - c2[0] = hc_byte_perm (w6[3], w6[2], selector); - c1[3] = hc_byte_perm (w6[2], w6[1], selector); - c1[2] = hc_byte_perm (w6[1], w6[0], selector); - c1[1] = hc_byte_perm (w6[0], w5[3], selector); - c1[0] = hc_byte_perm (w5[3], w5[2], selector); - c0[3] = hc_byte_perm (w5[2], w5[1], selector); - c0[2] = hc_byte_perm (w5[1], w5[0], selector); - c0[1] = hc_byte_perm (w5[0], w4[3], selector); - c0[0] = hc_byte_perm (w4[3], w4[2], selector); - w7[3] = hc_byte_perm (w4[2], w4[1], selector); - w7[2] = hc_byte_perm (w4[1], w4[0], selector); - w7[1] = hc_byte_perm (w4[0], w3[3], selector); - w7[0] = hc_byte_perm (w3[3], w3[2], selector); - w6[3] = hc_byte_perm (w3[2], w3[1], selector); - w6[2] = hc_byte_perm (w3[1], w3[0], selector); - w6[1] = hc_byte_perm (w3[0], w2[3], selector); - w6[0] = hc_byte_perm (w2[3], w2[2], selector); - w5[3] = hc_byte_perm (w2[2], w2[1], selector); - w5[2] = hc_byte_perm (w2[1], w2[0], selector); - w5[1] = hc_byte_perm (w2[0], w1[3], selector); - w5[0] = hc_byte_perm (w1[3], w1[2], selector); - w4[3] = hc_byte_perm (w1[2], w1[1], selector); - w4[2] = hc_byte_perm (w1[1], w1[0], selector); - w4[1] = hc_byte_perm (w1[0], w0[3], selector); - w4[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm ( 0, w7[3], selector); - c3[1] = hc_byte_perm (w7[3], w7[2], selector); - c3[0] = hc_byte_perm (w7[2], w7[1], selector); - c2[3] = hc_byte_perm (w7[1], w7[0], selector); - c2[2] = hc_byte_perm (w7[0], w6[3], selector); - c2[1] = hc_byte_perm (w6[3], w6[2], selector); - c2[0] = hc_byte_perm (w6[2], w6[1], selector); - c1[3] = hc_byte_perm (w6[1], w6[0], selector); - c1[2] = hc_byte_perm (w6[0], w5[3], selector); - c1[1] = hc_byte_perm (w5[3], w5[2], selector); - c1[0] = hc_byte_perm (w5[2], w5[1], selector); - c0[3] = hc_byte_perm (w5[1], w5[0], selector); - c0[2] = hc_byte_perm (w5[0], w4[3], selector); - c0[1] = hc_byte_perm (w4[3], w4[2], selector); - c0[0] = hc_byte_perm (w4[2], w4[1], selector); - w7[3] = hc_byte_perm (w4[1], w4[0], selector); - w7[2] = hc_byte_perm (w4[0], w3[3], selector); - w7[1] = hc_byte_perm (w3[3], w3[2], selector); - w7[0] = hc_byte_perm (w3[2], w3[1], selector); - w6[3] = hc_byte_perm (w3[1], w3[0], selector); - w6[2] = hc_byte_perm (w3[0], w2[3], selector); - w6[1] = hc_byte_perm (w2[3], w2[2], selector); - w6[0] = hc_byte_perm (w2[2], w2[1], selector); - w5[3] = hc_byte_perm (w2[1], w2[0], selector); - w5[2] = hc_byte_perm (w2[0], w1[3], selector); - w5[1] = hc_byte_perm (w1[3], w1[2], selector); - w5[0] = hc_byte_perm (w1[2], w1[1], selector); - w4[3] = hc_byte_perm (w1[1], w1[0], selector); - w4[2] = hc_byte_perm (w1[0], w0[3], selector); - w4[1] = hc_byte_perm (w0[3], w0[2], selector); - w4[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm ( 0, w7[3], selector); - c3[2] = hc_byte_perm (w7[3], w7[2], selector); - c3[1] = hc_byte_perm (w7[2], w7[1], selector); - c3[0] = hc_byte_perm (w7[1], w7[0], selector); - c2[3] = hc_byte_perm (w7[0], w6[3], selector); - c2[2] = hc_byte_perm (w6[3], w6[2], selector); - c2[1] = hc_byte_perm (w6[2], w6[1], selector); - c2[0] = hc_byte_perm (w6[1], w6[0], selector); - c1[3] = hc_byte_perm (w6[0], w5[3], selector); - c1[2] = hc_byte_perm (w5[3], w5[2], selector); - c1[1] = hc_byte_perm (w5[2], w5[1], selector); - c1[0] = hc_byte_perm (w5[1], w5[0], selector); - c0[3] = hc_byte_perm (w5[0], w4[3], selector); - c0[2] = hc_byte_perm (w4[3], w4[2], selector); - c0[1] = hc_byte_perm (w4[2], w4[1], selector); - c0[0] = hc_byte_perm (w4[1], w4[0], selector); - w7[3] = hc_byte_perm (w4[0], w3[3], selector); - w7[2] = hc_byte_perm (w3[3], w3[2], selector); - w7[1] = hc_byte_perm (w3[2], w3[1], selector); - w7[0] = hc_byte_perm (w3[1], w3[0], selector); - w6[3] = hc_byte_perm (w3[0], w2[3], selector); - w6[2] = hc_byte_perm (w2[3], w2[2], selector); - w6[1] = hc_byte_perm (w2[2], w2[1], selector); - w6[0] = hc_byte_perm (w2[1], w2[0], selector); - w5[3] = hc_byte_perm (w2[0], w1[3], selector); - w5[2] = hc_byte_perm (w1[3], w1[2], selector); - w5[1] = hc_byte_perm (w1[2], w1[1], selector); - w5[0] = hc_byte_perm (w1[1], w1[0], selector); - w4[3] = hc_byte_perm (w1[0], w0[3], selector); - w4[2] = hc_byte_perm (w0[3], w0[2], selector); - w4[1] = hc_byte_perm (w0[2], w0[1], selector); - w4[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm ( 0, w7[3], selector); - c3[3] = hc_byte_perm (w7[3], w7[2], selector); - c3[2] = hc_byte_perm (w7[2], w7[1], selector); - c3[1] = hc_byte_perm (w7[1], w7[0], selector); - c3[0] = hc_byte_perm (w7[0], w6[3], selector); - c2[3] = hc_byte_perm (w6[3], w6[2], selector); - c2[2] = hc_byte_perm (w6[2], w6[1], selector); - c2[1] = hc_byte_perm (w6[1], w6[0], selector); - c2[0] = hc_byte_perm (w6[0], w5[3], selector); - c1[3] = hc_byte_perm (w5[3], w5[2], selector); - c1[2] = hc_byte_perm (w5[2], w5[1], selector); - c1[1] = hc_byte_perm (w5[1], w5[0], selector); - c1[0] = hc_byte_perm (w5[0], w4[3], selector); - c0[3] = hc_byte_perm (w4[3], w4[2], selector); - c0[2] = hc_byte_perm (w4[2], w4[1], selector); - c0[1] = hc_byte_perm (w4[1], w4[0], selector); - c0[0] = hc_byte_perm (w4[0], w3[3], selector); - w7[3] = hc_byte_perm (w3[3], w3[2], selector); - w7[2] = hc_byte_perm (w3[2], w3[1], selector); - w7[1] = hc_byte_perm (w3[1], w3[0], selector); - w7[0] = hc_byte_perm (w3[0], w2[3], selector); - w6[3] = hc_byte_perm (w2[3], w2[2], selector); - w6[2] = hc_byte_perm (w2[2], w2[1], selector); - w6[1] = hc_byte_perm (w2[1], w2[0], selector); - w6[0] = hc_byte_perm (w2[0], w1[3], selector); - w5[3] = hc_byte_perm (w1[3], w1[2], selector); - w5[2] = hc_byte_perm (w1[2], w1[1], selector); - w5[1] = hc_byte_perm (w1[1], w1[0], selector); - w5[0] = hc_byte_perm (w1[0], w0[3], selector); - w4[3] = hc_byte_perm (w0[3], w0[2], selector); - w4[2] = hc_byte_perm (w0[2], w0[1], selector); - w4[1] = hc_byte_perm (w0[1], w0[0], selector); - w4[0] = hc_byte_perm (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm ( 0, w7[3], selector); - c4[0] = hc_byte_perm (w7[3], w7[2], selector); - c3[3] = hc_byte_perm (w7[2], w7[1], selector); - c3[2] = hc_byte_perm (w7[1], w7[0], selector); - c3[1] = hc_byte_perm (w7[0], w6[3], selector); - c3[0] = hc_byte_perm (w6[3], w6[2], selector); - c2[3] = hc_byte_perm (w6[2], w6[1], selector); - c2[2] = hc_byte_perm (w6[1], w6[0], selector); - c2[1] = hc_byte_perm (w6[0], w5[3], selector); - c2[0] = hc_byte_perm (w5[3], w5[2], selector); - c1[3] = hc_byte_perm (w5[2], w5[1], selector); - c1[2] = hc_byte_perm (w5[1], w5[0], selector); - c1[1] = hc_byte_perm (w5[0], w4[3], selector); - c1[0] = hc_byte_perm (w4[3], w4[2], selector); - c0[3] = hc_byte_perm (w4[2], w4[1], selector); - c0[2] = hc_byte_perm (w4[1], w4[0], selector); - c0[1] = hc_byte_perm (w4[0], w3[3], selector); - c0[0] = hc_byte_perm (w3[3], w3[2], selector); - w7[3] = hc_byte_perm (w3[2], w3[1], selector); - w7[2] = hc_byte_perm (w3[1], w3[0], selector); - w7[1] = hc_byte_perm (w3[0], w2[3], selector); - w7[0] = hc_byte_perm (w2[3], w2[2], selector); - w6[3] = hc_byte_perm (w2[2], w2[1], selector); - w6[2] = hc_byte_perm (w2[1], w2[0], selector); - w6[1] = hc_byte_perm (w2[0], w1[3], selector); - w6[0] = hc_byte_perm (w1[3], w1[2], selector); - w5[3] = hc_byte_perm (w1[2], w1[1], selector); - w5[2] = hc_byte_perm (w1[1], w1[0], selector); - w5[1] = hc_byte_perm (w1[0], w0[3], selector); - w5[0] = hc_byte_perm (w0[3], w0[2], selector); - w4[3] = hc_byte_perm (w0[2], w0[1], selector); - w4[2] = hc_byte_perm (w0[1], w0[0], selector); - w4[1] = hc_byte_perm (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm ( 0, w7[3], selector); - c4[1] = hc_byte_perm (w7[3], w7[2], selector); - c4[0] = hc_byte_perm (w7[2], w7[1], selector); - c3[3] = hc_byte_perm (w7[1], w7[0], selector); - c3[2] = hc_byte_perm (w7[0], w6[3], selector); - c3[1] = hc_byte_perm (w6[3], w6[2], selector); - c3[0] = hc_byte_perm (w6[2], w6[1], selector); - c2[3] = hc_byte_perm (w6[1], w6[0], selector); - c2[2] = hc_byte_perm (w6[0], w5[3], selector); - c2[1] = hc_byte_perm (w5[3], w5[2], selector); - c2[0] = hc_byte_perm (w5[2], w5[1], selector); - c1[3] = hc_byte_perm (w5[1], w5[0], selector); - c1[2] = hc_byte_perm (w5[0], w4[3], selector); - c1[1] = hc_byte_perm (w4[3], w4[2], selector); - c1[0] = hc_byte_perm (w4[2], w4[1], selector); - c0[3] = hc_byte_perm (w4[1], w4[0], selector); - c0[2] = hc_byte_perm (w4[0], w3[3], selector); - c0[1] = hc_byte_perm (w3[3], w3[2], selector); - c0[0] = hc_byte_perm (w3[2], w3[1], selector); - w7[3] = hc_byte_perm (w3[1], w3[0], selector); - w7[2] = hc_byte_perm (w3[0], w2[3], selector); - w7[1] = hc_byte_perm (w2[3], w2[2], selector); - w7[0] = hc_byte_perm (w2[2], w2[1], selector); - w6[3] = hc_byte_perm (w2[1], w2[0], selector); - w6[2] = hc_byte_perm (w2[0], w1[3], selector); - w6[1] = hc_byte_perm (w1[3], w1[2], selector); - w6[0] = hc_byte_perm (w1[2], w1[1], selector); - w5[3] = hc_byte_perm (w1[1], w1[0], selector); - w5[2] = hc_byte_perm (w1[0], w0[3], selector); - w5[1] = hc_byte_perm (w0[3], w0[2], selector); - w5[0] = hc_byte_perm (w0[2], w0[1], selector); - w4[3] = hc_byte_perm (w0[1], w0[0], selector); - w4[2] = hc_byte_perm (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm ( 0, w7[3], selector); - c4[2] = hc_byte_perm (w7[3], w7[2], selector); - c4[1] = hc_byte_perm (w7[2], w7[1], selector); - c4[0] = hc_byte_perm (w7[1], w7[0], selector); - c3[3] = hc_byte_perm (w7[0], w6[3], selector); - c3[2] = hc_byte_perm (w6[3], w6[2], selector); - c3[1] = hc_byte_perm (w6[2], w6[1], selector); - c3[0] = hc_byte_perm (w6[1], w6[0], selector); - c2[3] = hc_byte_perm (w6[0], w5[3], selector); - c2[2] = hc_byte_perm (w5[3], w5[2], selector); - c2[1] = hc_byte_perm (w5[2], w5[1], selector); - c2[0] = hc_byte_perm (w5[1], w5[0], selector); - c1[3] = hc_byte_perm (w5[0], w4[3], selector); - c1[2] = hc_byte_perm (w4[3], w4[2], selector); - c1[1] = hc_byte_perm (w4[2], w4[1], selector); - c1[0] = hc_byte_perm (w4[1], w4[0], selector); - c0[3] = hc_byte_perm (w4[0], w3[3], selector); - c0[2] = hc_byte_perm (w3[3], w3[2], selector); - c0[1] = hc_byte_perm (w3[2], w3[1], selector); - c0[0] = hc_byte_perm (w3[1], w3[0], selector); - w7[3] = hc_byte_perm (w3[0], w2[3], selector); - w7[2] = hc_byte_perm (w2[3], w2[2], selector); - w7[1] = hc_byte_perm (w2[2], w2[1], selector); - w7[0] = hc_byte_perm (w2[1], w2[0], selector); - w6[3] = hc_byte_perm (w2[0], w1[3], selector); - w6[2] = hc_byte_perm (w1[3], w1[2], selector); - w6[1] = hc_byte_perm (w1[2], w1[1], selector); - w6[0] = hc_byte_perm (w1[1], w1[0], selector); - w5[3] = hc_byte_perm (w1[0], w0[3], selector); - w5[2] = hc_byte_perm (w0[3], w0[2], selector); - w5[1] = hc_byte_perm (w0[2], w0[1], selector); - w5[0] = hc_byte_perm (w0[1], w0[0], selector); - w4[3] = hc_byte_perm (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm ( 0, w7[3], selector); - c4[3] = hc_byte_perm (w7[3], w7[2], selector); - c4[2] = hc_byte_perm (w7[2], w7[1], selector); - c4[1] = hc_byte_perm (w7[1], w7[0], selector); - c4[0] = hc_byte_perm (w7[0], w6[3], selector); - c3[3] = hc_byte_perm (w6[3], w6[2], selector); - c3[2] = hc_byte_perm (w6[2], w6[1], selector); - c3[1] = hc_byte_perm (w6[1], w6[0], selector); - c3[0] = hc_byte_perm (w6[0], w5[3], selector); - c2[3] = hc_byte_perm (w5[3], w5[2], selector); - c2[2] = hc_byte_perm (w5[2], w5[1], selector); - c2[1] = hc_byte_perm (w5[1], w5[0], selector); - c2[0] = hc_byte_perm (w5[0], w4[3], selector); - c1[3] = hc_byte_perm (w4[3], w4[2], selector); - c1[2] = hc_byte_perm (w4[2], w4[1], selector); - c1[1] = hc_byte_perm (w4[1], w4[0], selector); - c1[0] = hc_byte_perm (w4[0], w3[3], selector); - c0[3] = hc_byte_perm (w3[3], w3[2], selector); - c0[2] = hc_byte_perm (w3[2], w3[1], selector); - c0[1] = hc_byte_perm (w3[1], w3[0], selector); - c0[0] = hc_byte_perm (w3[0], w2[3], selector); - w7[3] = hc_byte_perm (w2[3], w2[2], selector); - w7[2] = hc_byte_perm (w2[2], w2[1], selector); - w7[1] = hc_byte_perm (w2[1], w2[0], selector); - w7[0] = hc_byte_perm (w2[0], w1[3], selector); - w6[3] = hc_byte_perm (w1[3], w1[2], selector); - w6[2] = hc_byte_perm (w1[2], w1[1], selector); - w6[1] = hc_byte_perm (w1[1], w1[0], selector); - w6[0] = hc_byte_perm (w1[0], w0[3], selector); - w5[3] = hc_byte_perm (w0[3], w0[2], selector); - w5[2] = hc_byte_perm (w0[2], w0[1], selector); - w5[1] = hc_byte_perm (w0[1], w0[0], selector); - w5[0] = hc_byte_perm (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm ( 0, w7[3], selector); - c5[0] = hc_byte_perm (w7[3], w7[2], selector); - c4[3] = hc_byte_perm (w7[2], w7[1], selector); - c4[2] = hc_byte_perm (w7[1], w7[0], selector); - c4[1] = hc_byte_perm (w7[0], w6[3], selector); - c4[0] = hc_byte_perm (w6[3], w6[2], selector); - c3[3] = hc_byte_perm (w6[2], w6[1], selector); - c3[2] = hc_byte_perm (w6[1], w6[0], selector); - c3[1] = hc_byte_perm (w6[0], w5[3], selector); - c3[0] = hc_byte_perm (w5[3], w5[2], selector); - c2[3] = hc_byte_perm (w5[2], w5[1], selector); - c2[2] = hc_byte_perm (w5[1], w5[0], selector); - c2[1] = hc_byte_perm (w5[0], w4[3], selector); - c2[0] = hc_byte_perm (w4[3], w4[2], selector); - c1[3] = hc_byte_perm (w4[2], w4[1], selector); - c1[2] = hc_byte_perm (w4[1], w4[0], selector); - c1[1] = hc_byte_perm (w4[0], w3[3], selector); - c1[0] = hc_byte_perm (w3[3], w3[2], selector); - c0[3] = hc_byte_perm (w3[2], w3[1], selector); - c0[2] = hc_byte_perm (w3[1], w3[0], selector); - c0[1] = hc_byte_perm (w3[0], w2[3], selector); - c0[0] = hc_byte_perm (w2[3], w2[2], selector); - w7[3] = hc_byte_perm (w2[2], w2[1], selector); - w7[2] = hc_byte_perm (w2[1], w2[0], selector); - w7[1] = hc_byte_perm (w2[0], w1[3], selector); - w7[0] = hc_byte_perm (w1[3], w1[2], selector); - w6[3] = hc_byte_perm (w1[2], w1[1], selector); - w6[2] = hc_byte_perm (w1[1], w1[0], selector); - w6[1] = hc_byte_perm (w1[0], w0[3], selector); - w6[0] = hc_byte_perm (w0[3], w0[2], selector); - w5[3] = hc_byte_perm (w0[2], w0[1], selector); - w5[2] = hc_byte_perm (w0[1], w0[0], selector); - w5[1] = hc_byte_perm (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm ( 0, w7[3], selector); - c5[1] = hc_byte_perm (w7[3], w7[2], selector); - c5[0] = hc_byte_perm (w7[2], w7[1], selector); - c4[3] = hc_byte_perm (w7[1], w7[0], selector); - c4[2] = hc_byte_perm (w7[0], w6[3], selector); - c4[1] = hc_byte_perm (w6[3], w6[2], selector); - c4[0] = hc_byte_perm (w6[2], w6[1], selector); - c3[3] = hc_byte_perm (w6[1], w6[0], selector); - c3[2] = hc_byte_perm (w6[0], w5[3], selector); - c3[1] = hc_byte_perm (w5[3], w5[2], selector); - c3[0] = hc_byte_perm (w5[2], w5[1], selector); - c2[3] = hc_byte_perm (w5[1], w5[0], selector); - c2[2] = hc_byte_perm (w5[0], w4[3], selector); - c2[1] = hc_byte_perm (w4[3], w4[2], selector); - c2[0] = hc_byte_perm (w4[2], w4[1], selector); - c1[3] = hc_byte_perm (w4[1], w4[0], selector); - c1[2] = hc_byte_perm (w4[0], w3[3], selector); - c1[1] = hc_byte_perm (w3[3], w3[2], selector); - c1[0] = hc_byte_perm (w3[2], w3[1], selector); - c0[3] = hc_byte_perm (w3[1], w3[0], selector); - c0[2] = hc_byte_perm (w3[0], w2[3], selector); - c0[1] = hc_byte_perm (w2[3], w2[2], selector); - c0[0] = hc_byte_perm (w2[2], w2[1], selector); - w7[3] = hc_byte_perm (w2[1], w2[0], selector); - w7[2] = hc_byte_perm (w2[0], w1[3], selector); - w7[1] = hc_byte_perm (w1[3], w1[2], selector); - w7[0] = hc_byte_perm (w1[2], w1[1], selector); - w6[3] = hc_byte_perm (w1[1], w1[0], selector); - w6[2] = hc_byte_perm (w1[0], w0[3], selector); - w6[1] = hc_byte_perm (w0[3], w0[2], selector); - w6[0] = hc_byte_perm (w0[2], w0[1], selector); - w5[3] = hc_byte_perm (w0[1], w0[0], selector); - w5[2] = hc_byte_perm (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm ( 0, w7[3], selector); - c5[2] = hc_byte_perm (w7[3], w7[2], selector); - c5[1] = hc_byte_perm (w7[2], w7[1], selector); - c5[0] = hc_byte_perm (w7[1], w7[0], selector); - c4[3] = hc_byte_perm (w7[0], w6[3], selector); - c4[2] = hc_byte_perm (w6[3], w6[2], selector); - c4[1] = hc_byte_perm (w6[2], w6[1], selector); - c4[0] = hc_byte_perm (w6[1], w6[0], selector); - c3[3] = hc_byte_perm (w6[0], w5[3], selector); - c3[2] = hc_byte_perm (w5[3], w5[2], selector); - c3[1] = hc_byte_perm (w5[2], w5[1], selector); - c3[0] = hc_byte_perm (w5[1], w5[0], selector); - c2[3] = hc_byte_perm (w5[0], w4[3], selector); - c2[2] = hc_byte_perm (w4[3], w4[2], selector); - c2[1] = hc_byte_perm (w4[2], w4[1], selector); - c2[0] = hc_byte_perm (w4[1], w4[0], selector); - c1[3] = hc_byte_perm (w4[0], w3[3], selector); - c1[2] = hc_byte_perm (w3[3], w3[2], selector); - c1[1] = hc_byte_perm (w3[2], w3[1], selector); - c1[0] = hc_byte_perm (w3[1], w3[0], selector); - c0[3] = hc_byte_perm (w3[0], w2[3], selector); - c0[2] = hc_byte_perm (w2[3], w2[2], selector); - c0[1] = hc_byte_perm (w2[2], w2[1], selector); - c0[0] = hc_byte_perm (w2[1], w2[0], selector); - w7[3] = hc_byte_perm (w2[0], w1[3], selector); - w7[2] = hc_byte_perm (w1[3], w1[2], selector); - w7[1] = hc_byte_perm (w1[2], w1[1], selector); - w7[0] = hc_byte_perm (w1[1], w1[0], selector); - w6[3] = hc_byte_perm (w1[0], w0[3], selector); - w6[2] = hc_byte_perm (w0[3], w0[2], selector); - w6[1] = hc_byte_perm (w0[2], w0[1], selector); - w6[0] = hc_byte_perm (w0[1], w0[0], selector); - w5[3] = hc_byte_perm (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm ( 0, w7[3], selector); - c5[3] = hc_byte_perm (w7[3], w7[2], selector); - c5[2] = hc_byte_perm (w7[2], w7[1], selector); - c5[1] = hc_byte_perm (w7[1], w7[0], selector); - c5[0] = hc_byte_perm (w7[0], w6[3], selector); - c4[3] = hc_byte_perm (w6[3], w6[2], selector); - c4[2] = hc_byte_perm (w6[2], w6[1], selector); - c4[1] = hc_byte_perm (w6[1], w6[0], selector); - c4[0] = hc_byte_perm (w6[0], w5[3], selector); - c3[3] = hc_byte_perm (w5[3], w5[2], selector); - c3[2] = hc_byte_perm (w5[2], w5[1], selector); - c3[1] = hc_byte_perm (w5[1], w5[0], selector); - c3[0] = hc_byte_perm (w5[0], w4[3], selector); - c2[3] = hc_byte_perm (w4[3], w4[2], selector); - c2[2] = hc_byte_perm (w4[2], w4[1], selector); - c2[1] = hc_byte_perm (w4[1], w4[0], selector); - c2[0] = hc_byte_perm (w4[0], w3[3], selector); - c1[3] = hc_byte_perm (w3[3], w3[2], selector); - c1[2] = hc_byte_perm (w3[2], w3[1], selector); - c1[1] = hc_byte_perm (w3[1], w3[0], selector); - c1[0] = hc_byte_perm (w3[0], w2[3], selector); - c0[3] = hc_byte_perm (w2[3], w2[2], selector); - c0[2] = hc_byte_perm (w2[2], w2[1], selector); - c0[1] = hc_byte_perm (w2[1], w2[0], selector); - c0[0] = hc_byte_perm (w2[0], w1[3], selector); - w7[3] = hc_byte_perm (w1[3], w1[2], selector); - w7[2] = hc_byte_perm (w1[2], w1[1], selector); - w7[1] = hc_byte_perm (w1[1], w1[0], selector); - w7[0] = hc_byte_perm (w1[0], w0[3], selector); - w6[3] = hc_byte_perm (w0[3], w0[2], selector); - w6[2] = hc_byte_perm (w0[2], w0[1], selector); - w6[1] = hc_byte_perm (w0[1], w0[0], selector); - w6[0] = hc_byte_perm (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm ( 0, w7[3], selector); - c6[0] = hc_byte_perm (w7[3], w7[2], selector); - c5[3] = hc_byte_perm (w7[2], w7[1], selector); - c5[2] = hc_byte_perm (w7[1], w7[0], selector); - c5[1] = hc_byte_perm (w7[0], w6[3], selector); - c5[0] = hc_byte_perm (w6[3], w6[2], selector); - c4[3] = hc_byte_perm (w6[2], w6[1], selector); - c4[2] = hc_byte_perm (w6[1], w6[0], selector); - c4[1] = hc_byte_perm (w6[0], w5[3], selector); - c4[0] = hc_byte_perm (w5[3], w5[2], selector); - c3[3] = hc_byte_perm (w5[2], w5[1], selector); - c3[2] = hc_byte_perm (w5[1], w5[0], selector); - c3[1] = hc_byte_perm (w5[0], w4[3], selector); - c3[0] = hc_byte_perm (w4[3], w4[2], selector); - c2[3] = hc_byte_perm (w4[2], w4[1], selector); - c2[2] = hc_byte_perm (w4[1], w4[0], selector); - c2[1] = hc_byte_perm (w4[0], w3[3], selector); - c2[0] = hc_byte_perm (w3[3], w3[2], selector); - c1[3] = hc_byte_perm (w3[2], w3[1], selector); - c1[2] = hc_byte_perm (w3[1], w3[0], selector); - c1[1] = hc_byte_perm (w3[0], w2[3], selector); - c1[0] = hc_byte_perm (w2[3], w2[2], selector); - c0[3] = hc_byte_perm (w2[2], w2[1], selector); - c0[2] = hc_byte_perm (w2[1], w2[0], selector); - c0[1] = hc_byte_perm (w2[0], w1[3], selector); - c0[0] = hc_byte_perm (w1[3], w1[2], selector); - w7[3] = hc_byte_perm (w1[2], w1[1], selector); - w7[2] = hc_byte_perm (w1[1], w1[0], selector); - w7[1] = hc_byte_perm (w1[0], w0[3], selector); - w7[0] = hc_byte_perm (w0[3], w0[2], selector); - w6[3] = hc_byte_perm (w0[2], w0[1], selector); - w6[2] = hc_byte_perm (w0[1], w0[0], selector); - w6[1] = hc_byte_perm (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm ( 0, w7[3], selector); - c6[1] = hc_byte_perm (w7[3], w7[2], selector); - c6[0] = hc_byte_perm (w7[2], w7[1], selector); - c5[3] = hc_byte_perm (w7[1], w7[0], selector); - c5[2] = hc_byte_perm (w7[0], w6[3], selector); - c5[1] = hc_byte_perm (w6[3], w6[2], selector); - c5[0] = hc_byte_perm (w6[2], w6[1], selector); - c4[3] = hc_byte_perm (w6[1], w6[0], selector); - c4[2] = hc_byte_perm (w6[0], w5[3], selector); - c4[1] = hc_byte_perm (w5[3], w5[2], selector); - c4[0] = hc_byte_perm (w5[2], w5[1], selector); - c3[3] = hc_byte_perm (w5[1], w5[0], selector); - c3[2] = hc_byte_perm (w5[0], w4[3], selector); - c3[1] = hc_byte_perm (w4[3], w4[2], selector); - c3[0] = hc_byte_perm (w4[2], w4[1], selector); - c2[3] = hc_byte_perm (w4[1], w4[0], selector); - c2[2] = hc_byte_perm (w4[0], w3[3], selector); - c2[1] = hc_byte_perm (w3[3], w3[2], selector); - c2[0] = hc_byte_perm (w3[2], w3[1], selector); - c1[3] = hc_byte_perm (w3[1], w3[0], selector); - c1[2] = hc_byte_perm (w3[0], w2[3], selector); - c1[1] = hc_byte_perm (w2[3], w2[2], selector); - c1[0] = hc_byte_perm (w2[2], w2[1], selector); - c0[3] = hc_byte_perm (w2[1], w2[0], selector); - c0[2] = hc_byte_perm (w2[0], w1[3], selector); - c0[1] = hc_byte_perm (w1[3], w1[2], selector); - c0[0] = hc_byte_perm (w1[2], w1[1], selector); - w7[3] = hc_byte_perm (w1[1], w1[0], selector); - w7[2] = hc_byte_perm (w1[0], w0[3], selector); - w7[1] = hc_byte_perm (w0[3], w0[2], selector); - w7[0] = hc_byte_perm (w0[2], w0[1], selector); - w6[3] = hc_byte_perm (w0[1], w0[0], selector); - w6[2] = hc_byte_perm (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm ( 0, w7[3], selector); - c6[2] = hc_byte_perm (w7[3], w7[2], selector); - c6[1] = hc_byte_perm (w7[2], w7[1], selector); - c6[0] = hc_byte_perm (w7[1], w7[0], selector); - c5[3] = hc_byte_perm (w7[0], w6[3], selector); - c5[2] = hc_byte_perm (w6[3], w6[2], selector); - c5[1] = hc_byte_perm (w6[2], w6[1], selector); - c5[0] = hc_byte_perm (w6[1], w6[0], selector); - c4[3] = hc_byte_perm (w6[0], w5[3], selector); - c4[2] = hc_byte_perm (w5[3], w5[2], selector); - c4[1] = hc_byte_perm (w5[2], w5[1], selector); - c4[0] = hc_byte_perm (w5[1], w5[0], selector); - c3[3] = hc_byte_perm (w5[0], w4[3], selector); - c3[2] = hc_byte_perm (w4[3], w4[2], selector); - c3[1] = hc_byte_perm (w4[2], w4[1], selector); - c3[0] = hc_byte_perm (w4[1], w4[0], selector); - c2[3] = hc_byte_perm (w4[0], w3[3], selector); - c2[2] = hc_byte_perm (w3[3], w3[2], selector); - c2[1] = hc_byte_perm (w3[2], w3[1], selector); - c2[0] = hc_byte_perm (w3[1], w3[0], selector); - c1[3] = hc_byte_perm (w3[0], w2[3], selector); - c1[2] = hc_byte_perm (w2[3], w2[2], selector); - c1[1] = hc_byte_perm (w2[2], w2[1], selector); - c1[0] = hc_byte_perm (w2[1], w2[0], selector); - c0[3] = hc_byte_perm (w2[0], w1[3], selector); - c0[2] = hc_byte_perm (w1[3], w1[2], selector); - c0[1] = hc_byte_perm (w1[2], w1[1], selector); - c0[0] = hc_byte_perm (w1[1], w1[0], selector); - w7[3] = hc_byte_perm (w1[0], w0[3], selector); - w7[2] = hc_byte_perm (w0[3], w0[2], selector); - w7[1] = hc_byte_perm (w0[2], w0[1], selector); - w7[0] = hc_byte_perm (w0[1], w0[0], selector); - w6[3] = hc_byte_perm (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm ( 0, w7[3], selector); - c6[3] = hc_byte_perm (w7[3], w7[2], selector); - c6[2] = hc_byte_perm (w7[2], w7[1], selector); - c6[1] = hc_byte_perm (w7[1], w7[0], selector); - c6[0] = hc_byte_perm (w7[0], w6[3], selector); - c5[3] = hc_byte_perm (w6[3], w6[2], selector); - c5[2] = hc_byte_perm (w6[2], w6[1], selector); - c5[1] = hc_byte_perm (w6[1], w6[0], selector); - c5[0] = hc_byte_perm (w6[0], w5[3], selector); - c4[3] = hc_byte_perm (w5[3], w5[2], selector); - c4[2] = hc_byte_perm (w5[2], w5[1], selector); - c4[1] = hc_byte_perm (w5[1], w5[0], selector); - c4[0] = hc_byte_perm (w5[0], w4[3], selector); - c3[3] = hc_byte_perm (w4[3], w4[2], selector); - c3[2] = hc_byte_perm (w4[2], w4[1], selector); - c3[1] = hc_byte_perm (w4[1], w4[0], selector); - c3[0] = hc_byte_perm (w4[0], w3[3], selector); - c2[3] = hc_byte_perm (w3[3], w3[2], selector); - c2[2] = hc_byte_perm (w3[2], w3[1], selector); - c2[1] = hc_byte_perm (w3[1], w3[0], selector); - c2[0] = hc_byte_perm (w3[0], w2[3], selector); - c1[3] = hc_byte_perm (w2[3], w2[2], selector); - c1[2] = hc_byte_perm (w2[2], w2[1], selector); - c1[1] = hc_byte_perm (w2[1], w2[0], selector); - c1[0] = hc_byte_perm (w2[0], w1[3], selector); - c0[3] = hc_byte_perm (w1[3], w1[2], selector); - c0[2] = hc_byte_perm (w1[2], w1[1], selector); - c0[1] = hc_byte_perm (w1[1], w1[0], selector); - c0[0] = hc_byte_perm (w1[0], w0[3], selector); - w7[3] = hc_byte_perm (w0[3], w0[2], selector); - w7[2] = hc_byte_perm (w0[2], w0[1], selector); - w7[1] = hc_byte_perm (w0[1], w0[0], selector); - w7[0] = hc_byte_perm (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm ( 0, w7[3], selector); - c7[0] = hc_byte_perm (w7[3], w7[2], selector); - c6[3] = hc_byte_perm (w7[2], w7[1], selector); - c6[2] = hc_byte_perm (w7[1], w7[0], selector); - c6[1] = hc_byte_perm (w7[0], w6[3], selector); - c6[0] = hc_byte_perm (w6[3], w6[2], selector); - c5[3] = hc_byte_perm (w6[2], w6[1], selector); - c5[2] = hc_byte_perm (w6[1], w6[0], selector); - c5[1] = hc_byte_perm (w6[0], w5[3], selector); - c5[0] = hc_byte_perm (w5[3], w5[2], selector); - c4[3] = hc_byte_perm (w5[2], w5[1], selector); - c4[2] = hc_byte_perm (w5[1], w5[0], selector); - c4[1] = hc_byte_perm (w5[0], w4[3], selector); - c4[0] = hc_byte_perm (w4[3], w4[2], selector); - c3[3] = hc_byte_perm (w4[2], w4[1], selector); - c3[2] = hc_byte_perm (w4[1], w4[0], selector); - c3[1] = hc_byte_perm (w4[0], w3[3], selector); - c3[0] = hc_byte_perm (w3[3], w3[2], selector); - c2[3] = hc_byte_perm (w3[2], w3[1], selector); - c2[2] = hc_byte_perm (w3[1], w3[0], selector); - c2[1] = hc_byte_perm (w3[0], w2[3], selector); - c2[0] = hc_byte_perm (w2[3], w2[2], selector); - c1[3] = hc_byte_perm (w2[2], w2[1], selector); - c1[2] = hc_byte_perm (w2[1], w2[0], selector); - c1[1] = hc_byte_perm (w2[0], w1[3], selector); - c1[0] = hc_byte_perm (w1[3], w1[2], selector); - c0[3] = hc_byte_perm (w1[2], w1[1], selector); - c0[2] = hc_byte_perm (w1[1], w1[0], selector); - c0[1] = hc_byte_perm (w1[0], w0[3], selector); - c0[0] = hc_byte_perm (w0[3], w0[2], selector); - w7[3] = hc_byte_perm (w0[2], w0[1], selector); - w7[2] = hc_byte_perm (w0[1], w0[0], selector); - w7[1] = hc_byte_perm (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm ( 0, w7[3], selector); - c7[1] = hc_byte_perm (w7[3], w7[2], selector); - c7[0] = hc_byte_perm (w7[2], w7[1], selector); - c6[3] = hc_byte_perm (w7[1], w7[0], selector); - c6[2] = hc_byte_perm (w7[0], w6[3], selector); - c6[1] = hc_byte_perm (w6[3], w6[2], selector); - c6[0] = hc_byte_perm (w6[2], w6[1], selector); - c5[3] = hc_byte_perm (w6[1], w6[0], selector); - c5[2] = hc_byte_perm (w6[0], w5[3], selector); - c5[1] = hc_byte_perm (w5[3], w5[2], selector); - c5[0] = hc_byte_perm (w5[2], w5[1], selector); - c4[3] = hc_byte_perm (w5[1], w5[0], selector); - c4[2] = hc_byte_perm (w5[0], w4[3], selector); - c4[1] = hc_byte_perm (w4[3], w4[2], selector); - c4[0] = hc_byte_perm (w4[2], w4[1], selector); - c3[3] = hc_byte_perm (w4[1], w4[0], selector); - c3[2] = hc_byte_perm (w4[0], w3[3], selector); - c3[1] = hc_byte_perm (w3[3], w3[2], selector); - c3[0] = hc_byte_perm (w3[2], w3[1], selector); - c2[3] = hc_byte_perm (w3[1], w3[0], selector); - c2[2] = hc_byte_perm (w3[0], w2[3], selector); - c2[1] = hc_byte_perm (w2[3], w2[2], selector); - c2[0] = hc_byte_perm (w2[2], w2[1], selector); - c1[3] = hc_byte_perm (w2[1], w2[0], selector); - c1[2] = hc_byte_perm (w2[0], w1[3], selector); - c1[1] = hc_byte_perm (w1[3], w1[2], selector); - c1[0] = hc_byte_perm (w1[2], w1[1], selector); - c0[3] = hc_byte_perm (w1[1], w1[0], selector); - c0[2] = hc_byte_perm (w1[0], w0[3], selector); - c0[1] = hc_byte_perm (w0[3], w0[2], selector); - c0[0] = hc_byte_perm (w0[2], w0[1], selector); - w7[3] = hc_byte_perm (w0[1], w0[0], selector); - w7[2] = hc_byte_perm (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm ( 0, w7[3], selector); - c7[2] = hc_byte_perm (w7[3], w7[2], selector); - c7[1] = hc_byte_perm (w7[2], w7[1], selector); - c7[0] = hc_byte_perm (w7[1], w7[0], selector); - c6[3] = hc_byte_perm (w7[0], w6[3], selector); - c6[2] = hc_byte_perm (w6[3], w6[2], selector); - c6[1] = hc_byte_perm (w6[2], w6[1], selector); - c6[0] = hc_byte_perm (w6[1], w6[0], selector); - c5[3] = hc_byte_perm (w6[0], w5[3], selector); - c5[2] = hc_byte_perm (w5[3], w5[2], selector); - c5[1] = hc_byte_perm (w5[2], w5[1], selector); - c5[0] = hc_byte_perm (w5[1], w5[0], selector); - c4[3] = hc_byte_perm (w5[0], w4[3], selector); - c4[2] = hc_byte_perm (w4[3], w4[2], selector); - c4[1] = hc_byte_perm (w4[2], w4[1], selector); - c4[0] = hc_byte_perm (w4[1], w4[0], selector); - c3[3] = hc_byte_perm (w4[0], w3[3], selector); - c3[2] = hc_byte_perm (w3[3], w3[2], selector); - c3[1] = hc_byte_perm (w3[2], w3[1], selector); - c3[0] = hc_byte_perm (w3[1], w3[0], selector); - c2[3] = hc_byte_perm (w3[0], w2[3], selector); - c2[2] = hc_byte_perm (w2[3], w2[2], selector); - c2[1] = hc_byte_perm (w2[2], w2[1], selector); - c2[0] = hc_byte_perm (w2[1], w2[0], selector); - c1[3] = hc_byte_perm (w2[0], w1[3], selector); - c1[2] = hc_byte_perm (w1[3], w1[2], selector); - c1[1] = hc_byte_perm (w1[2], w1[1], selector); - c1[0] = hc_byte_perm (w1[1], w1[0], selector); - c0[3] = hc_byte_perm (w1[0], w0[3], selector); - c0[2] = hc_byte_perm (w0[3], w0[2], selector); - c0[1] = hc_byte_perm (w0[2], w0[1], selector); - c0[0] = hc_byte_perm (w0[1], w0[0], selector); - w7[3] = hc_byte_perm (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_le (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -21798,4384 +15158,12 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (PRIVATE_AS u32x *w, const u32 off break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm (w[62], w[63], selector); - w[62] = hc_byte_perm (w[61], w[62], selector); - w[61] = hc_byte_perm (w[60], w[61], selector); - w[60] = hc_byte_perm (w[59], w[60], selector); - w[59] = hc_byte_perm (w[58], w[59], selector); - w[58] = hc_byte_perm (w[57], w[58], selector); - w[57] = hc_byte_perm (w[56], w[57], selector); - w[56] = hc_byte_perm (w[55], w[56], selector); - w[55] = hc_byte_perm (w[54], w[55], selector); - w[54] = hc_byte_perm (w[53], w[54], selector); - w[53] = hc_byte_perm (w[52], w[53], selector); - w[52] = hc_byte_perm (w[51], w[52], selector); - w[51] = hc_byte_perm (w[50], w[51], selector); - w[50] = hc_byte_perm (w[49], w[50], selector); - w[49] = hc_byte_perm (w[48], w[49], selector); - w[48] = hc_byte_perm (w[47], w[48], selector); - w[47] = hc_byte_perm (w[46], w[47], selector); - w[46] = hc_byte_perm (w[45], w[46], selector); - w[45] = hc_byte_perm (w[44], w[45], selector); - w[44] = hc_byte_perm (w[43], w[44], selector); - w[43] = hc_byte_perm (w[42], w[43], selector); - w[42] = hc_byte_perm (w[41], w[42], selector); - w[41] = hc_byte_perm (w[40], w[41], selector); - w[40] = hc_byte_perm (w[39], w[40], selector); - w[39] = hc_byte_perm (w[38], w[39], selector); - w[38] = hc_byte_perm (w[37], w[38], selector); - w[37] = hc_byte_perm (w[36], w[37], selector); - w[36] = hc_byte_perm (w[35], w[36], selector); - w[35] = hc_byte_perm (w[34], w[35], selector); - w[34] = hc_byte_perm (w[33], w[34], selector); - w[33] = hc_byte_perm (w[32], w[33], selector); - w[32] = hc_byte_perm (w[31], w[32], selector); - w[31] = hc_byte_perm (w[30], w[31], selector); - w[30] = hc_byte_perm (w[29], w[30], selector); - w[29] = hc_byte_perm (w[28], w[29], selector); - w[28] = hc_byte_perm (w[27], w[28], selector); - w[27] = hc_byte_perm (w[26], w[27], selector); - w[26] = hc_byte_perm (w[25], w[26], selector); - w[25] = hc_byte_perm (w[24], w[25], selector); - w[24] = hc_byte_perm (w[23], w[24], selector); - w[23] = hc_byte_perm (w[22], w[23], selector); - w[22] = hc_byte_perm (w[21], w[22], selector); - w[21] = hc_byte_perm (w[20], w[21], selector); - w[20] = hc_byte_perm (w[19], w[20], selector); - w[19] = hc_byte_perm (w[18], w[19], selector); - w[18] = hc_byte_perm (w[17], w[18], selector); - w[17] = hc_byte_perm (w[16], w[17], selector); - w[16] = hc_byte_perm (w[15], w[16], selector); - w[15] = hc_byte_perm (w[14], w[15], selector); - w[14] = hc_byte_perm (w[13], w[14], selector); - w[13] = hc_byte_perm (w[12], w[13], selector); - w[12] = hc_byte_perm (w[11], w[12], selector); - w[11] = hc_byte_perm (w[10], w[11], selector); - w[10] = hc_byte_perm (w[ 9], w[10], selector); - w[ 9] = hc_byte_perm (w[ 8], w[ 9], selector); - w[ 8] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 7] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 6] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 5] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 4] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 3] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 2] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 1] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 0] = hc_byte_perm ( 0, w[ 0], selector); - - break; - - case 1: - w[63] = hc_byte_perm (w[61], w[62], selector); - w[62] = hc_byte_perm (w[60], w[61], selector); - w[61] = hc_byte_perm (w[59], w[60], selector); - w[60] = hc_byte_perm (w[58], w[59], selector); - w[59] = hc_byte_perm (w[57], w[58], selector); - w[58] = hc_byte_perm (w[56], w[57], selector); - w[57] = hc_byte_perm (w[55], w[56], selector); - w[56] = hc_byte_perm (w[54], w[55], selector); - w[55] = hc_byte_perm (w[53], w[54], selector); - w[54] = hc_byte_perm (w[52], w[53], selector); - w[53] = hc_byte_perm (w[51], w[52], selector); - w[52] = hc_byte_perm (w[50], w[51], selector); - w[51] = hc_byte_perm (w[49], w[50], selector); - w[50] = hc_byte_perm (w[48], w[49], selector); - w[49] = hc_byte_perm (w[47], w[48], selector); - w[48] = hc_byte_perm (w[46], w[47], selector); - w[47] = hc_byte_perm (w[45], w[46], selector); - w[46] = hc_byte_perm (w[44], w[45], selector); - w[45] = hc_byte_perm (w[43], w[44], selector); - w[44] = hc_byte_perm (w[42], w[43], selector); - w[43] = hc_byte_perm (w[41], w[42], selector); - w[42] = hc_byte_perm (w[40], w[41], selector); - w[41] = hc_byte_perm (w[39], w[40], selector); - w[40] = hc_byte_perm (w[38], w[39], selector); - w[39] = hc_byte_perm (w[37], w[38], selector); - w[38] = hc_byte_perm (w[36], w[37], selector); - w[37] = hc_byte_perm (w[35], w[36], selector); - w[36] = hc_byte_perm (w[34], w[35], selector); - w[35] = hc_byte_perm (w[33], w[34], selector); - w[34] = hc_byte_perm (w[32], w[33], selector); - w[33] = hc_byte_perm (w[31], w[32], selector); - w[32] = hc_byte_perm (w[30], w[31], selector); - w[31] = hc_byte_perm (w[29], w[30], selector); - w[30] = hc_byte_perm (w[28], w[29], selector); - w[29] = hc_byte_perm (w[27], w[28], selector); - w[28] = hc_byte_perm (w[26], w[27], selector); - w[27] = hc_byte_perm (w[25], w[26], selector); - w[26] = hc_byte_perm (w[24], w[25], selector); - w[25] = hc_byte_perm (w[23], w[24], selector); - w[24] = hc_byte_perm (w[22], w[23], selector); - w[23] = hc_byte_perm (w[21], w[22], selector); - w[22] = hc_byte_perm (w[20], w[21], selector); - w[21] = hc_byte_perm (w[19], w[20], selector); - w[20] = hc_byte_perm (w[18], w[19], selector); - w[19] = hc_byte_perm (w[17], w[18], selector); - w[18] = hc_byte_perm (w[16], w[17], selector); - w[17] = hc_byte_perm (w[15], w[16], selector); - w[16] = hc_byte_perm (w[14], w[15], selector); - w[15] = hc_byte_perm (w[13], w[14], selector); - w[14] = hc_byte_perm (w[12], w[13], selector); - w[13] = hc_byte_perm (w[11], w[12], selector); - w[12] = hc_byte_perm (w[10], w[11], selector); - w[11] = hc_byte_perm (w[ 9], w[10], selector); - w[10] = hc_byte_perm (w[ 8], w[ 9], selector); - w[ 9] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 8] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 7] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 6] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 5] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 4] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 3] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 2] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 1] = hc_byte_perm ( 0, w[ 0], selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm (w[60], w[61], selector); - w[62] = hc_byte_perm (w[59], w[60], selector); - w[61] = hc_byte_perm (w[58], w[59], selector); - w[60] = hc_byte_perm (w[57], w[58], selector); - w[59] = hc_byte_perm (w[56], w[57], selector); - w[58] = hc_byte_perm (w[55], w[56], selector); - w[57] = hc_byte_perm (w[54], w[55], selector); - w[56] = hc_byte_perm (w[53], w[54], selector); - w[55] = hc_byte_perm (w[52], w[53], selector); - w[54] = hc_byte_perm (w[51], w[52], selector); - w[53] = hc_byte_perm (w[50], w[51], selector); - w[52] = hc_byte_perm (w[49], w[50], selector); - w[51] = hc_byte_perm (w[48], w[49], selector); - w[50] = hc_byte_perm (w[47], w[48], selector); - w[49] = hc_byte_perm (w[46], w[47], selector); - w[48] = hc_byte_perm (w[45], w[46], selector); - w[47] = hc_byte_perm (w[44], w[45], selector); - w[46] = hc_byte_perm (w[43], w[44], selector); - w[45] = hc_byte_perm (w[42], w[43], selector); - w[44] = hc_byte_perm (w[41], w[42], selector); - w[43] = hc_byte_perm (w[40], w[41], selector); - w[42] = hc_byte_perm (w[39], w[40], selector); - w[41] = hc_byte_perm (w[38], w[39], selector); - w[40] = hc_byte_perm (w[37], w[38], selector); - w[39] = hc_byte_perm (w[36], w[37], selector); - w[38] = hc_byte_perm (w[35], w[36], selector); - w[37] = hc_byte_perm (w[34], w[35], selector); - w[36] = hc_byte_perm (w[33], w[34], selector); - w[35] = hc_byte_perm (w[32], w[33], selector); - w[34] = hc_byte_perm (w[31], w[32], selector); - w[33] = hc_byte_perm (w[30], w[31], selector); - w[32] = hc_byte_perm (w[29], w[30], selector); - w[31] = hc_byte_perm (w[28], w[29], selector); - w[30] = hc_byte_perm (w[27], w[28], selector); - w[29] = hc_byte_perm (w[26], w[27], selector); - w[28] = hc_byte_perm (w[25], w[26], selector); - w[27] = hc_byte_perm (w[24], w[25], selector); - w[26] = hc_byte_perm (w[23], w[24], selector); - w[25] = hc_byte_perm (w[22], w[23], selector); - w[24] = hc_byte_perm (w[21], w[22], selector); - w[23] = hc_byte_perm (w[20], w[21], selector); - w[22] = hc_byte_perm (w[19], w[20], selector); - w[21] = hc_byte_perm (w[18], w[19], selector); - w[20] = hc_byte_perm (w[17], w[18], selector); - w[19] = hc_byte_perm (w[16], w[17], selector); - w[18] = hc_byte_perm (w[15], w[16], selector); - w[17] = hc_byte_perm (w[14], w[15], selector); - w[16] = hc_byte_perm (w[13], w[14], selector); - w[15] = hc_byte_perm (w[12], w[13], selector); - w[14] = hc_byte_perm (w[11], w[12], selector); - w[13] = hc_byte_perm (w[10], w[11], selector); - w[12] = hc_byte_perm (w[ 9], w[10], selector); - w[11] = hc_byte_perm (w[ 8], w[ 9], selector); - w[10] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 9] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 8] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 7] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 6] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 5] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 4] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 3] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 2] = hc_byte_perm ( 0, w[ 0], selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm (w[59], w[60], selector); - w[62] = hc_byte_perm (w[58], w[59], selector); - w[61] = hc_byte_perm (w[57], w[58], selector); - w[60] = hc_byte_perm (w[56], w[57], selector); - w[59] = hc_byte_perm (w[55], w[56], selector); - w[58] = hc_byte_perm (w[54], w[55], selector); - w[57] = hc_byte_perm (w[53], w[54], selector); - w[56] = hc_byte_perm (w[52], w[53], selector); - w[55] = hc_byte_perm (w[51], w[52], selector); - w[54] = hc_byte_perm (w[50], w[51], selector); - w[53] = hc_byte_perm (w[49], w[50], selector); - w[52] = hc_byte_perm (w[48], w[49], selector); - w[51] = hc_byte_perm (w[47], w[48], selector); - w[50] = hc_byte_perm (w[46], w[47], selector); - w[49] = hc_byte_perm (w[45], w[46], selector); - w[48] = hc_byte_perm (w[44], w[45], selector); - w[47] = hc_byte_perm (w[43], w[44], selector); - w[46] = hc_byte_perm (w[42], w[43], selector); - w[45] = hc_byte_perm (w[41], w[42], selector); - w[44] = hc_byte_perm (w[40], w[41], selector); - w[43] = hc_byte_perm (w[39], w[40], selector); - w[42] = hc_byte_perm (w[38], w[39], selector); - w[41] = hc_byte_perm (w[37], w[38], selector); - w[40] = hc_byte_perm (w[36], w[37], selector); - w[39] = hc_byte_perm (w[35], w[36], selector); - w[38] = hc_byte_perm (w[34], w[35], selector); - w[37] = hc_byte_perm (w[33], w[34], selector); - w[36] = hc_byte_perm (w[32], w[33], selector); - w[35] = hc_byte_perm (w[31], w[32], selector); - w[34] = hc_byte_perm (w[30], w[31], selector); - w[33] = hc_byte_perm (w[29], w[30], selector); - w[32] = hc_byte_perm (w[28], w[29], selector); - w[31] = hc_byte_perm (w[27], w[28], selector); - w[30] = hc_byte_perm (w[26], w[27], selector); - w[29] = hc_byte_perm (w[25], w[26], selector); - w[28] = hc_byte_perm (w[24], w[25], selector); - w[27] = hc_byte_perm (w[23], w[24], selector); - w[26] = hc_byte_perm (w[22], w[23], selector); - w[25] = hc_byte_perm (w[21], w[22], selector); - w[24] = hc_byte_perm (w[20], w[21], selector); - w[23] = hc_byte_perm (w[19], w[20], selector); - w[22] = hc_byte_perm (w[18], w[19], selector); - w[21] = hc_byte_perm (w[17], w[18], selector); - w[20] = hc_byte_perm (w[16], w[17], selector); - w[19] = hc_byte_perm (w[15], w[16], selector); - w[18] = hc_byte_perm (w[14], w[15], selector); - w[17] = hc_byte_perm (w[13], w[14], selector); - w[16] = hc_byte_perm (w[12], w[13], selector); - w[15] = hc_byte_perm (w[11], w[12], selector); - w[14] = hc_byte_perm (w[10], w[11], selector); - w[13] = hc_byte_perm (w[ 9], w[10], selector); - w[12] = hc_byte_perm (w[ 8], w[ 9], selector); - w[11] = hc_byte_perm (w[ 7], w[ 8], selector); - w[10] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 9] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 8] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 7] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 6] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 5] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 4] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 3] = hc_byte_perm ( 0, w[ 0], selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm (w[58], w[59], selector); - w[62] = hc_byte_perm (w[57], w[58], selector); - w[61] = hc_byte_perm (w[56], w[57], selector); - w[60] = hc_byte_perm (w[55], w[56], selector); - w[59] = hc_byte_perm (w[54], w[55], selector); - w[58] = hc_byte_perm (w[53], w[54], selector); - w[57] = hc_byte_perm (w[52], w[53], selector); - w[56] = hc_byte_perm (w[51], w[52], selector); - w[55] = hc_byte_perm (w[50], w[51], selector); - w[54] = hc_byte_perm (w[49], w[50], selector); - w[53] = hc_byte_perm (w[48], w[49], selector); - w[52] = hc_byte_perm (w[47], w[48], selector); - w[51] = hc_byte_perm (w[46], w[47], selector); - w[50] = hc_byte_perm (w[45], w[46], selector); - w[49] = hc_byte_perm (w[44], w[45], selector); - w[48] = hc_byte_perm (w[43], w[44], selector); - w[47] = hc_byte_perm (w[42], w[43], selector); - w[46] = hc_byte_perm (w[41], w[42], selector); - w[45] = hc_byte_perm (w[40], w[41], selector); - w[44] = hc_byte_perm (w[39], w[40], selector); - w[43] = hc_byte_perm (w[38], w[39], selector); - w[42] = hc_byte_perm (w[37], w[38], selector); - w[41] = hc_byte_perm (w[36], w[37], selector); - w[40] = hc_byte_perm (w[35], w[36], selector); - w[39] = hc_byte_perm (w[34], w[35], selector); - w[38] = hc_byte_perm (w[33], w[34], selector); - w[37] = hc_byte_perm (w[32], w[33], selector); - w[36] = hc_byte_perm (w[31], w[32], selector); - w[35] = hc_byte_perm (w[30], w[31], selector); - w[34] = hc_byte_perm (w[29], w[30], selector); - w[33] = hc_byte_perm (w[28], w[29], selector); - w[32] = hc_byte_perm (w[27], w[28], selector); - w[31] = hc_byte_perm (w[26], w[27], selector); - w[30] = hc_byte_perm (w[25], w[26], selector); - w[29] = hc_byte_perm (w[24], w[25], selector); - w[28] = hc_byte_perm (w[23], w[24], selector); - w[27] = hc_byte_perm (w[22], w[23], selector); - w[26] = hc_byte_perm (w[21], w[22], selector); - w[25] = hc_byte_perm (w[20], w[21], selector); - w[24] = hc_byte_perm (w[19], w[20], selector); - w[23] = hc_byte_perm (w[18], w[19], selector); - w[22] = hc_byte_perm (w[17], w[18], selector); - w[21] = hc_byte_perm (w[16], w[17], selector); - w[20] = hc_byte_perm (w[15], w[16], selector); - w[19] = hc_byte_perm (w[14], w[15], selector); - w[18] = hc_byte_perm (w[13], w[14], selector); - w[17] = hc_byte_perm (w[12], w[13], selector); - w[16] = hc_byte_perm (w[11], w[12], selector); - w[15] = hc_byte_perm (w[10], w[11], selector); - w[14] = hc_byte_perm (w[ 9], w[10], selector); - w[13] = hc_byte_perm (w[ 8], w[ 9], selector); - w[12] = hc_byte_perm (w[ 7], w[ 8], selector); - w[11] = hc_byte_perm (w[ 6], w[ 7], selector); - w[10] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 9] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 8] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 7] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 6] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 5] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 4] = hc_byte_perm ( 0, w[ 0], selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm (w[57], w[58], selector); - w[62] = hc_byte_perm (w[56], w[57], selector); - w[61] = hc_byte_perm (w[55], w[56], selector); - w[60] = hc_byte_perm (w[54], w[55], selector); - w[59] = hc_byte_perm (w[53], w[54], selector); - w[58] = hc_byte_perm (w[52], w[53], selector); - w[57] = hc_byte_perm (w[51], w[52], selector); - w[56] = hc_byte_perm (w[50], w[51], selector); - w[55] = hc_byte_perm (w[49], w[50], selector); - w[54] = hc_byte_perm (w[48], w[49], selector); - w[53] = hc_byte_perm (w[47], w[48], selector); - w[52] = hc_byte_perm (w[46], w[47], selector); - w[51] = hc_byte_perm (w[45], w[46], selector); - w[50] = hc_byte_perm (w[44], w[45], selector); - w[49] = hc_byte_perm (w[43], w[44], selector); - w[48] = hc_byte_perm (w[42], w[43], selector); - w[47] = hc_byte_perm (w[41], w[42], selector); - w[46] = hc_byte_perm (w[40], w[41], selector); - w[45] = hc_byte_perm (w[39], w[40], selector); - w[44] = hc_byte_perm (w[38], w[39], selector); - w[43] = hc_byte_perm (w[37], w[38], selector); - w[42] = hc_byte_perm (w[36], w[37], selector); - w[41] = hc_byte_perm (w[35], w[36], selector); - w[40] = hc_byte_perm (w[34], w[35], selector); - w[39] = hc_byte_perm (w[33], w[34], selector); - w[38] = hc_byte_perm (w[32], w[33], selector); - w[37] = hc_byte_perm (w[31], w[32], selector); - w[36] = hc_byte_perm (w[30], w[31], selector); - w[35] = hc_byte_perm (w[29], w[30], selector); - w[34] = hc_byte_perm (w[28], w[29], selector); - w[33] = hc_byte_perm (w[27], w[28], selector); - w[32] = hc_byte_perm (w[26], w[27], selector); - w[31] = hc_byte_perm (w[25], w[26], selector); - w[30] = hc_byte_perm (w[24], w[25], selector); - w[29] = hc_byte_perm (w[23], w[24], selector); - w[28] = hc_byte_perm (w[22], w[23], selector); - w[27] = hc_byte_perm (w[21], w[22], selector); - w[26] = hc_byte_perm (w[20], w[21], selector); - w[25] = hc_byte_perm (w[19], w[20], selector); - w[24] = hc_byte_perm (w[18], w[19], selector); - w[23] = hc_byte_perm (w[17], w[18], selector); - w[22] = hc_byte_perm (w[16], w[17], selector); - w[21] = hc_byte_perm (w[15], w[16], selector); - w[20] = hc_byte_perm (w[14], w[15], selector); - w[19] = hc_byte_perm (w[13], w[14], selector); - w[18] = hc_byte_perm (w[12], w[13], selector); - w[17] = hc_byte_perm (w[11], w[12], selector); - w[16] = hc_byte_perm (w[10], w[11], selector); - w[15] = hc_byte_perm (w[ 9], w[10], selector); - w[14] = hc_byte_perm (w[ 8], w[ 9], selector); - w[13] = hc_byte_perm (w[ 7], w[ 8], selector); - w[12] = hc_byte_perm (w[ 6], w[ 7], selector); - w[11] = hc_byte_perm (w[ 5], w[ 6], selector); - w[10] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 9] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 8] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 7] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 6] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 5] = hc_byte_perm ( 0, w[ 0], selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm (w[56], w[57], selector); - w[62] = hc_byte_perm (w[55], w[56], selector); - w[61] = hc_byte_perm (w[54], w[55], selector); - w[60] = hc_byte_perm (w[53], w[54], selector); - w[59] = hc_byte_perm (w[52], w[53], selector); - w[58] = hc_byte_perm (w[51], w[52], selector); - w[57] = hc_byte_perm (w[50], w[51], selector); - w[56] = hc_byte_perm (w[49], w[50], selector); - w[55] = hc_byte_perm (w[48], w[49], selector); - w[54] = hc_byte_perm (w[47], w[48], selector); - w[53] = hc_byte_perm (w[46], w[47], selector); - w[52] = hc_byte_perm (w[45], w[46], selector); - w[51] = hc_byte_perm (w[44], w[45], selector); - w[50] = hc_byte_perm (w[43], w[44], selector); - w[49] = hc_byte_perm (w[42], w[43], selector); - w[48] = hc_byte_perm (w[41], w[42], selector); - w[47] = hc_byte_perm (w[40], w[41], selector); - w[46] = hc_byte_perm (w[39], w[40], selector); - w[45] = hc_byte_perm (w[38], w[39], selector); - w[44] = hc_byte_perm (w[37], w[38], selector); - w[43] = hc_byte_perm (w[36], w[37], selector); - w[42] = hc_byte_perm (w[35], w[36], selector); - w[41] = hc_byte_perm (w[34], w[35], selector); - w[40] = hc_byte_perm (w[33], w[34], selector); - w[39] = hc_byte_perm (w[32], w[33], selector); - w[38] = hc_byte_perm (w[31], w[32], selector); - w[37] = hc_byte_perm (w[30], w[31], selector); - w[36] = hc_byte_perm (w[29], w[30], selector); - w[35] = hc_byte_perm (w[28], w[29], selector); - w[34] = hc_byte_perm (w[27], w[28], selector); - w[33] = hc_byte_perm (w[26], w[27], selector); - w[32] = hc_byte_perm (w[25], w[26], selector); - w[31] = hc_byte_perm (w[24], w[25], selector); - w[30] = hc_byte_perm (w[23], w[24], selector); - w[29] = hc_byte_perm (w[22], w[23], selector); - w[28] = hc_byte_perm (w[21], w[22], selector); - w[27] = hc_byte_perm (w[20], w[21], selector); - w[26] = hc_byte_perm (w[19], w[20], selector); - w[25] = hc_byte_perm (w[18], w[19], selector); - w[24] = hc_byte_perm (w[17], w[18], selector); - w[23] = hc_byte_perm (w[16], w[17], selector); - w[22] = hc_byte_perm (w[15], w[16], selector); - w[21] = hc_byte_perm (w[14], w[15], selector); - w[20] = hc_byte_perm (w[13], w[14], selector); - w[19] = hc_byte_perm (w[12], w[13], selector); - w[18] = hc_byte_perm (w[11], w[12], selector); - w[17] = hc_byte_perm (w[10], w[11], selector); - w[16] = hc_byte_perm (w[ 9], w[10], selector); - w[15] = hc_byte_perm (w[ 8], w[ 9], selector); - w[14] = hc_byte_perm (w[ 7], w[ 8], selector); - w[13] = hc_byte_perm (w[ 6], w[ 7], selector); - w[12] = hc_byte_perm (w[ 5], w[ 6], selector); - w[11] = hc_byte_perm (w[ 4], w[ 5], selector); - w[10] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 9] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 8] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 7] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 6] = hc_byte_perm ( 0, w[ 0], selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm (w[55], w[56], selector); - w[62] = hc_byte_perm (w[54], w[55], selector); - w[61] = hc_byte_perm (w[53], w[54], selector); - w[60] = hc_byte_perm (w[52], w[53], selector); - w[59] = hc_byte_perm (w[51], w[52], selector); - w[58] = hc_byte_perm (w[50], w[51], selector); - w[57] = hc_byte_perm (w[49], w[50], selector); - w[56] = hc_byte_perm (w[48], w[49], selector); - w[55] = hc_byte_perm (w[47], w[48], selector); - w[54] = hc_byte_perm (w[46], w[47], selector); - w[53] = hc_byte_perm (w[45], w[46], selector); - w[52] = hc_byte_perm (w[44], w[45], selector); - w[51] = hc_byte_perm (w[43], w[44], selector); - w[50] = hc_byte_perm (w[42], w[43], selector); - w[49] = hc_byte_perm (w[41], w[42], selector); - w[48] = hc_byte_perm (w[40], w[41], selector); - w[47] = hc_byte_perm (w[39], w[40], selector); - w[46] = hc_byte_perm (w[38], w[39], selector); - w[45] = hc_byte_perm (w[37], w[38], selector); - w[44] = hc_byte_perm (w[36], w[37], selector); - w[43] = hc_byte_perm (w[35], w[36], selector); - w[42] = hc_byte_perm (w[34], w[35], selector); - w[41] = hc_byte_perm (w[33], w[34], selector); - w[40] = hc_byte_perm (w[32], w[33], selector); - w[39] = hc_byte_perm (w[31], w[32], selector); - w[38] = hc_byte_perm (w[30], w[31], selector); - w[37] = hc_byte_perm (w[29], w[30], selector); - w[36] = hc_byte_perm (w[28], w[29], selector); - w[35] = hc_byte_perm (w[27], w[28], selector); - w[34] = hc_byte_perm (w[26], w[27], selector); - w[33] = hc_byte_perm (w[25], w[26], selector); - w[32] = hc_byte_perm (w[24], w[25], selector); - w[31] = hc_byte_perm (w[23], w[24], selector); - w[30] = hc_byte_perm (w[22], w[23], selector); - w[29] = hc_byte_perm (w[21], w[22], selector); - w[28] = hc_byte_perm (w[20], w[21], selector); - w[27] = hc_byte_perm (w[19], w[20], selector); - w[26] = hc_byte_perm (w[18], w[19], selector); - w[25] = hc_byte_perm (w[17], w[18], selector); - w[24] = hc_byte_perm (w[16], w[17], selector); - w[23] = hc_byte_perm (w[15], w[16], selector); - w[22] = hc_byte_perm (w[14], w[15], selector); - w[21] = hc_byte_perm (w[13], w[14], selector); - w[20] = hc_byte_perm (w[12], w[13], selector); - w[19] = hc_byte_perm (w[11], w[12], selector); - w[18] = hc_byte_perm (w[10], w[11], selector); - w[17] = hc_byte_perm (w[ 9], w[10], selector); - w[16] = hc_byte_perm (w[ 8], w[ 9], selector); - w[15] = hc_byte_perm (w[ 7], w[ 8], selector); - w[14] = hc_byte_perm (w[ 6], w[ 7], selector); - w[13] = hc_byte_perm (w[ 5], w[ 6], selector); - w[12] = hc_byte_perm (w[ 4], w[ 5], selector); - w[11] = hc_byte_perm (w[ 3], w[ 4], selector); - w[10] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 9] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 8] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 7] = hc_byte_perm ( 0, w[ 0], selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm (w[54], w[55], selector); - w[62] = hc_byte_perm (w[53], w[54], selector); - w[61] = hc_byte_perm (w[52], w[53], selector); - w[60] = hc_byte_perm (w[51], w[52], selector); - w[59] = hc_byte_perm (w[50], w[51], selector); - w[58] = hc_byte_perm (w[49], w[50], selector); - w[57] = hc_byte_perm (w[48], w[49], selector); - w[56] = hc_byte_perm (w[47], w[48], selector); - w[55] = hc_byte_perm (w[46], w[47], selector); - w[54] = hc_byte_perm (w[45], w[46], selector); - w[53] = hc_byte_perm (w[44], w[45], selector); - w[52] = hc_byte_perm (w[43], w[44], selector); - w[51] = hc_byte_perm (w[42], w[43], selector); - w[50] = hc_byte_perm (w[41], w[42], selector); - w[49] = hc_byte_perm (w[40], w[41], selector); - w[48] = hc_byte_perm (w[39], w[40], selector); - w[47] = hc_byte_perm (w[38], w[39], selector); - w[46] = hc_byte_perm (w[37], w[38], selector); - w[45] = hc_byte_perm (w[36], w[37], selector); - w[44] = hc_byte_perm (w[35], w[36], selector); - w[43] = hc_byte_perm (w[34], w[35], selector); - w[42] = hc_byte_perm (w[33], w[34], selector); - w[41] = hc_byte_perm (w[32], w[33], selector); - w[40] = hc_byte_perm (w[31], w[32], selector); - w[39] = hc_byte_perm (w[30], w[31], selector); - w[38] = hc_byte_perm (w[29], w[30], selector); - w[37] = hc_byte_perm (w[28], w[29], selector); - w[36] = hc_byte_perm (w[27], w[28], selector); - w[35] = hc_byte_perm (w[26], w[27], selector); - w[34] = hc_byte_perm (w[25], w[26], selector); - w[33] = hc_byte_perm (w[24], w[25], selector); - w[32] = hc_byte_perm (w[23], w[24], selector); - w[31] = hc_byte_perm (w[22], w[23], selector); - w[30] = hc_byte_perm (w[21], w[22], selector); - w[29] = hc_byte_perm (w[20], w[21], selector); - w[28] = hc_byte_perm (w[19], w[20], selector); - w[27] = hc_byte_perm (w[18], w[19], selector); - w[26] = hc_byte_perm (w[17], w[18], selector); - w[25] = hc_byte_perm (w[16], w[17], selector); - w[24] = hc_byte_perm (w[15], w[16], selector); - w[23] = hc_byte_perm (w[14], w[15], selector); - w[22] = hc_byte_perm (w[13], w[14], selector); - w[21] = hc_byte_perm (w[12], w[13], selector); - w[20] = hc_byte_perm (w[11], w[12], selector); - w[19] = hc_byte_perm (w[10], w[11], selector); - w[18] = hc_byte_perm (w[ 9], w[10], selector); - w[17] = hc_byte_perm (w[ 8], w[ 9], selector); - w[16] = hc_byte_perm (w[ 7], w[ 8], selector); - w[15] = hc_byte_perm (w[ 6], w[ 7], selector); - w[14] = hc_byte_perm (w[ 5], w[ 6], selector); - w[13] = hc_byte_perm (w[ 4], w[ 5], selector); - w[12] = hc_byte_perm (w[ 3], w[ 4], selector); - w[11] = hc_byte_perm (w[ 2], w[ 3], selector); - w[10] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 9] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 8] = hc_byte_perm ( 0, w[ 0], selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm (w[53], w[54], selector); - w[62] = hc_byte_perm (w[52], w[53], selector); - w[61] = hc_byte_perm (w[51], w[52], selector); - w[60] = hc_byte_perm (w[50], w[51], selector); - w[59] = hc_byte_perm (w[49], w[50], selector); - w[58] = hc_byte_perm (w[48], w[49], selector); - w[57] = hc_byte_perm (w[47], w[48], selector); - w[56] = hc_byte_perm (w[46], w[47], selector); - w[55] = hc_byte_perm (w[45], w[46], selector); - w[54] = hc_byte_perm (w[44], w[45], selector); - w[53] = hc_byte_perm (w[43], w[44], selector); - w[52] = hc_byte_perm (w[42], w[43], selector); - w[51] = hc_byte_perm (w[41], w[42], selector); - w[50] = hc_byte_perm (w[40], w[41], selector); - w[49] = hc_byte_perm (w[39], w[40], selector); - w[48] = hc_byte_perm (w[38], w[39], selector); - w[47] = hc_byte_perm (w[37], w[38], selector); - w[46] = hc_byte_perm (w[36], w[37], selector); - w[45] = hc_byte_perm (w[35], w[36], selector); - w[44] = hc_byte_perm (w[34], w[35], selector); - w[43] = hc_byte_perm (w[33], w[34], selector); - w[42] = hc_byte_perm (w[32], w[33], selector); - w[41] = hc_byte_perm (w[31], w[32], selector); - w[40] = hc_byte_perm (w[30], w[31], selector); - w[39] = hc_byte_perm (w[29], w[30], selector); - w[38] = hc_byte_perm (w[28], w[29], selector); - w[37] = hc_byte_perm (w[27], w[28], selector); - w[36] = hc_byte_perm (w[26], w[27], selector); - w[35] = hc_byte_perm (w[25], w[26], selector); - w[34] = hc_byte_perm (w[24], w[25], selector); - w[33] = hc_byte_perm (w[23], w[24], selector); - w[32] = hc_byte_perm (w[22], w[23], selector); - w[31] = hc_byte_perm (w[21], w[22], selector); - w[30] = hc_byte_perm (w[20], w[21], selector); - w[29] = hc_byte_perm (w[19], w[20], selector); - w[28] = hc_byte_perm (w[18], w[19], selector); - w[27] = hc_byte_perm (w[17], w[18], selector); - w[26] = hc_byte_perm (w[16], w[17], selector); - w[25] = hc_byte_perm (w[15], w[16], selector); - w[24] = hc_byte_perm (w[14], w[15], selector); - w[23] = hc_byte_perm (w[13], w[14], selector); - w[22] = hc_byte_perm (w[12], w[13], selector); - w[21] = hc_byte_perm (w[11], w[12], selector); - w[20] = hc_byte_perm (w[10], w[11], selector); - w[19] = hc_byte_perm (w[ 9], w[10], selector); - w[18] = hc_byte_perm (w[ 8], w[ 9], selector); - w[17] = hc_byte_perm (w[ 7], w[ 8], selector); - w[16] = hc_byte_perm (w[ 6], w[ 7], selector); - w[15] = hc_byte_perm (w[ 5], w[ 6], selector); - w[14] = hc_byte_perm (w[ 4], w[ 5], selector); - w[13] = hc_byte_perm (w[ 3], w[ 4], selector); - w[12] = hc_byte_perm (w[ 2], w[ 3], selector); - w[11] = hc_byte_perm (w[ 1], w[ 2], selector); - w[10] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 9] = hc_byte_perm ( 0, w[ 0], selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm (w[52], w[53], selector); - w[62] = hc_byte_perm (w[51], w[52], selector); - w[61] = hc_byte_perm (w[50], w[51], selector); - w[60] = hc_byte_perm (w[49], w[50], selector); - w[59] = hc_byte_perm (w[48], w[49], selector); - w[58] = hc_byte_perm (w[47], w[48], selector); - w[57] = hc_byte_perm (w[46], w[47], selector); - w[56] = hc_byte_perm (w[45], w[46], selector); - w[55] = hc_byte_perm (w[44], w[45], selector); - w[54] = hc_byte_perm (w[43], w[44], selector); - w[53] = hc_byte_perm (w[42], w[43], selector); - w[52] = hc_byte_perm (w[41], w[42], selector); - w[51] = hc_byte_perm (w[40], w[41], selector); - w[50] = hc_byte_perm (w[39], w[40], selector); - w[49] = hc_byte_perm (w[38], w[39], selector); - w[48] = hc_byte_perm (w[37], w[38], selector); - w[47] = hc_byte_perm (w[36], w[37], selector); - w[46] = hc_byte_perm (w[35], w[36], selector); - w[45] = hc_byte_perm (w[34], w[35], selector); - w[44] = hc_byte_perm (w[33], w[34], selector); - w[43] = hc_byte_perm (w[32], w[33], selector); - w[42] = hc_byte_perm (w[31], w[32], selector); - w[41] = hc_byte_perm (w[30], w[31], selector); - w[40] = hc_byte_perm (w[29], w[30], selector); - w[39] = hc_byte_perm (w[28], w[29], selector); - w[38] = hc_byte_perm (w[27], w[28], selector); - w[37] = hc_byte_perm (w[26], w[27], selector); - w[36] = hc_byte_perm (w[25], w[26], selector); - w[35] = hc_byte_perm (w[24], w[25], selector); - w[34] = hc_byte_perm (w[23], w[24], selector); - w[33] = hc_byte_perm (w[22], w[23], selector); - w[32] = hc_byte_perm (w[21], w[22], selector); - w[31] = hc_byte_perm (w[20], w[21], selector); - w[30] = hc_byte_perm (w[19], w[20], selector); - w[29] = hc_byte_perm (w[18], w[19], selector); - w[28] = hc_byte_perm (w[17], w[18], selector); - w[27] = hc_byte_perm (w[16], w[17], selector); - w[26] = hc_byte_perm (w[15], w[16], selector); - w[25] = hc_byte_perm (w[14], w[15], selector); - w[24] = hc_byte_perm (w[13], w[14], selector); - w[23] = hc_byte_perm (w[12], w[13], selector); - w[22] = hc_byte_perm (w[11], w[12], selector); - w[21] = hc_byte_perm (w[10], w[11], selector); - w[20] = hc_byte_perm (w[ 9], w[10], selector); - w[19] = hc_byte_perm (w[ 8], w[ 9], selector); - w[18] = hc_byte_perm (w[ 7], w[ 8], selector); - w[17] = hc_byte_perm (w[ 6], w[ 7], selector); - w[16] = hc_byte_perm (w[ 5], w[ 6], selector); - w[15] = hc_byte_perm (w[ 4], w[ 5], selector); - w[14] = hc_byte_perm (w[ 3], w[ 4], selector); - w[13] = hc_byte_perm (w[ 2], w[ 3], selector); - w[12] = hc_byte_perm (w[ 1], w[ 2], selector); - w[11] = hc_byte_perm (w[ 0], w[ 1], selector); - w[10] = hc_byte_perm ( 0, w[ 0], selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm (w[51], w[52], selector); - w[62] = hc_byte_perm (w[50], w[51], selector); - w[61] = hc_byte_perm (w[49], w[50], selector); - w[60] = hc_byte_perm (w[48], w[49], selector); - w[59] = hc_byte_perm (w[47], w[48], selector); - w[58] = hc_byte_perm (w[46], w[47], selector); - w[57] = hc_byte_perm (w[45], w[46], selector); - w[56] = hc_byte_perm (w[44], w[45], selector); - w[55] = hc_byte_perm (w[43], w[44], selector); - w[54] = hc_byte_perm (w[42], w[43], selector); - w[53] = hc_byte_perm (w[41], w[42], selector); - w[52] = hc_byte_perm (w[40], w[41], selector); - w[51] = hc_byte_perm (w[39], w[40], selector); - w[50] = hc_byte_perm (w[38], w[39], selector); - w[49] = hc_byte_perm (w[37], w[38], selector); - w[48] = hc_byte_perm (w[36], w[37], selector); - w[47] = hc_byte_perm (w[35], w[36], selector); - w[46] = hc_byte_perm (w[34], w[35], selector); - w[45] = hc_byte_perm (w[33], w[34], selector); - w[44] = hc_byte_perm (w[32], w[33], selector); - w[43] = hc_byte_perm (w[31], w[32], selector); - w[42] = hc_byte_perm (w[30], w[31], selector); - w[41] = hc_byte_perm (w[29], w[30], selector); - w[40] = hc_byte_perm (w[28], w[29], selector); - w[39] = hc_byte_perm (w[27], w[28], selector); - w[38] = hc_byte_perm (w[26], w[27], selector); - w[37] = hc_byte_perm (w[25], w[26], selector); - w[36] = hc_byte_perm (w[24], w[25], selector); - w[35] = hc_byte_perm (w[23], w[24], selector); - w[34] = hc_byte_perm (w[22], w[23], selector); - w[33] = hc_byte_perm (w[21], w[22], selector); - w[32] = hc_byte_perm (w[20], w[21], selector); - w[31] = hc_byte_perm (w[19], w[20], selector); - w[30] = hc_byte_perm (w[18], w[19], selector); - w[29] = hc_byte_perm (w[17], w[18], selector); - w[28] = hc_byte_perm (w[16], w[17], selector); - w[27] = hc_byte_perm (w[15], w[16], selector); - w[26] = hc_byte_perm (w[14], w[15], selector); - w[25] = hc_byte_perm (w[13], w[14], selector); - w[24] = hc_byte_perm (w[12], w[13], selector); - w[23] = hc_byte_perm (w[11], w[12], selector); - w[22] = hc_byte_perm (w[10], w[11], selector); - w[21] = hc_byte_perm (w[ 9], w[10], selector); - w[20] = hc_byte_perm (w[ 8], w[ 9], selector); - w[19] = hc_byte_perm (w[ 7], w[ 8], selector); - w[18] = hc_byte_perm (w[ 6], w[ 7], selector); - w[17] = hc_byte_perm (w[ 5], w[ 6], selector); - w[16] = hc_byte_perm (w[ 4], w[ 5], selector); - w[15] = hc_byte_perm (w[ 3], w[ 4], selector); - w[14] = hc_byte_perm (w[ 2], w[ 3], selector); - w[13] = hc_byte_perm (w[ 1], w[ 2], selector); - w[12] = hc_byte_perm (w[ 0], w[ 1], selector); - w[11] = hc_byte_perm ( 0, w[ 0], selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm (w[50], w[51], selector); - w[62] = hc_byte_perm (w[49], w[50], selector); - w[61] = hc_byte_perm (w[48], w[49], selector); - w[60] = hc_byte_perm (w[47], w[48], selector); - w[59] = hc_byte_perm (w[46], w[47], selector); - w[58] = hc_byte_perm (w[45], w[46], selector); - w[57] = hc_byte_perm (w[44], w[45], selector); - w[56] = hc_byte_perm (w[43], w[44], selector); - w[55] = hc_byte_perm (w[42], w[43], selector); - w[54] = hc_byte_perm (w[41], w[42], selector); - w[53] = hc_byte_perm (w[40], w[41], selector); - w[52] = hc_byte_perm (w[39], w[40], selector); - w[51] = hc_byte_perm (w[38], w[39], selector); - w[50] = hc_byte_perm (w[37], w[38], selector); - w[49] = hc_byte_perm (w[36], w[37], selector); - w[48] = hc_byte_perm (w[35], w[36], selector); - w[47] = hc_byte_perm (w[34], w[35], selector); - w[46] = hc_byte_perm (w[33], w[34], selector); - w[45] = hc_byte_perm (w[32], w[33], selector); - w[44] = hc_byte_perm (w[31], w[32], selector); - w[43] = hc_byte_perm (w[30], w[31], selector); - w[42] = hc_byte_perm (w[29], w[30], selector); - w[41] = hc_byte_perm (w[28], w[29], selector); - w[40] = hc_byte_perm (w[27], w[28], selector); - w[39] = hc_byte_perm (w[26], w[27], selector); - w[38] = hc_byte_perm (w[25], w[26], selector); - w[37] = hc_byte_perm (w[24], w[25], selector); - w[36] = hc_byte_perm (w[23], w[24], selector); - w[35] = hc_byte_perm (w[22], w[23], selector); - w[34] = hc_byte_perm (w[21], w[22], selector); - w[33] = hc_byte_perm (w[20], w[21], selector); - w[32] = hc_byte_perm (w[19], w[20], selector); - w[31] = hc_byte_perm (w[18], w[19], selector); - w[30] = hc_byte_perm (w[17], w[18], selector); - w[29] = hc_byte_perm (w[16], w[17], selector); - w[28] = hc_byte_perm (w[15], w[16], selector); - w[27] = hc_byte_perm (w[14], w[15], selector); - w[26] = hc_byte_perm (w[13], w[14], selector); - w[25] = hc_byte_perm (w[12], w[13], selector); - w[24] = hc_byte_perm (w[11], w[12], selector); - w[23] = hc_byte_perm (w[10], w[11], selector); - w[22] = hc_byte_perm (w[ 9], w[10], selector); - w[21] = hc_byte_perm (w[ 8], w[ 9], selector); - w[20] = hc_byte_perm (w[ 7], w[ 8], selector); - w[19] = hc_byte_perm (w[ 6], w[ 7], selector); - w[18] = hc_byte_perm (w[ 5], w[ 6], selector); - w[17] = hc_byte_perm (w[ 4], w[ 5], selector); - w[16] = hc_byte_perm (w[ 3], w[ 4], selector); - w[15] = hc_byte_perm (w[ 2], w[ 3], selector); - w[14] = hc_byte_perm (w[ 1], w[ 2], selector); - w[13] = hc_byte_perm (w[ 0], w[ 1], selector); - w[12] = hc_byte_perm ( 0, w[ 0], selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm (w[49], w[50], selector); - w[62] = hc_byte_perm (w[48], w[49], selector); - w[61] = hc_byte_perm (w[47], w[48], selector); - w[60] = hc_byte_perm (w[46], w[47], selector); - w[59] = hc_byte_perm (w[45], w[46], selector); - w[58] = hc_byte_perm (w[44], w[45], selector); - w[57] = hc_byte_perm (w[43], w[44], selector); - w[56] = hc_byte_perm (w[42], w[43], selector); - w[55] = hc_byte_perm (w[41], w[42], selector); - w[54] = hc_byte_perm (w[40], w[41], selector); - w[53] = hc_byte_perm (w[39], w[40], selector); - w[52] = hc_byte_perm (w[38], w[39], selector); - w[51] = hc_byte_perm (w[37], w[38], selector); - w[50] = hc_byte_perm (w[36], w[37], selector); - w[49] = hc_byte_perm (w[35], w[36], selector); - w[48] = hc_byte_perm (w[34], w[35], selector); - w[47] = hc_byte_perm (w[33], w[34], selector); - w[46] = hc_byte_perm (w[32], w[33], selector); - w[45] = hc_byte_perm (w[31], w[32], selector); - w[44] = hc_byte_perm (w[30], w[31], selector); - w[43] = hc_byte_perm (w[29], w[30], selector); - w[42] = hc_byte_perm (w[28], w[29], selector); - w[41] = hc_byte_perm (w[27], w[28], selector); - w[40] = hc_byte_perm (w[26], w[27], selector); - w[39] = hc_byte_perm (w[25], w[26], selector); - w[38] = hc_byte_perm (w[24], w[25], selector); - w[37] = hc_byte_perm (w[23], w[24], selector); - w[36] = hc_byte_perm (w[22], w[23], selector); - w[35] = hc_byte_perm (w[21], w[22], selector); - w[34] = hc_byte_perm (w[20], w[21], selector); - w[33] = hc_byte_perm (w[19], w[20], selector); - w[32] = hc_byte_perm (w[18], w[19], selector); - w[31] = hc_byte_perm (w[17], w[18], selector); - w[30] = hc_byte_perm (w[16], w[17], selector); - w[29] = hc_byte_perm (w[15], w[16], selector); - w[28] = hc_byte_perm (w[14], w[15], selector); - w[27] = hc_byte_perm (w[13], w[14], selector); - w[26] = hc_byte_perm (w[12], w[13], selector); - w[25] = hc_byte_perm (w[11], w[12], selector); - w[24] = hc_byte_perm (w[10], w[11], selector); - w[23] = hc_byte_perm (w[ 9], w[10], selector); - w[22] = hc_byte_perm (w[ 8], w[ 9], selector); - w[21] = hc_byte_perm (w[ 7], w[ 8], selector); - w[20] = hc_byte_perm (w[ 6], w[ 7], selector); - w[19] = hc_byte_perm (w[ 5], w[ 6], selector); - w[18] = hc_byte_perm (w[ 4], w[ 5], selector); - w[17] = hc_byte_perm (w[ 3], w[ 4], selector); - w[16] = hc_byte_perm (w[ 2], w[ 3], selector); - w[15] = hc_byte_perm (w[ 1], w[ 2], selector); - w[14] = hc_byte_perm (w[ 0], w[ 1], selector); - w[13] = hc_byte_perm ( 0, w[ 0], selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm (w[48], w[49], selector); - w[62] = hc_byte_perm (w[47], w[48], selector); - w[61] = hc_byte_perm (w[46], w[47], selector); - w[60] = hc_byte_perm (w[45], w[46], selector); - w[59] = hc_byte_perm (w[44], w[45], selector); - w[58] = hc_byte_perm (w[43], w[44], selector); - w[57] = hc_byte_perm (w[42], w[43], selector); - w[56] = hc_byte_perm (w[41], w[42], selector); - w[55] = hc_byte_perm (w[40], w[41], selector); - w[54] = hc_byte_perm (w[39], w[40], selector); - w[53] = hc_byte_perm (w[38], w[39], selector); - w[52] = hc_byte_perm (w[37], w[38], selector); - w[51] = hc_byte_perm (w[36], w[37], selector); - w[50] = hc_byte_perm (w[35], w[36], selector); - w[49] = hc_byte_perm (w[34], w[35], selector); - w[48] = hc_byte_perm (w[33], w[34], selector); - w[47] = hc_byte_perm (w[32], w[33], selector); - w[46] = hc_byte_perm (w[31], w[32], selector); - w[45] = hc_byte_perm (w[30], w[31], selector); - w[44] = hc_byte_perm (w[29], w[30], selector); - w[43] = hc_byte_perm (w[28], w[29], selector); - w[42] = hc_byte_perm (w[27], w[28], selector); - w[41] = hc_byte_perm (w[26], w[27], selector); - w[40] = hc_byte_perm (w[25], w[26], selector); - w[39] = hc_byte_perm (w[24], w[25], selector); - w[38] = hc_byte_perm (w[23], w[24], selector); - w[37] = hc_byte_perm (w[22], w[23], selector); - w[36] = hc_byte_perm (w[21], w[22], selector); - w[35] = hc_byte_perm (w[20], w[21], selector); - w[34] = hc_byte_perm (w[19], w[20], selector); - w[33] = hc_byte_perm (w[18], w[19], selector); - w[32] = hc_byte_perm (w[17], w[18], selector); - w[31] = hc_byte_perm (w[16], w[17], selector); - w[30] = hc_byte_perm (w[15], w[16], selector); - w[29] = hc_byte_perm (w[14], w[15], selector); - w[28] = hc_byte_perm (w[13], w[14], selector); - w[27] = hc_byte_perm (w[12], w[13], selector); - w[26] = hc_byte_perm (w[11], w[12], selector); - w[25] = hc_byte_perm (w[10], w[11], selector); - w[24] = hc_byte_perm (w[ 9], w[10], selector); - w[23] = hc_byte_perm (w[ 8], w[ 9], selector); - w[22] = hc_byte_perm (w[ 7], w[ 8], selector); - w[21] = hc_byte_perm (w[ 6], w[ 7], selector); - w[20] = hc_byte_perm (w[ 5], w[ 6], selector); - w[19] = hc_byte_perm (w[ 4], w[ 5], selector); - w[18] = hc_byte_perm (w[ 3], w[ 4], selector); - w[17] = hc_byte_perm (w[ 2], w[ 3], selector); - w[16] = hc_byte_perm (w[ 1], w[ 2], selector); - w[15] = hc_byte_perm (w[ 0], w[ 1], selector); - w[14] = hc_byte_perm ( 0, w[ 0], selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm (w[47], w[48], selector); - w[62] = hc_byte_perm (w[46], w[47], selector); - w[61] = hc_byte_perm (w[45], w[46], selector); - w[60] = hc_byte_perm (w[44], w[45], selector); - w[59] = hc_byte_perm (w[43], w[44], selector); - w[58] = hc_byte_perm (w[42], w[43], selector); - w[57] = hc_byte_perm (w[41], w[42], selector); - w[56] = hc_byte_perm (w[40], w[41], selector); - w[55] = hc_byte_perm (w[39], w[40], selector); - w[54] = hc_byte_perm (w[38], w[39], selector); - w[53] = hc_byte_perm (w[37], w[38], selector); - w[52] = hc_byte_perm (w[36], w[37], selector); - w[51] = hc_byte_perm (w[35], w[36], selector); - w[50] = hc_byte_perm (w[34], w[35], selector); - w[49] = hc_byte_perm (w[33], w[34], selector); - w[48] = hc_byte_perm (w[32], w[33], selector); - w[47] = hc_byte_perm (w[31], w[32], selector); - w[46] = hc_byte_perm (w[30], w[31], selector); - w[45] = hc_byte_perm (w[29], w[30], selector); - w[44] = hc_byte_perm (w[28], w[29], selector); - w[43] = hc_byte_perm (w[27], w[28], selector); - w[42] = hc_byte_perm (w[26], w[27], selector); - w[41] = hc_byte_perm (w[25], w[26], selector); - w[40] = hc_byte_perm (w[24], w[25], selector); - w[39] = hc_byte_perm (w[23], w[24], selector); - w[38] = hc_byte_perm (w[22], w[23], selector); - w[37] = hc_byte_perm (w[21], w[22], selector); - w[36] = hc_byte_perm (w[20], w[21], selector); - w[35] = hc_byte_perm (w[19], w[20], selector); - w[34] = hc_byte_perm (w[18], w[19], selector); - w[33] = hc_byte_perm (w[17], w[18], selector); - w[32] = hc_byte_perm (w[16], w[17], selector); - w[31] = hc_byte_perm (w[15], w[16], selector); - w[30] = hc_byte_perm (w[14], w[15], selector); - w[29] = hc_byte_perm (w[13], w[14], selector); - w[28] = hc_byte_perm (w[12], w[13], selector); - w[27] = hc_byte_perm (w[11], w[12], selector); - w[26] = hc_byte_perm (w[10], w[11], selector); - w[25] = hc_byte_perm (w[ 9], w[10], selector); - w[24] = hc_byte_perm (w[ 8], w[ 9], selector); - w[23] = hc_byte_perm (w[ 7], w[ 8], selector); - w[22] = hc_byte_perm (w[ 6], w[ 7], selector); - w[21] = hc_byte_perm (w[ 5], w[ 6], selector); - w[20] = hc_byte_perm (w[ 4], w[ 5], selector); - w[19] = hc_byte_perm (w[ 3], w[ 4], selector); - w[18] = hc_byte_perm (w[ 2], w[ 3], selector); - w[17] = hc_byte_perm (w[ 1], w[ 2], selector); - w[16] = hc_byte_perm (w[ 0], w[ 1], selector); - w[15] = hc_byte_perm ( 0, w[ 0], selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm (w[46], w[47], selector); - w[62] = hc_byte_perm (w[45], w[46], selector); - w[61] = hc_byte_perm (w[44], w[45], selector); - w[60] = hc_byte_perm (w[43], w[44], selector); - w[59] = hc_byte_perm (w[42], w[43], selector); - w[58] = hc_byte_perm (w[41], w[42], selector); - w[57] = hc_byte_perm (w[40], w[41], selector); - w[56] = hc_byte_perm (w[39], w[40], selector); - w[55] = hc_byte_perm (w[38], w[39], selector); - w[54] = hc_byte_perm (w[37], w[38], selector); - w[53] = hc_byte_perm (w[36], w[37], selector); - w[52] = hc_byte_perm (w[35], w[36], selector); - w[51] = hc_byte_perm (w[34], w[35], selector); - w[50] = hc_byte_perm (w[33], w[34], selector); - w[49] = hc_byte_perm (w[32], w[33], selector); - w[48] = hc_byte_perm (w[31], w[32], selector); - w[47] = hc_byte_perm (w[30], w[31], selector); - w[46] = hc_byte_perm (w[29], w[30], selector); - w[45] = hc_byte_perm (w[28], w[29], selector); - w[44] = hc_byte_perm (w[27], w[28], selector); - w[43] = hc_byte_perm (w[26], w[27], selector); - w[42] = hc_byte_perm (w[25], w[26], selector); - w[41] = hc_byte_perm (w[24], w[25], selector); - w[40] = hc_byte_perm (w[23], w[24], selector); - w[39] = hc_byte_perm (w[22], w[23], selector); - w[38] = hc_byte_perm (w[21], w[22], selector); - w[37] = hc_byte_perm (w[20], w[21], selector); - w[36] = hc_byte_perm (w[19], w[20], selector); - w[35] = hc_byte_perm (w[18], w[19], selector); - w[34] = hc_byte_perm (w[17], w[18], selector); - w[33] = hc_byte_perm (w[16], w[17], selector); - w[32] = hc_byte_perm (w[15], w[16], selector); - w[31] = hc_byte_perm (w[14], w[15], selector); - w[30] = hc_byte_perm (w[13], w[14], selector); - w[29] = hc_byte_perm (w[12], w[13], selector); - w[28] = hc_byte_perm (w[11], w[12], selector); - w[27] = hc_byte_perm (w[10], w[11], selector); - w[26] = hc_byte_perm (w[ 9], w[10], selector); - w[25] = hc_byte_perm (w[ 8], w[ 9], selector); - w[24] = hc_byte_perm (w[ 7], w[ 8], selector); - w[23] = hc_byte_perm (w[ 6], w[ 7], selector); - w[22] = hc_byte_perm (w[ 5], w[ 6], selector); - w[21] = hc_byte_perm (w[ 4], w[ 5], selector); - w[20] = hc_byte_perm (w[ 3], w[ 4], selector); - w[19] = hc_byte_perm (w[ 2], w[ 3], selector); - w[18] = hc_byte_perm (w[ 1], w[ 2], selector); - w[17] = hc_byte_perm (w[ 0], w[ 1], selector); - w[16] = hc_byte_perm ( 0, w[ 0], selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm (w[45], w[46], selector); - w[62] = hc_byte_perm (w[44], w[45], selector); - w[61] = hc_byte_perm (w[43], w[44], selector); - w[60] = hc_byte_perm (w[42], w[43], selector); - w[59] = hc_byte_perm (w[41], w[42], selector); - w[58] = hc_byte_perm (w[40], w[41], selector); - w[57] = hc_byte_perm (w[39], w[40], selector); - w[56] = hc_byte_perm (w[38], w[39], selector); - w[55] = hc_byte_perm (w[37], w[38], selector); - w[54] = hc_byte_perm (w[36], w[37], selector); - w[53] = hc_byte_perm (w[35], w[36], selector); - w[52] = hc_byte_perm (w[34], w[35], selector); - w[51] = hc_byte_perm (w[33], w[34], selector); - w[50] = hc_byte_perm (w[32], w[33], selector); - w[49] = hc_byte_perm (w[31], w[32], selector); - w[48] = hc_byte_perm (w[30], w[31], selector); - w[47] = hc_byte_perm (w[29], w[30], selector); - w[46] = hc_byte_perm (w[28], w[29], selector); - w[45] = hc_byte_perm (w[27], w[28], selector); - w[44] = hc_byte_perm (w[26], w[27], selector); - w[43] = hc_byte_perm (w[25], w[26], selector); - w[42] = hc_byte_perm (w[24], w[25], selector); - w[41] = hc_byte_perm (w[23], w[24], selector); - w[40] = hc_byte_perm (w[22], w[23], selector); - w[39] = hc_byte_perm (w[21], w[22], selector); - w[38] = hc_byte_perm (w[20], w[21], selector); - w[37] = hc_byte_perm (w[19], w[20], selector); - w[36] = hc_byte_perm (w[18], w[19], selector); - w[35] = hc_byte_perm (w[17], w[18], selector); - w[34] = hc_byte_perm (w[16], w[17], selector); - w[33] = hc_byte_perm (w[15], w[16], selector); - w[32] = hc_byte_perm (w[14], w[15], selector); - w[31] = hc_byte_perm (w[13], w[14], selector); - w[30] = hc_byte_perm (w[12], w[13], selector); - w[29] = hc_byte_perm (w[11], w[12], selector); - w[28] = hc_byte_perm (w[10], w[11], selector); - w[27] = hc_byte_perm (w[ 9], w[10], selector); - w[26] = hc_byte_perm (w[ 8], w[ 9], selector); - w[25] = hc_byte_perm (w[ 7], w[ 8], selector); - w[24] = hc_byte_perm (w[ 6], w[ 7], selector); - w[23] = hc_byte_perm (w[ 5], w[ 6], selector); - w[22] = hc_byte_perm (w[ 4], w[ 5], selector); - w[21] = hc_byte_perm (w[ 3], w[ 4], selector); - w[20] = hc_byte_perm (w[ 2], w[ 3], selector); - w[19] = hc_byte_perm (w[ 1], w[ 2], selector); - w[18] = hc_byte_perm (w[ 0], w[ 1], selector); - w[17] = hc_byte_perm ( 0, w[ 0], selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm (w[44], w[45], selector); - w[62] = hc_byte_perm (w[43], w[44], selector); - w[61] = hc_byte_perm (w[42], w[43], selector); - w[60] = hc_byte_perm (w[41], w[42], selector); - w[59] = hc_byte_perm (w[40], w[41], selector); - w[58] = hc_byte_perm (w[39], w[40], selector); - w[57] = hc_byte_perm (w[38], w[39], selector); - w[56] = hc_byte_perm (w[37], w[38], selector); - w[55] = hc_byte_perm (w[36], w[37], selector); - w[54] = hc_byte_perm (w[35], w[36], selector); - w[53] = hc_byte_perm (w[34], w[35], selector); - w[52] = hc_byte_perm (w[33], w[34], selector); - w[51] = hc_byte_perm (w[32], w[33], selector); - w[50] = hc_byte_perm (w[31], w[32], selector); - w[49] = hc_byte_perm (w[30], w[31], selector); - w[48] = hc_byte_perm (w[29], w[30], selector); - w[47] = hc_byte_perm (w[28], w[29], selector); - w[46] = hc_byte_perm (w[27], w[28], selector); - w[45] = hc_byte_perm (w[26], w[27], selector); - w[44] = hc_byte_perm (w[25], w[26], selector); - w[43] = hc_byte_perm (w[24], w[25], selector); - w[42] = hc_byte_perm (w[23], w[24], selector); - w[41] = hc_byte_perm (w[22], w[23], selector); - w[40] = hc_byte_perm (w[21], w[22], selector); - w[39] = hc_byte_perm (w[20], w[21], selector); - w[38] = hc_byte_perm (w[19], w[20], selector); - w[37] = hc_byte_perm (w[18], w[19], selector); - w[36] = hc_byte_perm (w[17], w[18], selector); - w[35] = hc_byte_perm (w[16], w[17], selector); - w[34] = hc_byte_perm (w[15], w[16], selector); - w[33] = hc_byte_perm (w[14], w[15], selector); - w[32] = hc_byte_perm (w[13], w[14], selector); - w[31] = hc_byte_perm (w[12], w[13], selector); - w[30] = hc_byte_perm (w[11], w[12], selector); - w[29] = hc_byte_perm (w[10], w[11], selector); - w[28] = hc_byte_perm (w[ 9], w[10], selector); - w[27] = hc_byte_perm (w[ 8], w[ 9], selector); - w[26] = hc_byte_perm (w[ 7], w[ 8], selector); - w[25] = hc_byte_perm (w[ 6], w[ 7], selector); - w[24] = hc_byte_perm (w[ 5], w[ 6], selector); - w[23] = hc_byte_perm (w[ 4], w[ 5], selector); - w[22] = hc_byte_perm (w[ 3], w[ 4], selector); - w[21] = hc_byte_perm (w[ 2], w[ 3], selector); - w[20] = hc_byte_perm (w[ 1], w[ 2], selector); - w[19] = hc_byte_perm (w[ 0], w[ 1], selector); - w[18] = hc_byte_perm ( 0, w[ 0], selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm (w[43], w[44], selector); - w[62] = hc_byte_perm (w[42], w[43], selector); - w[61] = hc_byte_perm (w[41], w[42], selector); - w[60] = hc_byte_perm (w[40], w[41], selector); - w[59] = hc_byte_perm (w[39], w[40], selector); - w[58] = hc_byte_perm (w[38], w[39], selector); - w[57] = hc_byte_perm (w[37], w[38], selector); - w[56] = hc_byte_perm (w[36], w[37], selector); - w[55] = hc_byte_perm (w[35], w[36], selector); - w[54] = hc_byte_perm (w[34], w[35], selector); - w[53] = hc_byte_perm (w[33], w[34], selector); - w[52] = hc_byte_perm (w[32], w[33], selector); - w[51] = hc_byte_perm (w[31], w[32], selector); - w[50] = hc_byte_perm (w[30], w[31], selector); - w[49] = hc_byte_perm (w[29], w[30], selector); - w[48] = hc_byte_perm (w[28], w[29], selector); - w[47] = hc_byte_perm (w[27], w[28], selector); - w[46] = hc_byte_perm (w[26], w[27], selector); - w[45] = hc_byte_perm (w[25], w[26], selector); - w[44] = hc_byte_perm (w[24], w[25], selector); - w[43] = hc_byte_perm (w[23], w[24], selector); - w[42] = hc_byte_perm (w[22], w[23], selector); - w[41] = hc_byte_perm (w[21], w[22], selector); - w[40] = hc_byte_perm (w[20], w[21], selector); - w[39] = hc_byte_perm (w[19], w[20], selector); - w[38] = hc_byte_perm (w[18], w[19], selector); - w[37] = hc_byte_perm (w[17], w[18], selector); - w[36] = hc_byte_perm (w[16], w[17], selector); - w[35] = hc_byte_perm (w[15], w[16], selector); - w[34] = hc_byte_perm (w[14], w[15], selector); - w[33] = hc_byte_perm (w[13], w[14], selector); - w[32] = hc_byte_perm (w[12], w[13], selector); - w[31] = hc_byte_perm (w[11], w[12], selector); - w[30] = hc_byte_perm (w[10], w[11], selector); - w[29] = hc_byte_perm (w[ 9], w[10], selector); - w[28] = hc_byte_perm (w[ 8], w[ 9], selector); - w[27] = hc_byte_perm (w[ 7], w[ 8], selector); - w[26] = hc_byte_perm (w[ 6], w[ 7], selector); - w[25] = hc_byte_perm (w[ 5], w[ 6], selector); - w[24] = hc_byte_perm (w[ 4], w[ 5], selector); - w[23] = hc_byte_perm (w[ 3], w[ 4], selector); - w[22] = hc_byte_perm (w[ 2], w[ 3], selector); - w[21] = hc_byte_perm (w[ 1], w[ 2], selector); - w[20] = hc_byte_perm (w[ 0], w[ 1], selector); - w[19] = hc_byte_perm ( 0, w[ 0], selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm (w[42], w[43], selector); - w[62] = hc_byte_perm (w[41], w[42], selector); - w[61] = hc_byte_perm (w[40], w[41], selector); - w[60] = hc_byte_perm (w[39], w[40], selector); - w[59] = hc_byte_perm (w[38], w[39], selector); - w[58] = hc_byte_perm (w[37], w[38], selector); - w[57] = hc_byte_perm (w[36], w[37], selector); - w[56] = hc_byte_perm (w[35], w[36], selector); - w[55] = hc_byte_perm (w[34], w[35], selector); - w[54] = hc_byte_perm (w[33], w[34], selector); - w[53] = hc_byte_perm (w[32], w[33], selector); - w[52] = hc_byte_perm (w[31], w[32], selector); - w[51] = hc_byte_perm (w[30], w[31], selector); - w[50] = hc_byte_perm (w[29], w[30], selector); - w[49] = hc_byte_perm (w[28], w[29], selector); - w[48] = hc_byte_perm (w[27], w[28], selector); - w[47] = hc_byte_perm (w[26], w[27], selector); - w[46] = hc_byte_perm (w[25], w[26], selector); - w[45] = hc_byte_perm (w[24], w[25], selector); - w[44] = hc_byte_perm (w[23], w[24], selector); - w[43] = hc_byte_perm (w[22], w[23], selector); - w[42] = hc_byte_perm (w[21], w[22], selector); - w[41] = hc_byte_perm (w[20], w[21], selector); - w[40] = hc_byte_perm (w[19], w[20], selector); - w[39] = hc_byte_perm (w[18], w[19], selector); - w[38] = hc_byte_perm (w[17], w[18], selector); - w[37] = hc_byte_perm (w[16], w[17], selector); - w[36] = hc_byte_perm (w[15], w[16], selector); - w[35] = hc_byte_perm (w[14], w[15], selector); - w[34] = hc_byte_perm (w[13], w[14], selector); - w[33] = hc_byte_perm (w[12], w[13], selector); - w[32] = hc_byte_perm (w[11], w[12], selector); - w[31] = hc_byte_perm (w[10], w[11], selector); - w[30] = hc_byte_perm (w[ 9], w[10], selector); - w[29] = hc_byte_perm (w[ 8], w[ 9], selector); - w[28] = hc_byte_perm (w[ 7], w[ 8], selector); - w[27] = hc_byte_perm (w[ 6], w[ 7], selector); - w[26] = hc_byte_perm (w[ 5], w[ 6], selector); - w[25] = hc_byte_perm (w[ 4], w[ 5], selector); - w[24] = hc_byte_perm (w[ 3], w[ 4], selector); - w[23] = hc_byte_perm (w[ 2], w[ 3], selector); - w[22] = hc_byte_perm (w[ 1], w[ 2], selector); - w[21] = hc_byte_perm (w[ 0], w[ 1], selector); - w[20] = hc_byte_perm ( 0, w[ 0], selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm (w[41], w[42], selector); - w[62] = hc_byte_perm (w[40], w[41], selector); - w[61] = hc_byte_perm (w[39], w[40], selector); - w[60] = hc_byte_perm (w[38], w[39], selector); - w[59] = hc_byte_perm (w[37], w[38], selector); - w[58] = hc_byte_perm (w[36], w[37], selector); - w[57] = hc_byte_perm (w[35], w[36], selector); - w[56] = hc_byte_perm (w[34], w[35], selector); - w[55] = hc_byte_perm (w[33], w[34], selector); - w[54] = hc_byte_perm (w[32], w[33], selector); - w[53] = hc_byte_perm (w[31], w[32], selector); - w[52] = hc_byte_perm (w[30], w[31], selector); - w[51] = hc_byte_perm (w[29], w[30], selector); - w[50] = hc_byte_perm (w[28], w[29], selector); - w[49] = hc_byte_perm (w[27], w[28], selector); - w[48] = hc_byte_perm (w[26], w[27], selector); - w[47] = hc_byte_perm (w[25], w[26], selector); - w[46] = hc_byte_perm (w[24], w[25], selector); - w[45] = hc_byte_perm (w[23], w[24], selector); - w[44] = hc_byte_perm (w[22], w[23], selector); - w[43] = hc_byte_perm (w[21], w[22], selector); - w[42] = hc_byte_perm (w[20], w[21], selector); - w[41] = hc_byte_perm (w[19], w[20], selector); - w[40] = hc_byte_perm (w[18], w[19], selector); - w[39] = hc_byte_perm (w[17], w[18], selector); - w[38] = hc_byte_perm (w[16], w[17], selector); - w[37] = hc_byte_perm (w[15], w[16], selector); - w[36] = hc_byte_perm (w[14], w[15], selector); - w[35] = hc_byte_perm (w[13], w[14], selector); - w[34] = hc_byte_perm (w[12], w[13], selector); - w[33] = hc_byte_perm (w[11], w[12], selector); - w[32] = hc_byte_perm (w[10], w[11], selector); - w[31] = hc_byte_perm (w[ 9], w[10], selector); - w[30] = hc_byte_perm (w[ 8], w[ 9], selector); - w[29] = hc_byte_perm (w[ 7], w[ 8], selector); - w[28] = hc_byte_perm (w[ 6], w[ 7], selector); - w[27] = hc_byte_perm (w[ 5], w[ 6], selector); - w[26] = hc_byte_perm (w[ 4], w[ 5], selector); - w[25] = hc_byte_perm (w[ 3], w[ 4], selector); - w[24] = hc_byte_perm (w[ 2], w[ 3], selector); - w[23] = hc_byte_perm (w[ 1], w[ 2], selector); - w[22] = hc_byte_perm (w[ 0], w[ 1], selector); - w[21] = hc_byte_perm ( 0, w[ 0], selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm (w[40], w[41], selector); - w[62] = hc_byte_perm (w[39], w[40], selector); - w[61] = hc_byte_perm (w[38], w[39], selector); - w[60] = hc_byte_perm (w[37], w[38], selector); - w[59] = hc_byte_perm (w[36], w[37], selector); - w[58] = hc_byte_perm (w[35], w[36], selector); - w[57] = hc_byte_perm (w[34], w[35], selector); - w[56] = hc_byte_perm (w[33], w[34], selector); - w[55] = hc_byte_perm (w[32], w[33], selector); - w[54] = hc_byte_perm (w[31], w[32], selector); - w[53] = hc_byte_perm (w[30], w[31], selector); - w[52] = hc_byte_perm (w[29], w[30], selector); - w[51] = hc_byte_perm (w[28], w[29], selector); - w[50] = hc_byte_perm (w[27], w[28], selector); - w[49] = hc_byte_perm (w[26], w[27], selector); - w[48] = hc_byte_perm (w[25], w[26], selector); - w[47] = hc_byte_perm (w[24], w[25], selector); - w[46] = hc_byte_perm (w[23], w[24], selector); - w[45] = hc_byte_perm (w[22], w[23], selector); - w[44] = hc_byte_perm (w[21], w[22], selector); - w[43] = hc_byte_perm (w[20], w[21], selector); - w[42] = hc_byte_perm (w[19], w[20], selector); - w[41] = hc_byte_perm (w[18], w[19], selector); - w[40] = hc_byte_perm (w[17], w[18], selector); - w[39] = hc_byte_perm (w[16], w[17], selector); - w[38] = hc_byte_perm (w[15], w[16], selector); - w[37] = hc_byte_perm (w[14], w[15], selector); - w[36] = hc_byte_perm (w[13], w[14], selector); - w[35] = hc_byte_perm (w[12], w[13], selector); - w[34] = hc_byte_perm (w[11], w[12], selector); - w[33] = hc_byte_perm (w[10], w[11], selector); - w[32] = hc_byte_perm (w[ 9], w[10], selector); - w[31] = hc_byte_perm (w[ 8], w[ 9], selector); - w[30] = hc_byte_perm (w[ 7], w[ 8], selector); - w[29] = hc_byte_perm (w[ 6], w[ 7], selector); - w[28] = hc_byte_perm (w[ 5], w[ 6], selector); - w[27] = hc_byte_perm (w[ 4], w[ 5], selector); - w[26] = hc_byte_perm (w[ 3], w[ 4], selector); - w[25] = hc_byte_perm (w[ 2], w[ 3], selector); - w[24] = hc_byte_perm (w[ 1], w[ 2], selector); - w[23] = hc_byte_perm (w[ 0], w[ 1], selector); - w[22] = hc_byte_perm ( 0, w[ 0], selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm (w[39], w[40], selector); - w[62] = hc_byte_perm (w[38], w[39], selector); - w[61] = hc_byte_perm (w[37], w[38], selector); - w[60] = hc_byte_perm (w[36], w[37], selector); - w[59] = hc_byte_perm (w[35], w[36], selector); - w[58] = hc_byte_perm (w[34], w[35], selector); - w[57] = hc_byte_perm (w[33], w[34], selector); - w[56] = hc_byte_perm (w[32], w[33], selector); - w[55] = hc_byte_perm (w[31], w[32], selector); - w[54] = hc_byte_perm (w[30], w[31], selector); - w[53] = hc_byte_perm (w[29], w[30], selector); - w[52] = hc_byte_perm (w[28], w[29], selector); - w[51] = hc_byte_perm (w[27], w[28], selector); - w[50] = hc_byte_perm (w[26], w[27], selector); - w[49] = hc_byte_perm (w[25], w[26], selector); - w[48] = hc_byte_perm (w[24], w[25], selector); - w[47] = hc_byte_perm (w[23], w[24], selector); - w[46] = hc_byte_perm (w[22], w[23], selector); - w[45] = hc_byte_perm (w[21], w[22], selector); - w[44] = hc_byte_perm (w[20], w[21], selector); - w[43] = hc_byte_perm (w[19], w[20], selector); - w[42] = hc_byte_perm (w[18], w[19], selector); - w[41] = hc_byte_perm (w[17], w[18], selector); - w[40] = hc_byte_perm (w[16], w[17], selector); - w[39] = hc_byte_perm (w[15], w[16], selector); - w[38] = hc_byte_perm (w[14], w[15], selector); - w[37] = hc_byte_perm (w[13], w[14], selector); - w[36] = hc_byte_perm (w[12], w[13], selector); - w[35] = hc_byte_perm (w[11], w[12], selector); - w[34] = hc_byte_perm (w[10], w[11], selector); - w[33] = hc_byte_perm (w[ 9], w[10], selector); - w[32] = hc_byte_perm (w[ 8], w[ 9], selector); - w[31] = hc_byte_perm (w[ 7], w[ 8], selector); - w[30] = hc_byte_perm (w[ 6], w[ 7], selector); - w[29] = hc_byte_perm (w[ 5], w[ 6], selector); - w[28] = hc_byte_perm (w[ 4], w[ 5], selector); - w[27] = hc_byte_perm (w[ 3], w[ 4], selector); - w[26] = hc_byte_perm (w[ 2], w[ 3], selector); - w[25] = hc_byte_perm (w[ 1], w[ 2], selector); - w[24] = hc_byte_perm (w[ 0], w[ 1], selector); - w[23] = hc_byte_perm ( 0, w[ 0], selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm (w[38], w[39], selector); - w[62] = hc_byte_perm (w[37], w[38], selector); - w[61] = hc_byte_perm (w[36], w[37], selector); - w[60] = hc_byte_perm (w[35], w[36], selector); - w[59] = hc_byte_perm (w[34], w[35], selector); - w[58] = hc_byte_perm (w[33], w[34], selector); - w[57] = hc_byte_perm (w[32], w[33], selector); - w[56] = hc_byte_perm (w[31], w[32], selector); - w[55] = hc_byte_perm (w[30], w[31], selector); - w[54] = hc_byte_perm (w[29], w[30], selector); - w[53] = hc_byte_perm (w[28], w[29], selector); - w[52] = hc_byte_perm (w[27], w[28], selector); - w[51] = hc_byte_perm (w[26], w[27], selector); - w[50] = hc_byte_perm (w[25], w[26], selector); - w[49] = hc_byte_perm (w[24], w[25], selector); - w[48] = hc_byte_perm (w[23], w[24], selector); - w[47] = hc_byte_perm (w[22], w[23], selector); - w[46] = hc_byte_perm (w[21], w[22], selector); - w[45] = hc_byte_perm (w[20], w[21], selector); - w[44] = hc_byte_perm (w[19], w[20], selector); - w[43] = hc_byte_perm (w[18], w[19], selector); - w[42] = hc_byte_perm (w[17], w[18], selector); - w[41] = hc_byte_perm (w[16], w[17], selector); - w[40] = hc_byte_perm (w[15], w[16], selector); - w[39] = hc_byte_perm (w[14], w[15], selector); - w[38] = hc_byte_perm (w[13], w[14], selector); - w[37] = hc_byte_perm (w[12], w[13], selector); - w[36] = hc_byte_perm (w[11], w[12], selector); - w[35] = hc_byte_perm (w[10], w[11], selector); - w[34] = hc_byte_perm (w[ 9], w[10], selector); - w[33] = hc_byte_perm (w[ 8], w[ 9], selector); - w[32] = hc_byte_perm (w[ 7], w[ 8], selector); - w[31] = hc_byte_perm (w[ 6], w[ 7], selector); - w[30] = hc_byte_perm (w[ 5], w[ 6], selector); - w[29] = hc_byte_perm (w[ 4], w[ 5], selector); - w[28] = hc_byte_perm (w[ 3], w[ 4], selector); - w[27] = hc_byte_perm (w[ 2], w[ 3], selector); - w[26] = hc_byte_perm (w[ 1], w[ 2], selector); - w[25] = hc_byte_perm (w[ 0], w[ 1], selector); - w[24] = hc_byte_perm ( 0, w[ 0], selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm (w[37], w[38], selector); - w[62] = hc_byte_perm (w[36], w[37], selector); - w[61] = hc_byte_perm (w[35], w[36], selector); - w[60] = hc_byte_perm (w[34], w[35], selector); - w[59] = hc_byte_perm (w[33], w[34], selector); - w[58] = hc_byte_perm (w[32], w[33], selector); - w[57] = hc_byte_perm (w[31], w[32], selector); - w[56] = hc_byte_perm (w[30], w[31], selector); - w[55] = hc_byte_perm (w[29], w[30], selector); - w[54] = hc_byte_perm (w[28], w[29], selector); - w[53] = hc_byte_perm (w[27], w[28], selector); - w[52] = hc_byte_perm (w[26], w[27], selector); - w[51] = hc_byte_perm (w[25], w[26], selector); - w[50] = hc_byte_perm (w[24], w[25], selector); - w[49] = hc_byte_perm (w[23], w[24], selector); - w[48] = hc_byte_perm (w[22], w[23], selector); - w[47] = hc_byte_perm (w[21], w[22], selector); - w[46] = hc_byte_perm (w[20], w[21], selector); - w[45] = hc_byte_perm (w[19], w[20], selector); - w[44] = hc_byte_perm (w[18], w[19], selector); - w[43] = hc_byte_perm (w[17], w[18], selector); - w[42] = hc_byte_perm (w[16], w[17], selector); - w[41] = hc_byte_perm (w[15], w[16], selector); - w[40] = hc_byte_perm (w[14], w[15], selector); - w[39] = hc_byte_perm (w[13], w[14], selector); - w[38] = hc_byte_perm (w[12], w[13], selector); - w[37] = hc_byte_perm (w[11], w[12], selector); - w[36] = hc_byte_perm (w[10], w[11], selector); - w[35] = hc_byte_perm (w[ 9], w[10], selector); - w[34] = hc_byte_perm (w[ 8], w[ 9], selector); - w[33] = hc_byte_perm (w[ 7], w[ 8], selector); - w[32] = hc_byte_perm (w[ 6], w[ 7], selector); - w[31] = hc_byte_perm (w[ 5], w[ 6], selector); - w[30] = hc_byte_perm (w[ 4], w[ 5], selector); - w[29] = hc_byte_perm (w[ 3], w[ 4], selector); - w[28] = hc_byte_perm (w[ 2], w[ 3], selector); - w[27] = hc_byte_perm (w[ 1], w[ 2], selector); - w[26] = hc_byte_perm (w[ 0], w[ 1], selector); - w[25] = hc_byte_perm ( 0, w[ 0], selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm (w[36], w[37], selector); - w[62] = hc_byte_perm (w[35], w[36], selector); - w[61] = hc_byte_perm (w[34], w[35], selector); - w[60] = hc_byte_perm (w[33], w[34], selector); - w[59] = hc_byte_perm (w[32], w[33], selector); - w[58] = hc_byte_perm (w[31], w[32], selector); - w[57] = hc_byte_perm (w[30], w[31], selector); - w[56] = hc_byte_perm (w[29], w[30], selector); - w[55] = hc_byte_perm (w[28], w[29], selector); - w[54] = hc_byte_perm (w[27], w[28], selector); - w[53] = hc_byte_perm (w[26], w[27], selector); - w[52] = hc_byte_perm (w[25], w[26], selector); - w[51] = hc_byte_perm (w[24], w[25], selector); - w[50] = hc_byte_perm (w[23], w[24], selector); - w[49] = hc_byte_perm (w[22], w[23], selector); - w[48] = hc_byte_perm (w[21], w[22], selector); - w[47] = hc_byte_perm (w[20], w[21], selector); - w[46] = hc_byte_perm (w[19], w[20], selector); - w[45] = hc_byte_perm (w[18], w[19], selector); - w[44] = hc_byte_perm (w[17], w[18], selector); - w[43] = hc_byte_perm (w[16], w[17], selector); - w[42] = hc_byte_perm (w[15], w[16], selector); - w[41] = hc_byte_perm (w[14], w[15], selector); - w[40] = hc_byte_perm (w[13], w[14], selector); - w[39] = hc_byte_perm (w[12], w[13], selector); - w[38] = hc_byte_perm (w[11], w[12], selector); - w[37] = hc_byte_perm (w[10], w[11], selector); - w[36] = hc_byte_perm (w[ 9], w[10], selector); - w[35] = hc_byte_perm (w[ 8], w[ 9], selector); - w[34] = hc_byte_perm (w[ 7], w[ 8], selector); - w[33] = hc_byte_perm (w[ 6], w[ 7], selector); - w[32] = hc_byte_perm (w[ 5], w[ 6], selector); - w[31] = hc_byte_perm (w[ 4], w[ 5], selector); - w[30] = hc_byte_perm (w[ 3], w[ 4], selector); - w[29] = hc_byte_perm (w[ 2], w[ 3], selector); - w[28] = hc_byte_perm (w[ 1], w[ 2], selector); - w[27] = hc_byte_perm (w[ 0], w[ 1], selector); - w[26] = hc_byte_perm ( 0, w[ 0], selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm (w[35], w[36], selector); - w[62] = hc_byte_perm (w[34], w[35], selector); - w[61] = hc_byte_perm (w[33], w[34], selector); - w[60] = hc_byte_perm (w[32], w[33], selector); - w[59] = hc_byte_perm (w[31], w[32], selector); - w[58] = hc_byte_perm (w[30], w[31], selector); - w[57] = hc_byte_perm (w[29], w[30], selector); - w[56] = hc_byte_perm (w[28], w[29], selector); - w[55] = hc_byte_perm (w[27], w[28], selector); - w[54] = hc_byte_perm (w[26], w[27], selector); - w[53] = hc_byte_perm (w[25], w[26], selector); - w[52] = hc_byte_perm (w[24], w[25], selector); - w[51] = hc_byte_perm (w[23], w[24], selector); - w[50] = hc_byte_perm (w[22], w[23], selector); - w[49] = hc_byte_perm (w[21], w[22], selector); - w[48] = hc_byte_perm (w[20], w[21], selector); - w[47] = hc_byte_perm (w[19], w[20], selector); - w[46] = hc_byte_perm (w[18], w[19], selector); - w[45] = hc_byte_perm (w[17], w[18], selector); - w[44] = hc_byte_perm (w[16], w[17], selector); - w[43] = hc_byte_perm (w[15], w[16], selector); - w[42] = hc_byte_perm (w[14], w[15], selector); - w[41] = hc_byte_perm (w[13], w[14], selector); - w[40] = hc_byte_perm (w[12], w[13], selector); - w[39] = hc_byte_perm (w[11], w[12], selector); - w[38] = hc_byte_perm (w[10], w[11], selector); - w[37] = hc_byte_perm (w[ 9], w[10], selector); - w[36] = hc_byte_perm (w[ 8], w[ 9], selector); - w[35] = hc_byte_perm (w[ 7], w[ 8], selector); - w[34] = hc_byte_perm (w[ 6], w[ 7], selector); - w[33] = hc_byte_perm (w[ 5], w[ 6], selector); - w[32] = hc_byte_perm (w[ 4], w[ 5], selector); - w[31] = hc_byte_perm (w[ 3], w[ 4], selector); - w[30] = hc_byte_perm (w[ 2], w[ 3], selector); - w[29] = hc_byte_perm (w[ 1], w[ 2], selector); - w[28] = hc_byte_perm (w[ 0], w[ 1], selector); - w[27] = hc_byte_perm ( 0, w[ 0], selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm (w[34], w[35], selector); - w[62] = hc_byte_perm (w[33], w[34], selector); - w[61] = hc_byte_perm (w[32], w[33], selector); - w[60] = hc_byte_perm (w[31], w[32], selector); - w[59] = hc_byte_perm (w[30], w[31], selector); - w[58] = hc_byte_perm (w[29], w[30], selector); - w[57] = hc_byte_perm (w[28], w[29], selector); - w[56] = hc_byte_perm (w[27], w[28], selector); - w[55] = hc_byte_perm (w[26], w[27], selector); - w[54] = hc_byte_perm (w[25], w[26], selector); - w[53] = hc_byte_perm (w[24], w[25], selector); - w[52] = hc_byte_perm (w[23], w[24], selector); - w[51] = hc_byte_perm (w[22], w[23], selector); - w[50] = hc_byte_perm (w[21], w[22], selector); - w[49] = hc_byte_perm (w[20], w[21], selector); - w[48] = hc_byte_perm (w[19], w[20], selector); - w[47] = hc_byte_perm (w[18], w[19], selector); - w[46] = hc_byte_perm (w[17], w[18], selector); - w[45] = hc_byte_perm (w[16], w[17], selector); - w[44] = hc_byte_perm (w[15], w[16], selector); - w[43] = hc_byte_perm (w[14], w[15], selector); - w[42] = hc_byte_perm (w[13], w[14], selector); - w[41] = hc_byte_perm (w[12], w[13], selector); - w[40] = hc_byte_perm (w[11], w[12], selector); - w[39] = hc_byte_perm (w[10], w[11], selector); - w[38] = hc_byte_perm (w[ 9], w[10], selector); - w[37] = hc_byte_perm (w[ 8], w[ 9], selector); - w[36] = hc_byte_perm (w[ 7], w[ 8], selector); - w[35] = hc_byte_perm (w[ 6], w[ 7], selector); - w[34] = hc_byte_perm (w[ 5], w[ 6], selector); - w[33] = hc_byte_perm (w[ 4], w[ 5], selector); - w[32] = hc_byte_perm (w[ 3], w[ 4], selector); - w[31] = hc_byte_perm (w[ 2], w[ 3], selector); - w[30] = hc_byte_perm (w[ 1], w[ 2], selector); - w[29] = hc_byte_perm (w[ 0], w[ 1], selector); - w[28] = hc_byte_perm ( 0, w[ 0], selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm (w[33], w[34], selector); - w[62] = hc_byte_perm (w[32], w[33], selector); - w[61] = hc_byte_perm (w[31], w[32], selector); - w[60] = hc_byte_perm (w[30], w[31], selector); - w[59] = hc_byte_perm (w[29], w[30], selector); - w[58] = hc_byte_perm (w[28], w[29], selector); - w[57] = hc_byte_perm (w[27], w[28], selector); - w[56] = hc_byte_perm (w[26], w[27], selector); - w[55] = hc_byte_perm (w[25], w[26], selector); - w[54] = hc_byte_perm (w[24], w[25], selector); - w[53] = hc_byte_perm (w[23], w[24], selector); - w[52] = hc_byte_perm (w[22], w[23], selector); - w[51] = hc_byte_perm (w[21], w[22], selector); - w[50] = hc_byte_perm (w[20], w[21], selector); - w[49] = hc_byte_perm (w[19], w[20], selector); - w[48] = hc_byte_perm (w[18], w[19], selector); - w[47] = hc_byte_perm (w[17], w[18], selector); - w[46] = hc_byte_perm (w[16], w[17], selector); - w[45] = hc_byte_perm (w[15], w[16], selector); - w[44] = hc_byte_perm (w[14], w[15], selector); - w[43] = hc_byte_perm (w[13], w[14], selector); - w[42] = hc_byte_perm (w[12], w[13], selector); - w[41] = hc_byte_perm (w[11], w[12], selector); - w[40] = hc_byte_perm (w[10], w[11], selector); - w[39] = hc_byte_perm (w[ 9], w[10], selector); - w[38] = hc_byte_perm (w[ 8], w[ 9], selector); - w[37] = hc_byte_perm (w[ 7], w[ 8], selector); - w[36] = hc_byte_perm (w[ 6], w[ 7], selector); - w[35] = hc_byte_perm (w[ 5], w[ 6], selector); - w[34] = hc_byte_perm (w[ 4], w[ 5], selector); - w[33] = hc_byte_perm (w[ 3], w[ 4], selector); - w[32] = hc_byte_perm (w[ 2], w[ 3], selector); - w[31] = hc_byte_perm (w[ 1], w[ 2], selector); - w[30] = hc_byte_perm (w[ 0], w[ 1], selector); - w[29] = hc_byte_perm ( 0, w[ 0], selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm (w[32], w[33], selector); - w[62] = hc_byte_perm (w[31], w[32], selector); - w[61] = hc_byte_perm (w[30], w[31], selector); - w[60] = hc_byte_perm (w[29], w[30], selector); - w[59] = hc_byte_perm (w[28], w[29], selector); - w[58] = hc_byte_perm (w[27], w[28], selector); - w[57] = hc_byte_perm (w[26], w[27], selector); - w[56] = hc_byte_perm (w[25], w[26], selector); - w[55] = hc_byte_perm (w[24], w[25], selector); - w[54] = hc_byte_perm (w[23], w[24], selector); - w[53] = hc_byte_perm (w[22], w[23], selector); - w[52] = hc_byte_perm (w[21], w[22], selector); - w[51] = hc_byte_perm (w[20], w[21], selector); - w[50] = hc_byte_perm (w[19], w[20], selector); - w[49] = hc_byte_perm (w[18], w[19], selector); - w[48] = hc_byte_perm (w[17], w[18], selector); - w[47] = hc_byte_perm (w[16], w[17], selector); - w[46] = hc_byte_perm (w[15], w[16], selector); - w[45] = hc_byte_perm (w[14], w[15], selector); - w[44] = hc_byte_perm (w[13], w[14], selector); - w[43] = hc_byte_perm (w[12], w[13], selector); - w[42] = hc_byte_perm (w[11], w[12], selector); - w[41] = hc_byte_perm (w[10], w[11], selector); - w[40] = hc_byte_perm (w[ 9], w[10], selector); - w[39] = hc_byte_perm (w[ 8], w[ 9], selector); - w[38] = hc_byte_perm (w[ 7], w[ 8], selector); - w[37] = hc_byte_perm (w[ 6], w[ 7], selector); - w[36] = hc_byte_perm (w[ 5], w[ 6], selector); - w[35] = hc_byte_perm (w[ 4], w[ 5], selector); - w[34] = hc_byte_perm (w[ 3], w[ 4], selector); - w[33] = hc_byte_perm (w[ 2], w[ 3], selector); - w[32] = hc_byte_perm (w[ 1], w[ 2], selector); - w[31] = hc_byte_perm (w[ 0], w[ 1], selector); - w[30] = hc_byte_perm ( 0, w[ 0], selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm (w[31], w[32], selector); - w[62] = hc_byte_perm (w[30], w[31], selector); - w[61] = hc_byte_perm (w[29], w[30], selector); - w[60] = hc_byte_perm (w[28], w[29], selector); - w[59] = hc_byte_perm (w[27], w[28], selector); - w[58] = hc_byte_perm (w[26], w[27], selector); - w[57] = hc_byte_perm (w[25], w[26], selector); - w[56] = hc_byte_perm (w[24], w[25], selector); - w[55] = hc_byte_perm (w[23], w[24], selector); - w[54] = hc_byte_perm (w[22], w[23], selector); - w[53] = hc_byte_perm (w[21], w[22], selector); - w[52] = hc_byte_perm (w[20], w[21], selector); - w[51] = hc_byte_perm (w[19], w[20], selector); - w[50] = hc_byte_perm (w[18], w[19], selector); - w[49] = hc_byte_perm (w[17], w[18], selector); - w[48] = hc_byte_perm (w[16], w[17], selector); - w[47] = hc_byte_perm (w[15], w[16], selector); - w[46] = hc_byte_perm (w[14], w[15], selector); - w[45] = hc_byte_perm (w[13], w[14], selector); - w[44] = hc_byte_perm (w[12], w[13], selector); - w[43] = hc_byte_perm (w[11], w[12], selector); - w[42] = hc_byte_perm (w[10], w[11], selector); - w[41] = hc_byte_perm (w[ 9], w[10], selector); - w[40] = hc_byte_perm (w[ 8], w[ 9], selector); - w[39] = hc_byte_perm (w[ 7], w[ 8], selector); - w[38] = hc_byte_perm (w[ 6], w[ 7], selector); - w[37] = hc_byte_perm (w[ 5], w[ 6], selector); - w[36] = hc_byte_perm (w[ 4], w[ 5], selector); - w[35] = hc_byte_perm (w[ 3], w[ 4], selector); - w[34] = hc_byte_perm (w[ 2], w[ 3], selector); - w[33] = hc_byte_perm (w[ 1], w[ 2], selector); - w[32] = hc_byte_perm (w[ 0], w[ 1], selector); - w[31] = hc_byte_perm ( 0, w[ 0], selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm (w[30], w[31], selector); - w[62] = hc_byte_perm (w[29], w[30], selector); - w[61] = hc_byte_perm (w[28], w[29], selector); - w[60] = hc_byte_perm (w[27], w[28], selector); - w[59] = hc_byte_perm (w[26], w[27], selector); - w[58] = hc_byte_perm (w[25], w[26], selector); - w[57] = hc_byte_perm (w[24], w[25], selector); - w[56] = hc_byte_perm (w[23], w[24], selector); - w[55] = hc_byte_perm (w[22], w[23], selector); - w[54] = hc_byte_perm (w[21], w[22], selector); - w[53] = hc_byte_perm (w[20], w[21], selector); - w[52] = hc_byte_perm (w[19], w[20], selector); - w[51] = hc_byte_perm (w[18], w[19], selector); - w[50] = hc_byte_perm (w[17], w[18], selector); - w[49] = hc_byte_perm (w[16], w[17], selector); - w[48] = hc_byte_perm (w[15], w[16], selector); - w[47] = hc_byte_perm (w[14], w[15], selector); - w[46] = hc_byte_perm (w[13], w[14], selector); - w[45] = hc_byte_perm (w[12], w[13], selector); - w[44] = hc_byte_perm (w[11], w[12], selector); - w[43] = hc_byte_perm (w[10], w[11], selector); - w[42] = hc_byte_perm (w[ 9], w[10], selector); - w[41] = hc_byte_perm (w[ 8], w[ 9], selector); - w[40] = hc_byte_perm (w[ 7], w[ 8], selector); - w[39] = hc_byte_perm (w[ 6], w[ 7], selector); - w[38] = hc_byte_perm (w[ 5], w[ 6], selector); - w[37] = hc_byte_perm (w[ 4], w[ 5], selector); - w[36] = hc_byte_perm (w[ 3], w[ 4], selector); - w[35] = hc_byte_perm (w[ 2], w[ 3], selector); - w[34] = hc_byte_perm (w[ 1], w[ 2], selector); - w[33] = hc_byte_perm (w[ 0], w[ 1], selector); - w[32] = hc_byte_perm ( 0, w[ 0], selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm (w[29], w[30], selector); - w[62] = hc_byte_perm (w[28], w[29], selector); - w[61] = hc_byte_perm (w[27], w[28], selector); - w[60] = hc_byte_perm (w[26], w[27], selector); - w[59] = hc_byte_perm (w[25], w[26], selector); - w[58] = hc_byte_perm (w[24], w[25], selector); - w[57] = hc_byte_perm (w[23], w[24], selector); - w[56] = hc_byte_perm (w[22], w[23], selector); - w[55] = hc_byte_perm (w[21], w[22], selector); - w[54] = hc_byte_perm (w[20], w[21], selector); - w[53] = hc_byte_perm (w[19], w[20], selector); - w[52] = hc_byte_perm (w[18], w[19], selector); - w[51] = hc_byte_perm (w[17], w[18], selector); - w[50] = hc_byte_perm (w[16], w[17], selector); - w[49] = hc_byte_perm (w[15], w[16], selector); - w[48] = hc_byte_perm (w[14], w[15], selector); - w[47] = hc_byte_perm (w[13], w[14], selector); - w[46] = hc_byte_perm (w[12], w[13], selector); - w[45] = hc_byte_perm (w[11], w[12], selector); - w[44] = hc_byte_perm (w[10], w[11], selector); - w[43] = hc_byte_perm (w[ 9], w[10], selector); - w[42] = hc_byte_perm (w[ 8], w[ 9], selector); - w[41] = hc_byte_perm (w[ 7], w[ 8], selector); - w[40] = hc_byte_perm (w[ 6], w[ 7], selector); - w[39] = hc_byte_perm (w[ 5], w[ 6], selector); - w[38] = hc_byte_perm (w[ 4], w[ 5], selector); - w[37] = hc_byte_perm (w[ 3], w[ 4], selector); - w[36] = hc_byte_perm (w[ 2], w[ 3], selector); - w[35] = hc_byte_perm (w[ 1], w[ 2], selector); - w[34] = hc_byte_perm (w[ 0], w[ 1], selector); - w[33] = hc_byte_perm ( 0, w[ 0], selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm (w[28], w[29], selector); - w[62] = hc_byte_perm (w[27], w[28], selector); - w[61] = hc_byte_perm (w[26], w[27], selector); - w[60] = hc_byte_perm (w[25], w[26], selector); - w[59] = hc_byte_perm (w[24], w[25], selector); - w[58] = hc_byte_perm (w[23], w[24], selector); - w[57] = hc_byte_perm (w[22], w[23], selector); - w[56] = hc_byte_perm (w[21], w[22], selector); - w[55] = hc_byte_perm (w[20], w[21], selector); - w[54] = hc_byte_perm (w[19], w[20], selector); - w[53] = hc_byte_perm (w[18], w[19], selector); - w[52] = hc_byte_perm (w[17], w[18], selector); - w[51] = hc_byte_perm (w[16], w[17], selector); - w[50] = hc_byte_perm (w[15], w[16], selector); - w[49] = hc_byte_perm (w[14], w[15], selector); - w[48] = hc_byte_perm (w[13], w[14], selector); - w[47] = hc_byte_perm (w[12], w[13], selector); - w[46] = hc_byte_perm (w[11], w[12], selector); - w[45] = hc_byte_perm (w[10], w[11], selector); - w[44] = hc_byte_perm (w[ 9], w[10], selector); - w[43] = hc_byte_perm (w[ 8], w[ 9], selector); - w[42] = hc_byte_perm (w[ 7], w[ 8], selector); - w[41] = hc_byte_perm (w[ 6], w[ 7], selector); - w[40] = hc_byte_perm (w[ 5], w[ 6], selector); - w[39] = hc_byte_perm (w[ 4], w[ 5], selector); - w[38] = hc_byte_perm (w[ 3], w[ 4], selector); - w[37] = hc_byte_perm (w[ 2], w[ 3], selector); - w[36] = hc_byte_perm (w[ 1], w[ 2], selector); - w[35] = hc_byte_perm (w[ 0], w[ 1], selector); - w[34] = hc_byte_perm ( 0, w[ 0], selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm (w[27], w[28], selector); - w[62] = hc_byte_perm (w[26], w[27], selector); - w[61] = hc_byte_perm (w[25], w[26], selector); - w[60] = hc_byte_perm (w[24], w[25], selector); - w[59] = hc_byte_perm (w[23], w[24], selector); - w[58] = hc_byte_perm (w[22], w[23], selector); - w[57] = hc_byte_perm (w[21], w[22], selector); - w[56] = hc_byte_perm (w[20], w[21], selector); - w[55] = hc_byte_perm (w[19], w[20], selector); - w[54] = hc_byte_perm (w[18], w[19], selector); - w[53] = hc_byte_perm (w[17], w[18], selector); - w[52] = hc_byte_perm (w[16], w[17], selector); - w[51] = hc_byte_perm (w[15], w[16], selector); - w[50] = hc_byte_perm (w[14], w[15], selector); - w[49] = hc_byte_perm (w[13], w[14], selector); - w[48] = hc_byte_perm (w[12], w[13], selector); - w[47] = hc_byte_perm (w[11], w[12], selector); - w[46] = hc_byte_perm (w[10], w[11], selector); - w[45] = hc_byte_perm (w[ 9], w[10], selector); - w[44] = hc_byte_perm (w[ 8], w[ 9], selector); - w[43] = hc_byte_perm (w[ 7], w[ 8], selector); - w[42] = hc_byte_perm (w[ 6], w[ 7], selector); - w[41] = hc_byte_perm (w[ 5], w[ 6], selector); - w[40] = hc_byte_perm (w[ 4], w[ 5], selector); - w[39] = hc_byte_perm (w[ 3], w[ 4], selector); - w[38] = hc_byte_perm (w[ 2], w[ 3], selector); - w[37] = hc_byte_perm (w[ 1], w[ 2], selector); - w[36] = hc_byte_perm (w[ 0], w[ 1], selector); - w[35] = hc_byte_perm ( 0, w[ 0], selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm (w[26], w[27], selector); - w[62] = hc_byte_perm (w[25], w[26], selector); - w[61] = hc_byte_perm (w[24], w[25], selector); - w[60] = hc_byte_perm (w[23], w[24], selector); - w[59] = hc_byte_perm (w[22], w[23], selector); - w[58] = hc_byte_perm (w[21], w[22], selector); - w[57] = hc_byte_perm (w[20], w[21], selector); - w[56] = hc_byte_perm (w[19], w[20], selector); - w[55] = hc_byte_perm (w[18], w[19], selector); - w[54] = hc_byte_perm (w[17], w[18], selector); - w[53] = hc_byte_perm (w[16], w[17], selector); - w[52] = hc_byte_perm (w[15], w[16], selector); - w[51] = hc_byte_perm (w[14], w[15], selector); - w[50] = hc_byte_perm (w[13], w[14], selector); - w[49] = hc_byte_perm (w[12], w[13], selector); - w[48] = hc_byte_perm (w[11], w[12], selector); - w[47] = hc_byte_perm (w[10], w[11], selector); - w[46] = hc_byte_perm (w[ 9], w[10], selector); - w[45] = hc_byte_perm (w[ 8], w[ 9], selector); - w[44] = hc_byte_perm (w[ 7], w[ 8], selector); - w[43] = hc_byte_perm (w[ 6], w[ 7], selector); - w[42] = hc_byte_perm (w[ 5], w[ 6], selector); - w[41] = hc_byte_perm (w[ 4], w[ 5], selector); - w[40] = hc_byte_perm (w[ 3], w[ 4], selector); - w[39] = hc_byte_perm (w[ 2], w[ 3], selector); - w[38] = hc_byte_perm (w[ 1], w[ 2], selector); - w[37] = hc_byte_perm (w[ 0], w[ 1], selector); - w[36] = hc_byte_perm ( 0, w[ 0], selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm (w[25], w[26], selector); - w[62] = hc_byte_perm (w[24], w[25], selector); - w[61] = hc_byte_perm (w[23], w[24], selector); - w[60] = hc_byte_perm (w[22], w[23], selector); - w[59] = hc_byte_perm (w[21], w[22], selector); - w[58] = hc_byte_perm (w[20], w[21], selector); - w[57] = hc_byte_perm (w[19], w[20], selector); - w[56] = hc_byte_perm (w[18], w[19], selector); - w[55] = hc_byte_perm (w[17], w[18], selector); - w[54] = hc_byte_perm (w[16], w[17], selector); - w[53] = hc_byte_perm (w[15], w[16], selector); - w[52] = hc_byte_perm (w[14], w[15], selector); - w[51] = hc_byte_perm (w[13], w[14], selector); - w[50] = hc_byte_perm (w[12], w[13], selector); - w[49] = hc_byte_perm (w[11], w[12], selector); - w[48] = hc_byte_perm (w[10], w[11], selector); - w[47] = hc_byte_perm (w[ 9], w[10], selector); - w[46] = hc_byte_perm (w[ 8], w[ 9], selector); - w[45] = hc_byte_perm (w[ 7], w[ 8], selector); - w[44] = hc_byte_perm (w[ 6], w[ 7], selector); - w[43] = hc_byte_perm (w[ 5], w[ 6], selector); - w[42] = hc_byte_perm (w[ 4], w[ 5], selector); - w[41] = hc_byte_perm (w[ 3], w[ 4], selector); - w[40] = hc_byte_perm (w[ 2], w[ 3], selector); - w[39] = hc_byte_perm (w[ 1], w[ 2], selector); - w[38] = hc_byte_perm (w[ 0], w[ 1], selector); - w[37] = hc_byte_perm ( 0, w[ 0], selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm (w[24], w[25], selector); - w[62] = hc_byte_perm (w[23], w[24], selector); - w[61] = hc_byte_perm (w[22], w[23], selector); - w[60] = hc_byte_perm (w[21], w[22], selector); - w[59] = hc_byte_perm (w[20], w[21], selector); - w[58] = hc_byte_perm (w[19], w[20], selector); - w[57] = hc_byte_perm (w[18], w[19], selector); - w[56] = hc_byte_perm (w[17], w[18], selector); - w[55] = hc_byte_perm (w[16], w[17], selector); - w[54] = hc_byte_perm (w[15], w[16], selector); - w[53] = hc_byte_perm (w[14], w[15], selector); - w[52] = hc_byte_perm (w[13], w[14], selector); - w[51] = hc_byte_perm (w[12], w[13], selector); - w[50] = hc_byte_perm (w[11], w[12], selector); - w[49] = hc_byte_perm (w[10], w[11], selector); - w[48] = hc_byte_perm (w[ 9], w[10], selector); - w[47] = hc_byte_perm (w[ 8], w[ 9], selector); - w[46] = hc_byte_perm (w[ 7], w[ 8], selector); - w[45] = hc_byte_perm (w[ 6], w[ 7], selector); - w[44] = hc_byte_perm (w[ 5], w[ 6], selector); - w[43] = hc_byte_perm (w[ 4], w[ 5], selector); - w[42] = hc_byte_perm (w[ 3], w[ 4], selector); - w[41] = hc_byte_perm (w[ 2], w[ 3], selector); - w[40] = hc_byte_perm (w[ 1], w[ 2], selector); - w[39] = hc_byte_perm (w[ 0], w[ 1], selector); - w[38] = hc_byte_perm ( 0, w[ 0], selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm (w[23], w[24], selector); - w[62] = hc_byte_perm (w[22], w[23], selector); - w[61] = hc_byte_perm (w[21], w[22], selector); - w[60] = hc_byte_perm (w[20], w[21], selector); - w[59] = hc_byte_perm (w[19], w[20], selector); - w[58] = hc_byte_perm (w[18], w[19], selector); - w[57] = hc_byte_perm (w[17], w[18], selector); - w[56] = hc_byte_perm (w[16], w[17], selector); - w[55] = hc_byte_perm (w[15], w[16], selector); - w[54] = hc_byte_perm (w[14], w[15], selector); - w[53] = hc_byte_perm (w[13], w[14], selector); - w[52] = hc_byte_perm (w[12], w[13], selector); - w[51] = hc_byte_perm (w[11], w[12], selector); - w[50] = hc_byte_perm (w[10], w[11], selector); - w[49] = hc_byte_perm (w[ 9], w[10], selector); - w[48] = hc_byte_perm (w[ 8], w[ 9], selector); - w[47] = hc_byte_perm (w[ 7], w[ 8], selector); - w[46] = hc_byte_perm (w[ 6], w[ 7], selector); - w[45] = hc_byte_perm (w[ 5], w[ 6], selector); - w[44] = hc_byte_perm (w[ 4], w[ 5], selector); - w[43] = hc_byte_perm (w[ 3], w[ 4], selector); - w[42] = hc_byte_perm (w[ 2], w[ 3], selector); - w[41] = hc_byte_perm (w[ 1], w[ 2], selector); - w[40] = hc_byte_perm (w[ 0], w[ 1], selector); - w[39] = hc_byte_perm ( 0, w[ 0], selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm (w[22], w[23], selector); - w[62] = hc_byte_perm (w[21], w[22], selector); - w[61] = hc_byte_perm (w[20], w[21], selector); - w[60] = hc_byte_perm (w[19], w[20], selector); - w[59] = hc_byte_perm (w[18], w[19], selector); - w[58] = hc_byte_perm (w[17], w[18], selector); - w[57] = hc_byte_perm (w[16], w[17], selector); - w[56] = hc_byte_perm (w[15], w[16], selector); - w[55] = hc_byte_perm (w[14], w[15], selector); - w[54] = hc_byte_perm (w[13], w[14], selector); - w[53] = hc_byte_perm (w[12], w[13], selector); - w[52] = hc_byte_perm (w[11], w[12], selector); - w[51] = hc_byte_perm (w[10], w[11], selector); - w[50] = hc_byte_perm (w[ 9], w[10], selector); - w[49] = hc_byte_perm (w[ 8], w[ 9], selector); - w[48] = hc_byte_perm (w[ 7], w[ 8], selector); - w[47] = hc_byte_perm (w[ 6], w[ 7], selector); - w[46] = hc_byte_perm (w[ 5], w[ 6], selector); - w[45] = hc_byte_perm (w[ 4], w[ 5], selector); - w[44] = hc_byte_perm (w[ 3], w[ 4], selector); - w[43] = hc_byte_perm (w[ 2], w[ 3], selector); - w[42] = hc_byte_perm (w[ 1], w[ 2], selector); - w[41] = hc_byte_perm (w[ 0], w[ 1], selector); - w[40] = hc_byte_perm ( 0, w[ 0], selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm (w[21], w[22], selector); - w[62] = hc_byte_perm (w[20], w[21], selector); - w[61] = hc_byte_perm (w[19], w[20], selector); - w[60] = hc_byte_perm (w[18], w[19], selector); - w[59] = hc_byte_perm (w[17], w[18], selector); - w[58] = hc_byte_perm (w[16], w[17], selector); - w[57] = hc_byte_perm (w[15], w[16], selector); - w[56] = hc_byte_perm (w[14], w[15], selector); - w[55] = hc_byte_perm (w[13], w[14], selector); - w[54] = hc_byte_perm (w[12], w[13], selector); - w[53] = hc_byte_perm (w[11], w[12], selector); - w[52] = hc_byte_perm (w[10], w[11], selector); - w[51] = hc_byte_perm (w[ 9], w[10], selector); - w[50] = hc_byte_perm (w[ 8], w[ 9], selector); - w[49] = hc_byte_perm (w[ 7], w[ 8], selector); - w[48] = hc_byte_perm (w[ 6], w[ 7], selector); - w[47] = hc_byte_perm (w[ 5], w[ 6], selector); - w[46] = hc_byte_perm (w[ 4], w[ 5], selector); - w[45] = hc_byte_perm (w[ 3], w[ 4], selector); - w[44] = hc_byte_perm (w[ 2], w[ 3], selector); - w[43] = hc_byte_perm (w[ 1], w[ 2], selector); - w[42] = hc_byte_perm (w[ 0], w[ 1], selector); - w[41] = hc_byte_perm ( 0, w[ 0], selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm (w[20], w[21], selector); - w[62] = hc_byte_perm (w[19], w[20], selector); - w[61] = hc_byte_perm (w[18], w[19], selector); - w[60] = hc_byte_perm (w[17], w[18], selector); - w[59] = hc_byte_perm (w[16], w[17], selector); - w[58] = hc_byte_perm (w[15], w[16], selector); - w[57] = hc_byte_perm (w[14], w[15], selector); - w[56] = hc_byte_perm (w[13], w[14], selector); - w[55] = hc_byte_perm (w[12], w[13], selector); - w[54] = hc_byte_perm (w[11], w[12], selector); - w[53] = hc_byte_perm (w[10], w[11], selector); - w[52] = hc_byte_perm (w[ 9], w[10], selector); - w[51] = hc_byte_perm (w[ 8], w[ 9], selector); - w[50] = hc_byte_perm (w[ 7], w[ 8], selector); - w[49] = hc_byte_perm (w[ 6], w[ 7], selector); - w[48] = hc_byte_perm (w[ 5], w[ 6], selector); - w[47] = hc_byte_perm (w[ 4], w[ 5], selector); - w[46] = hc_byte_perm (w[ 3], w[ 4], selector); - w[45] = hc_byte_perm (w[ 2], w[ 3], selector); - w[44] = hc_byte_perm (w[ 1], w[ 2], selector); - w[43] = hc_byte_perm (w[ 0], w[ 1], selector); - w[42] = hc_byte_perm ( 0, w[ 0], selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm (w[19], w[20], selector); - w[62] = hc_byte_perm (w[18], w[19], selector); - w[61] = hc_byte_perm (w[17], w[18], selector); - w[60] = hc_byte_perm (w[16], w[17], selector); - w[59] = hc_byte_perm (w[15], w[16], selector); - w[58] = hc_byte_perm (w[14], w[15], selector); - w[57] = hc_byte_perm (w[13], w[14], selector); - w[56] = hc_byte_perm (w[12], w[13], selector); - w[55] = hc_byte_perm (w[11], w[12], selector); - w[54] = hc_byte_perm (w[10], w[11], selector); - w[53] = hc_byte_perm (w[ 9], w[10], selector); - w[52] = hc_byte_perm (w[ 8], w[ 9], selector); - w[51] = hc_byte_perm (w[ 7], w[ 8], selector); - w[50] = hc_byte_perm (w[ 6], w[ 7], selector); - w[49] = hc_byte_perm (w[ 5], w[ 6], selector); - w[48] = hc_byte_perm (w[ 4], w[ 5], selector); - w[47] = hc_byte_perm (w[ 3], w[ 4], selector); - w[46] = hc_byte_perm (w[ 2], w[ 3], selector); - w[45] = hc_byte_perm (w[ 1], w[ 2], selector); - w[44] = hc_byte_perm (w[ 0], w[ 1], selector); - w[43] = hc_byte_perm ( 0, w[ 0], selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm (w[18], w[19], selector); - w[62] = hc_byte_perm (w[17], w[18], selector); - w[61] = hc_byte_perm (w[16], w[17], selector); - w[60] = hc_byte_perm (w[15], w[16], selector); - w[59] = hc_byte_perm (w[14], w[15], selector); - w[58] = hc_byte_perm (w[13], w[14], selector); - w[57] = hc_byte_perm (w[12], w[13], selector); - w[56] = hc_byte_perm (w[11], w[12], selector); - w[55] = hc_byte_perm (w[10], w[11], selector); - w[54] = hc_byte_perm (w[ 9], w[10], selector); - w[53] = hc_byte_perm (w[ 8], w[ 9], selector); - w[52] = hc_byte_perm (w[ 7], w[ 8], selector); - w[51] = hc_byte_perm (w[ 6], w[ 7], selector); - w[50] = hc_byte_perm (w[ 5], w[ 6], selector); - w[49] = hc_byte_perm (w[ 4], w[ 5], selector); - w[48] = hc_byte_perm (w[ 3], w[ 4], selector); - w[47] = hc_byte_perm (w[ 2], w[ 3], selector); - w[46] = hc_byte_perm (w[ 1], w[ 2], selector); - w[45] = hc_byte_perm (w[ 0], w[ 1], selector); - w[44] = hc_byte_perm ( 0, w[ 0], selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm (w[17], w[18], selector); - w[62] = hc_byte_perm (w[16], w[17], selector); - w[61] = hc_byte_perm (w[15], w[16], selector); - w[60] = hc_byte_perm (w[14], w[15], selector); - w[59] = hc_byte_perm (w[13], w[14], selector); - w[58] = hc_byte_perm (w[12], w[13], selector); - w[57] = hc_byte_perm (w[11], w[12], selector); - w[56] = hc_byte_perm (w[10], w[11], selector); - w[55] = hc_byte_perm (w[ 9], w[10], selector); - w[54] = hc_byte_perm (w[ 8], w[ 9], selector); - w[53] = hc_byte_perm (w[ 7], w[ 8], selector); - w[52] = hc_byte_perm (w[ 6], w[ 7], selector); - w[51] = hc_byte_perm (w[ 5], w[ 6], selector); - w[50] = hc_byte_perm (w[ 4], w[ 5], selector); - w[49] = hc_byte_perm (w[ 3], w[ 4], selector); - w[48] = hc_byte_perm (w[ 2], w[ 3], selector); - w[47] = hc_byte_perm (w[ 1], w[ 2], selector); - w[46] = hc_byte_perm (w[ 0], w[ 1], selector); - w[45] = hc_byte_perm ( 0, w[ 0], selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm (w[16], w[17], selector); - w[62] = hc_byte_perm (w[15], w[16], selector); - w[61] = hc_byte_perm (w[14], w[15], selector); - w[60] = hc_byte_perm (w[13], w[14], selector); - w[59] = hc_byte_perm (w[12], w[13], selector); - w[58] = hc_byte_perm (w[11], w[12], selector); - w[57] = hc_byte_perm (w[10], w[11], selector); - w[56] = hc_byte_perm (w[ 9], w[10], selector); - w[55] = hc_byte_perm (w[ 8], w[ 9], selector); - w[54] = hc_byte_perm (w[ 7], w[ 8], selector); - w[53] = hc_byte_perm (w[ 6], w[ 7], selector); - w[52] = hc_byte_perm (w[ 5], w[ 6], selector); - w[51] = hc_byte_perm (w[ 4], w[ 5], selector); - w[50] = hc_byte_perm (w[ 3], w[ 4], selector); - w[49] = hc_byte_perm (w[ 2], w[ 3], selector); - w[48] = hc_byte_perm (w[ 1], w[ 2], selector); - w[47] = hc_byte_perm (w[ 0], w[ 1], selector); - w[46] = hc_byte_perm ( 0, w[ 0], selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm (w[15], w[16], selector); - w[62] = hc_byte_perm (w[14], w[15], selector); - w[61] = hc_byte_perm (w[13], w[14], selector); - w[60] = hc_byte_perm (w[12], w[13], selector); - w[59] = hc_byte_perm (w[11], w[12], selector); - w[58] = hc_byte_perm (w[10], w[11], selector); - w[57] = hc_byte_perm (w[ 9], w[10], selector); - w[56] = hc_byte_perm (w[ 8], w[ 9], selector); - w[55] = hc_byte_perm (w[ 7], w[ 8], selector); - w[54] = hc_byte_perm (w[ 6], w[ 7], selector); - w[53] = hc_byte_perm (w[ 5], w[ 6], selector); - w[52] = hc_byte_perm (w[ 4], w[ 5], selector); - w[51] = hc_byte_perm (w[ 3], w[ 4], selector); - w[50] = hc_byte_perm (w[ 2], w[ 3], selector); - w[49] = hc_byte_perm (w[ 1], w[ 2], selector); - w[48] = hc_byte_perm (w[ 0], w[ 1], selector); - w[47] = hc_byte_perm ( 0, w[ 0], selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm (w[14], w[15], selector); - w[62] = hc_byte_perm (w[13], w[14], selector); - w[61] = hc_byte_perm (w[12], w[13], selector); - w[60] = hc_byte_perm (w[11], w[12], selector); - w[59] = hc_byte_perm (w[10], w[11], selector); - w[58] = hc_byte_perm (w[ 9], w[10], selector); - w[57] = hc_byte_perm (w[ 8], w[ 9], selector); - w[56] = hc_byte_perm (w[ 7], w[ 8], selector); - w[55] = hc_byte_perm (w[ 6], w[ 7], selector); - w[54] = hc_byte_perm (w[ 5], w[ 6], selector); - w[53] = hc_byte_perm (w[ 4], w[ 5], selector); - w[52] = hc_byte_perm (w[ 3], w[ 4], selector); - w[51] = hc_byte_perm (w[ 2], w[ 3], selector); - w[50] = hc_byte_perm (w[ 1], w[ 2], selector); - w[49] = hc_byte_perm (w[ 0], w[ 1], selector); - w[48] = hc_byte_perm ( 0, w[ 0], selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm (w[13], w[14], selector); - w[62] = hc_byte_perm (w[12], w[13], selector); - w[61] = hc_byte_perm (w[11], w[12], selector); - w[60] = hc_byte_perm (w[10], w[11], selector); - w[59] = hc_byte_perm (w[ 9], w[10], selector); - w[58] = hc_byte_perm (w[ 8], w[ 9], selector); - w[57] = hc_byte_perm (w[ 7], w[ 8], selector); - w[56] = hc_byte_perm (w[ 6], w[ 7], selector); - w[55] = hc_byte_perm (w[ 5], w[ 6], selector); - w[54] = hc_byte_perm (w[ 4], w[ 5], selector); - w[53] = hc_byte_perm (w[ 3], w[ 4], selector); - w[52] = hc_byte_perm (w[ 2], w[ 3], selector); - w[51] = hc_byte_perm (w[ 1], w[ 2], selector); - w[50] = hc_byte_perm (w[ 0], w[ 1], selector); - w[49] = hc_byte_perm ( 0, w[ 0], selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm (w[12], w[13], selector); - w[62] = hc_byte_perm (w[11], w[12], selector); - w[61] = hc_byte_perm (w[10], w[11], selector); - w[60] = hc_byte_perm (w[ 9], w[10], selector); - w[59] = hc_byte_perm (w[ 8], w[ 9], selector); - w[58] = hc_byte_perm (w[ 7], w[ 8], selector); - w[57] = hc_byte_perm (w[ 6], w[ 7], selector); - w[56] = hc_byte_perm (w[ 5], w[ 6], selector); - w[55] = hc_byte_perm (w[ 4], w[ 5], selector); - w[54] = hc_byte_perm (w[ 3], w[ 4], selector); - w[53] = hc_byte_perm (w[ 2], w[ 3], selector); - w[52] = hc_byte_perm (w[ 1], w[ 2], selector); - w[51] = hc_byte_perm (w[ 0], w[ 1], selector); - w[50] = hc_byte_perm ( 0, w[ 0], selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm (w[11], w[12], selector); - w[62] = hc_byte_perm (w[10], w[11], selector); - w[61] = hc_byte_perm (w[ 9], w[10], selector); - w[60] = hc_byte_perm (w[ 8], w[ 9], selector); - w[59] = hc_byte_perm (w[ 7], w[ 8], selector); - w[58] = hc_byte_perm (w[ 6], w[ 7], selector); - w[57] = hc_byte_perm (w[ 5], w[ 6], selector); - w[56] = hc_byte_perm (w[ 4], w[ 5], selector); - w[55] = hc_byte_perm (w[ 3], w[ 4], selector); - w[54] = hc_byte_perm (w[ 2], w[ 3], selector); - w[53] = hc_byte_perm (w[ 1], w[ 2], selector); - w[52] = hc_byte_perm (w[ 0], w[ 1], selector); - w[51] = hc_byte_perm ( 0, w[ 0], selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm (w[10], w[11], selector); - w[62] = hc_byte_perm (w[ 9], w[10], selector); - w[61] = hc_byte_perm (w[ 8], w[ 9], selector); - w[60] = hc_byte_perm (w[ 7], w[ 8], selector); - w[59] = hc_byte_perm (w[ 6], w[ 7], selector); - w[58] = hc_byte_perm (w[ 5], w[ 6], selector); - w[57] = hc_byte_perm (w[ 4], w[ 5], selector); - w[56] = hc_byte_perm (w[ 3], w[ 4], selector); - w[55] = hc_byte_perm (w[ 2], w[ 3], selector); - w[54] = hc_byte_perm (w[ 1], w[ 2], selector); - w[53] = hc_byte_perm (w[ 0], w[ 1], selector); - w[52] = hc_byte_perm ( 0, w[ 0], selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm (w[ 9], w[10], selector); - w[62] = hc_byte_perm (w[ 8], w[ 9], selector); - w[61] = hc_byte_perm (w[ 7], w[ 8], selector); - w[60] = hc_byte_perm (w[ 6], w[ 7], selector); - w[59] = hc_byte_perm (w[ 5], w[ 6], selector); - w[58] = hc_byte_perm (w[ 4], w[ 5], selector); - w[57] = hc_byte_perm (w[ 3], w[ 4], selector); - w[56] = hc_byte_perm (w[ 2], w[ 3], selector); - w[55] = hc_byte_perm (w[ 1], w[ 2], selector); - w[54] = hc_byte_perm (w[ 0], w[ 1], selector); - w[53] = hc_byte_perm ( 0, w[ 0], selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm (w[ 8], w[ 9], selector); - w[62] = hc_byte_perm (w[ 7], w[ 8], selector); - w[61] = hc_byte_perm (w[ 6], w[ 7], selector); - w[60] = hc_byte_perm (w[ 5], w[ 6], selector); - w[59] = hc_byte_perm (w[ 4], w[ 5], selector); - w[58] = hc_byte_perm (w[ 3], w[ 4], selector); - w[57] = hc_byte_perm (w[ 2], w[ 3], selector); - w[56] = hc_byte_perm (w[ 1], w[ 2], selector); - w[55] = hc_byte_perm (w[ 0], w[ 1], selector); - w[54] = hc_byte_perm ( 0, w[ 0], selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm (w[ 7], w[ 8], selector); - w[62] = hc_byte_perm (w[ 6], w[ 7], selector); - w[61] = hc_byte_perm (w[ 5], w[ 6], selector); - w[60] = hc_byte_perm (w[ 4], w[ 5], selector); - w[59] = hc_byte_perm (w[ 3], w[ 4], selector); - w[58] = hc_byte_perm (w[ 2], w[ 3], selector); - w[57] = hc_byte_perm (w[ 1], w[ 2], selector); - w[56] = hc_byte_perm (w[ 0], w[ 1], selector); - w[55] = hc_byte_perm ( 0, w[ 0], selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm (w[ 6], w[ 7], selector); - w[62] = hc_byte_perm (w[ 5], w[ 6], selector); - w[61] = hc_byte_perm (w[ 4], w[ 5], selector); - w[60] = hc_byte_perm (w[ 3], w[ 4], selector); - w[59] = hc_byte_perm (w[ 2], w[ 3], selector); - w[58] = hc_byte_perm (w[ 1], w[ 2], selector); - w[57] = hc_byte_perm (w[ 0], w[ 1], selector); - w[56] = hc_byte_perm ( 0, w[ 0], selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm (w[ 5], w[ 6], selector); - w[62] = hc_byte_perm (w[ 4], w[ 5], selector); - w[61] = hc_byte_perm (w[ 3], w[ 4], selector); - w[60] = hc_byte_perm (w[ 2], w[ 3], selector); - w[59] = hc_byte_perm (w[ 1], w[ 2], selector); - w[58] = hc_byte_perm (w[ 0], w[ 1], selector); - w[57] = hc_byte_perm ( 0, w[ 0], selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm (w[ 4], w[ 5], selector); - w[62] = hc_byte_perm (w[ 3], w[ 4], selector); - w[61] = hc_byte_perm (w[ 2], w[ 3], selector); - w[60] = hc_byte_perm (w[ 1], w[ 2], selector); - w[59] = hc_byte_perm (w[ 0], w[ 1], selector); - w[58] = hc_byte_perm ( 0, w[ 0], selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm (w[ 3], w[ 4], selector); - w[62] = hc_byte_perm (w[ 2], w[ 3], selector); - w[61] = hc_byte_perm (w[ 1], w[ 2], selector); - w[60] = hc_byte_perm (w[ 0], w[ 1], selector); - w[59] = hc_byte_perm ( 0, w[ 0], selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm (w[ 2], w[ 3], selector); - w[62] = hc_byte_perm (w[ 1], w[ 2], selector); - w[61] = hc_byte_perm (w[ 0], w[ 1], selector); - w[60] = hc_byte_perm ( 0, w[ 0], selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm (w[ 1], w[ 2], selector); - w[62] = hc_byte_perm (w[ 0], w[ 1], selector); - w[61] = hc_byte_perm ( 0, w[ 0], selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm (w[ 0], w[ 1], selector); - w[62] = hc_byte_perm ( 0, w[ 0], selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm ( 0, w[ 0], selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_be (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -30530,4373 +19518,6 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (PRIVATE_AS u32x *w, const u32 off break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm (w[63], w[62], selector); - w[62] = hc_byte_perm (w[62], w[61], selector); - w[61] = hc_byte_perm (w[61], w[60], selector); - w[60] = hc_byte_perm (w[60], w[59], selector); - w[59] = hc_byte_perm (w[59], w[58], selector); - w[58] = hc_byte_perm (w[58], w[57], selector); - w[57] = hc_byte_perm (w[57], w[56], selector); - w[56] = hc_byte_perm (w[56], w[55], selector); - w[55] = hc_byte_perm (w[55], w[54], selector); - w[54] = hc_byte_perm (w[54], w[53], selector); - w[53] = hc_byte_perm (w[53], w[52], selector); - w[52] = hc_byte_perm (w[52], w[51], selector); - w[51] = hc_byte_perm (w[51], w[50], selector); - w[50] = hc_byte_perm (w[50], w[49], selector); - w[49] = hc_byte_perm (w[49], w[48], selector); - w[48] = hc_byte_perm (w[48], w[47], selector); - w[47] = hc_byte_perm (w[47], w[46], selector); - w[46] = hc_byte_perm (w[46], w[45], selector); - w[45] = hc_byte_perm (w[45], w[44], selector); - w[44] = hc_byte_perm (w[44], w[43], selector); - w[43] = hc_byte_perm (w[43], w[42], selector); - w[42] = hc_byte_perm (w[42], w[41], selector); - w[41] = hc_byte_perm (w[41], w[40], selector); - w[40] = hc_byte_perm (w[40], w[39], selector); - w[39] = hc_byte_perm (w[39], w[38], selector); - w[38] = hc_byte_perm (w[38], w[37], selector); - w[37] = hc_byte_perm (w[37], w[36], selector); - w[36] = hc_byte_perm (w[36], w[35], selector); - w[35] = hc_byte_perm (w[35], w[34], selector); - w[34] = hc_byte_perm (w[34], w[33], selector); - w[33] = hc_byte_perm (w[33], w[32], selector); - w[32] = hc_byte_perm (w[32], w[31], selector); - w[31] = hc_byte_perm (w[31], w[30], selector); - w[30] = hc_byte_perm (w[30], w[29], selector); - w[29] = hc_byte_perm (w[29], w[28], selector); - w[28] = hc_byte_perm (w[28], w[27], selector); - w[27] = hc_byte_perm (w[27], w[26], selector); - w[26] = hc_byte_perm (w[26], w[25], selector); - w[25] = hc_byte_perm (w[25], w[24], selector); - w[24] = hc_byte_perm (w[24], w[23], selector); - w[23] = hc_byte_perm (w[23], w[22], selector); - w[22] = hc_byte_perm (w[22], w[21], selector); - w[21] = hc_byte_perm (w[21], w[20], selector); - w[20] = hc_byte_perm (w[20], w[19], selector); - w[19] = hc_byte_perm (w[19], w[18], selector); - w[18] = hc_byte_perm (w[18], w[17], selector); - w[17] = hc_byte_perm (w[17], w[16], selector); - w[16] = hc_byte_perm (w[16], w[15], selector); - w[15] = hc_byte_perm (w[15], w[14], selector); - w[14] = hc_byte_perm (w[14], w[13], selector); - w[13] = hc_byte_perm (w[13], w[12], selector); - w[12] = hc_byte_perm (w[12], w[11], selector); - w[11] = hc_byte_perm (w[11], w[10], selector); - w[10] = hc_byte_perm (w[10], w[ 9], selector); - w[ 9] = hc_byte_perm (w[ 9], w[ 8], selector); - w[ 8] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 7] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 6] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 5] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 4] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 3] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 2] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 1] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 0] = hc_byte_perm (w[ 0], 0, selector); - - break; - - case 1: - w[63] = hc_byte_perm (w[62], w[61], selector); - w[62] = hc_byte_perm (w[61], w[60], selector); - w[61] = hc_byte_perm (w[60], w[59], selector); - w[60] = hc_byte_perm (w[59], w[58], selector); - w[59] = hc_byte_perm (w[58], w[57], selector); - w[58] = hc_byte_perm (w[57], w[56], selector); - w[57] = hc_byte_perm (w[56], w[55], selector); - w[56] = hc_byte_perm (w[55], w[54], selector); - w[55] = hc_byte_perm (w[54], w[53], selector); - w[54] = hc_byte_perm (w[53], w[52], selector); - w[53] = hc_byte_perm (w[52], w[51], selector); - w[52] = hc_byte_perm (w[51], w[50], selector); - w[51] = hc_byte_perm (w[50], w[49], selector); - w[50] = hc_byte_perm (w[49], w[48], selector); - w[49] = hc_byte_perm (w[48], w[47], selector); - w[48] = hc_byte_perm (w[47], w[46], selector); - w[47] = hc_byte_perm (w[46], w[45], selector); - w[46] = hc_byte_perm (w[45], w[44], selector); - w[45] = hc_byte_perm (w[44], w[43], selector); - w[44] = hc_byte_perm (w[43], w[42], selector); - w[43] = hc_byte_perm (w[42], w[41], selector); - w[42] = hc_byte_perm (w[41], w[40], selector); - w[41] = hc_byte_perm (w[40], w[39], selector); - w[40] = hc_byte_perm (w[39], w[38], selector); - w[39] = hc_byte_perm (w[38], w[37], selector); - w[38] = hc_byte_perm (w[37], w[36], selector); - w[37] = hc_byte_perm (w[36], w[35], selector); - w[36] = hc_byte_perm (w[35], w[34], selector); - w[35] = hc_byte_perm (w[34], w[33], selector); - w[34] = hc_byte_perm (w[33], w[32], selector); - w[33] = hc_byte_perm (w[32], w[31], selector); - w[32] = hc_byte_perm (w[31], w[30], selector); - w[31] = hc_byte_perm (w[30], w[29], selector); - w[30] = hc_byte_perm (w[29], w[28], selector); - w[29] = hc_byte_perm (w[28], w[27], selector); - w[28] = hc_byte_perm (w[27], w[26], selector); - w[27] = hc_byte_perm (w[26], w[25], selector); - w[26] = hc_byte_perm (w[25], w[24], selector); - w[25] = hc_byte_perm (w[24], w[23], selector); - w[24] = hc_byte_perm (w[23], w[22], selector); - w[23] = hc_byte_perm (w[22], w[21], selector); - w[22] = hc_byte_perm (w[21], w[20], selector); - w[21] = hc_byte_perm (w[20], w[19], selector); - w[20] = hc_byte_perm (w[19], w[18], selector); - w[19] = hc_byte_perm (w[18], w[17], selector); - w[18] = hc_byte_perm (w[17], w[16], selector); - w[17] = hc_byte_perm (w[16], w[15], selector); - w[16] = hc_byte_perm (w[15], w[14], selector); - w[15] = hc_byte_perm (w[14], w[13], selector); - w[14] = hc_byte_perm (w[13], w[12], selector); - w[13] = hc_byte_perm (w[12], w[11], selector); - w[12] = hc_byte_perm (w[11], w[10], selector); - w[11] = hc_byte_perm (w[10], w[ 9], selector); - w[10] = hc_byte_perm (w[ 9], w[ 8], selector); - w[ 9] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 8] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 7] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 6] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 5] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 4] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 3] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 2] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 1] = hc_byte_perm (w[ 0], 0, selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm (w[61], w[60], selector); - w[62] = hc_byte_perm (w[60], w[59], selector); - w[61] = hc_byte_perm (w[59], w[58], selector); - w[60] = hc_byte_perm (w[58], w[57], selector); - w[59] = hc_byte_perm (w[57], w[56], selector); - w[58] = hc_byte_perm (w[56], w[55], selector); - w[57] = hc_byte_perm (w[55], w[54], selector); - w[56] = hc_byte_perm (w[54], w[53], selector); - w[55] = hc_byte_perm (w[53], w[52], selector); - w[54] = hc_byte_perm (w[52], w[51], selector); - w[53] = hc_byte_perm (w[51], w[50], selector); - w[52] = hc_byte_perm (w[50], w[49], selector); - w[51] = hc_byte_perm (w[49], w[48], selector); - w[50] = hc_byte_perm (w[48], w[47], selector); - w[49] = hc_byte_perm (w[47], w[46], selector); - w[48] = hc_byte_perm (w[46], w[45], selector); - w[47] = hc_byte_perm (w[45], w[44], selector); - w[46] = hc_byte_perm (w[44], w[43], selector); - w[45] = hc_byte_perm (w[43], w[42], selector); - w[44] = hc_byte_perm (w[42], w[41], selector); - w[43] = hc_byte_perm (w[41], w[40], selector); - w[42] = hc_byte_perm (w[40], w[39], selector); - w[41] = hc_byte_perm (w[39], w[38], selector); - w[40] = hc_byte_perm (w[38], w[37], selector); - w[39] = hc_byte_perm (w[37], w[36], selector); - w[38] = hc_byte_perm (w[36], w[35], selector); - w[37] = hc_byte_perm (w[35], w[34], selector); - w[36] = hc_byte_perm (w[34], w[33], selector); - w[35] = hc_byte_perm (w[33], w[32], selector); - w[34] = hc_byte_perm (w[32], w[31], selector); - w[33] = hc_byte_perm (w[31], w[30], selector); - w[32] = hc_byte_perm (w[30], w[29], selector); - w[31] = hc_byte_perm (w[29], w[28], selector); - w[30] = hc_byte_perm (w[28], w[27], selector); - w[29] = hc_byte_perm (w[27], w[26], selector); - w[28] = hc_byte_perm (w[26], w[25], selector); - w[27] = hc_byte_perm (w[25], w[24], selector); - w[26] = hc_byte_perm (w[24], w[23], selector); - w[25] = hc_byte_perm (w[23], w[22], selector); - w[24] = hc_byte_perm (w[22], w[21], selector); - w[23] = hc_byte_perm (w[21], w[20], selector); - w[22] = hc_byte_perm (w[20], w[19], selector); - w[21] = hc_byte_perm (w[19], w[18], selector); - w[20] = hc_byte_perm (w[18], w[17], selector); - w[19] = hc_byte_perm (w[17], w[16], selector); - w[18] = hc_byte_perm (w[16], w[15], selector); - w[17] = hc_byte_perm (w[15], w[14], selector); - w[16] = hc_byte_perm (w[14], w[13], selector); - w[15] = hc_byte_perm (w[13], w[12], selector); - w[14] = hc_byte_perm (w[12], w[11], selector); - w[13] = hc_byte_perm (w[11], w[10], selector); - w[12] = hc_byte_perm (w[10], w[ 9], selector); - w[11] = hc_byte_perm (w[ 9], w[ 8], selector); - w[10] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 9] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 8] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 7] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 6] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 5] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 4] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 3] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 2] = hc_byte_perm (w[ 0], 0, selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm (w[60], w[59], selector); - w[62] = hc_byte_perm (w[59], w[58], selector); - w[61] = hc_byte_perm (w[58], w[57], selector); - w[60] = hc_byte_perm (w[57], w[56], selector); - w[59] = hc_byte_perm (w[56], w[55], selector); - w[58] = hc_byte_perm (w[55], w[54], selector); - w[57] = hc_byte_perm (w[54], w[53], selector); - w[56] = hc_byte_perm (w[53], w[52], selector); - w[55] = hc_byte_perm (w[52], w[51], selector); - w[54] = hc_byte_perm (w[51], w[50], selector); - w[53] = hc_byte_perm (w[50], w[49], selector); - w[52] = hc_byte_perm (w[49], w[48], selector); - w[51] = hc_byte_perm (w[48], w[47], selector); - w[50] = hc_byte_perm (w[47], w[46], selector); - w[49] = hc_byte_perm (w[46], w[45], selector); - w[48] = hc_byte_perm (w[45], w[44], selector); - w[47] = hc_byte_perm (w[44], w[43], selector); - w[46] = hc_byte_perm (w[43], w[42], selector); - w[45] = hc_byte_perm (w[42], w[41], selector); - w[44] = hc_byte_perm (w[41], w[40], selector); - w[43] = hc_byte_perm (w[40], w[39], selector); - w[42] = hc_byte_perm (w[39], w[38], selector); - w[41] = hc_byte_perm (w[38], w[37], selector); - w[40] = hc_byte_perm (w[37], w[36], selector); - w[39] = hc_byte_perm (w[36], w[35], selector); - w[38] = hc_byte_perm (w[35], w[34], selector); - w[37] = hc_byte_perm (w[34], w[33], selector); - w[36] = hc_byte_perm (w[33], w[32], selector); - w[35] = hc_byte_perm (w[32], w[31], selector); - w[34] = hc_byte_perm (w[31], w[30], selector); - w[33] = hc_byte_perm (w[30], w[29], selector); - w[32] = hc_byte_perm (w[29], w[28], selector); - w[31] = hc_byte_perm (w[28], w[27], selector); - w[30] = hc_byte_perm (w[27], w[26], selector); - w[29] = hc_byte_perm (w[26], w[25], selector); - w[28] = hc_byte_perm (w[25], w[24], selector); - w[27] = hc_byte_perm (w[24], w[23], selector); - w[26] = hc_byte_perm (w[23], w[22], selector); - w[25] = hc_byte_perm (w[22], w[21], selector); - w[24] = hc_byte_perm (w[21], w[20], selector); - w[23] = hc_byte_perm (w[20], w[19], selector); - w[22] = hc_byte_perm (w[19], w[18], selector); - w[21] = hc_byte_perm (w[18], w[17], selector); - w[20] = hc_byte_perm (w[17], w[16], selector); - w[19] = hc_byte_perm (w[16], w[15], selector); - w[18] = hc_byte_perm (w[15], w[14], selector); - w[17] = hc_byte_perm (w[14], w[13], selector); - w[16] = hc_byte_perm (w[13], w[12], selector); - w[15] = hc_byte_perm (w[12], w[11], selector); - w[14] = hc_byte_perm (w[11], w[10], selector); - w[13] = hc_byte_perm (w[10], w[ 9], selector); - w[12] = hc_byte_perm (w[ 9], w[ 8], selector); - w[11] = hc_byte_perm (w[ 8], w[ 7], selector); - w[10] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 9] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 8] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 7] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 6] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 5] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 4] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 3] = hc_byte_perm (w[ 0], 0, selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm (w[59], w[58], selector); - w[62] = hc_byte_perm (w[58], w[57], selector); - w[61] = hc_byte_perm (w[57], w[56], selector); - w[60] = hc_byte_perm (w[56], w[55], selector); - w[59] = hc_byte_perm (w[55], w[54], selector); - w[58] = hc_byte_perm (w[54], w[53], selector); - w[57] = hc_byte_perm (w[53], w[52], selector); - w[56] = hc_byte_perm (w[52], w[51], selector); - w[55] = hc_byte_perm (w[51], w[50], selector); - w[54] = hc_byte_perm (w[50], w[49], selector); - w[53] = hc_byte_perm (w[49], w[48], selector); - w[52] = hc_byte_perm (w[48], w[47], selector); - w[51] = hc_byte_perm (w[47], w[46], selector); - w[50] = hc_byte_perm (w[46], w[45], selector); - w[49] = hc_byte_perm (w[45], w[44], selector); - w[48] = hc_byte_perm (w[44], w[43], selector); - w[47] = hc_byte_perm (w[43], w[42], selector); - w[46] = hc_byte_perm (w[42], w[41], selector); - w[45] = hc_byte_perm (w[41], w[40], selector); - w[44] = hc_byte_perm (w[40], w[39], selector); - w[43] = hc_byte_perm (w[39], w[38], selector); - w[42] = hc_byte_perm (w[38], w[37], selector); - w[41] = hc_byte_perm (w[37], w[36], selector); - w[40] = hc_byte_perm (w[36], w[35], selector); - w[39] = hc_byte_perm (w[35], w[34], selector); - w[38] = hc_byte_perm (w[34], w[33], selector); - w[37] = hc_byte_perm (w[33], w[32], selector); - w[36] = hc_byte_perm (w[32], w[31], selector); - w[35] = hc_byte_perm (w[31], w[30], selector); - w[34] = hc_byte_perm (w[30], w[29], selector); - w[33] = hc_byte_perm (w[29], w[28], selector); - w[32] = hc_byte_perm (w[28], w[27], selector); - w[31] = hc_byte_perm (w[27], w[26], selector); - w[30] = hc_byte_perm (w[26], w[25], selector); - w[29] = hc_byte_perm (w[25], w[24], selector); - w[28] = hc_byte_perm (w[24], w[23], selector); - w[27] = hc_byte_perm (w[23], w[22], selector); - w[26] = hc_byte_perm (w[22], w[21], selector); - w[25] = hc_byte_perm (w[21], w[20], selector); - w[24] = hc_byte_perm (w[20], w[19], selector); - w[23] = hc_byte_perm (w[19], w[18], selector); - w[22] = hc_byte_perm (w[18], w[17], selector); - w[21] = hc_byte_perm (w[17], w[16], selector); - w[20] = hc_byte_perm (w[16], w[15], selector); - w[19] = hc_byte_perm (w[15], w[14], selector); - w[18] = hc_byte_perm (w[14], w[13], selector); - w[17] = hc_byte_perm (w[13], w[12], selector); - w[16] = hc_byte_perm (w[12], w[11], selector); - w[15] = hc_byte_perm (w[11], w[10], selector); - w[14] = hc_byte_perm (w[10], w[ 9], selector); - w[13] = hc_byte_perm (w[ 9], w[ 8], selector); - w[12] = hc_byte_perm (w[ 8], w[ 7], selector); - w[11] = hc_byte_perm (w[ 7], w[ 6], selector); - w[10] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 9] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 8] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 7] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 6] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 5] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 4] = hc_byte_perm (w[ 0], 0, selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm (w[58], w[57], selector); - w[62] = hc_byte_perm (w[57], w[56], selector); - w[61] = hc_byte_perm (w[56], w[55], selector); - w[60] = hc_byte_perm (w[55], w[54], selector); - w[59] = hc_byte_perm (w[54], w[53], selector); - w[58] = hc_byte_perm (w[53], w[52], selector); - w[57] = hc_byte_perm (w[52], w[51], selector); - w[56] = hc_byte_perm (w[51], w[50], selector); - w[55] = hc_byte_perm (w[50], w[49], selector); - w[54] = hc_byte_perm (w[49], w[48], selector); - w[53] = hc_byte_perm (w[48], w[47], selector); - w[52] = hc_byte_perm (w[47], w[46], selector); - w[51] = hc_byte_perm (w[46], w[45], selector); - w[50] = hc_byte_perm (w[45], w[44], selector); - w[49] = hc_byte_perm (w[44], w[43], selector); - w[48] = hc_byte_perm (w[43], w[42], selector); - w[47] = hc_byte_perm (w[42], w[41], selector); - w[46] = hc_byte_perm (w[41], w[40], selector); - w[45] = hc_byte_perm (w[40], w[39], selector); - w[44] = hc_byte_perm (w[39], w[38], selector); - w[43] = hc_byte_perm (w[38], w[37], selector); - w[42] = hc_byte_perm (w[37], w[36], selector); - w[41] = hc_byte_perm (w[36], w[35], selector); - w[40] = hc_byte_perm (w[35], w[34], selector); - w[39] = hc_byte_perm (w[34], w[33], selector); - w[38] = hc_byte_perm (w[33], w[32], selector); - w[37] = hc_byte_perm (w[32], w[31], selector); - w[36] = hc_byte_perm (w[31], w[30], selector); - w[35] = hc_byte_perm (w[30], w[29], selector); - w[34] = hc_byte_perm (w[29], w[28], selector); - w[33] = hc_byte_perm (w[28], w[27], selector); - w[32] = hc_byte_perm (w[27], w[26], selector); - w[31] = hc_byte_perm (w[26], w[25], selector); - w[30] = hc_byte_perm (w[25], w[24], selector); - w[29] = hc_byte_perm (w[24], w[23], selector); - w[28] = hc_byte_perm (w[23], w[22], selector); - w[27] = hc_byte_perm (w[22], w[21], selector); - w[26] = hc_byte_perm (w[21], w[20], selector); - w[25] = hc_byte_perm (w[20], w[19], selector); - w[24] = hc_byte_perm (w[19], w[18], selector); - w[23] = hc_byte_perm (w[18], w[17], selector); - w[22] = hc_byte_perm (w[17], w[16], selector); - w[21] = hc_byte_perm (w[16], w[15], selector); - w[20] = hc_byte_perm (w[15], w[14], selector); - w[19] = hc_byte_perm (w[14], w[13], selector); - w[18] = hc_byte_perm (w[13], w[12], selector); - w[17] = hc_byte_perm (w[12], w[11], selector); - w[16] = hc_byte_perm (w[11], w[10], selector); - w[15] = hc_byte_perm (w[10], w[ 9], selector); - w[14] = hc_byte_perm (w[ 9], w[ 8], selector); - w[13] = hc_byte_perm (w[ 8], w[ 7], selector); - w[12] = hc_byte_perm (w[ 7], w[ 6], selector); - w[11] = hc_byte_perm (w[ 6], w[ 5], selector); - w[10] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 9] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 8] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 7] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 6] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 5] = hc_byte_perm (w[ 0], 0, selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm (w[57], w[56], selector); - w[62] = hc_byte_perm (w[56], w[55], selector); - w[61] = hc_byte_perm (w[55], w[54], selector); - w[60] = hc_byte_perm (w[54], w[53], selector); - w[59] = hc_byte_perm (w[53], w[52], selector); - w[58] = hc_byte_perm (w[52], w[51], selector); - w[57] = hc_byte_perm (w[51], w[50], selector); - w[56] = hc_byte_perm (w[50], w[49], selector); - w[55] = hc_byte_perm (w[49], w[48], selector); - w[54] = hc_byte_perm (w[48], w[47], selector); - w[53] = hc_byte_perm (w[47], w[46], selector); - w[52] = hc_byte_perm (w[46], w[45], selector); - w[51] = hc_byte_perm (w[45], w[44], selector); - w[50] = hc_byte_perm (w[44], w[43], selector); - w[49] = hc_byte_perm (w[43], w[42], selector); - w[48] = hc_byte_perm (w[42], w[41], selector); - w[47] = hc_byte_perm (w[41], w[40], selector); - w[46] = hc_byte_perm (w[40], w[39], selector); - w[45] = hc_byte_perm (w[39], w[38], selector); - w[44] = hc_byte_perm (w[38], w[37], selector); - w[43] = hc_byte_perm (w[37], w[36], selector); - w[42] = hc_byte_perm (w[36], w[35], selector); - w[41] = hc_byte_perm (w[35], w[34], selector); - w[40] = hc_byte_perm (w[34], w[33], selector); - w[39] = hc_byte_perm (w[33], w[32], selector); - w[38] = hc_byte_perm (w[32], w[31], selector); - w[37] = hc_byte_perm (w[31], w[30], selector); - w[36] = hc_byte_perm (w[30], w[29], selector); - w[35] = hc_byte_perm (w[29], w[28], selector); - w[34] = hc_byte_perm (w[28], w[27], selector); - w[33] = hc_byte_perm (w[27], w[26], selector); - w[32] = hc_byte_perm (w[26], w[25], selector); - w[31] = hc_byte_perm (w[25], w[24], selector); - w[30] = hc_byte_perm (w[24], w[23], selector); - w[29] = hc_byte_perm (w[23], w[22], selector); - w[28] = hc_byte_perm (w[22], w[21], selector); - w[27] = hc_byte_perm (w[21], w[20], selector); - w[26] = hc_byte_perm (w[20], w[19], selector); - w[25] = hc_byte_perm (w[19], w[18], selector); - w[24] = hc_byte_perm (w[18], w[17], selector); - w[23] = hc_byte_perm (w[17], w[16], selector); - w[22] = hc_byte_perm (w[16], w[15], selector); - w[21] = hc_byte_perm (w[15], w[14], selector); - w[20] = hc_byte_perm (w[14], w[13], selector); - w[19] = hc_byte_perm (w[13], w[12], selector); - w[18] = hc_byte_perm (w[12], w[11], selector); - w[17] = hc_byte_perm (w[11], w[10], selector); - w[16] = hc_byte_perm (w[10], w[ 9], selector); - w[15] = hc_byte_perm (w[ 9], w[ 8], selector); - w[14] = hc_byte_perm (w[ 8], w[ 7], selector); - w[13] = hc_byte_perm (w[ 7], w[ 6], selector); - w[12] = hc_byte_perm (w[ 6], w[ 5], selector); - w[11] = hc_byte_perm (w[ 5], w[ 4], selector); - w[10] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 9] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 8] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 7] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 6] = hc_byte_perm (w[ 0], 0, selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm (w[56], w[55], selector); - w[62] = hc_byte_perm (w[55], w[54], selector); - w[61] = hc_byte_perm (w[54], w[53], selector); - w[60] = hc_byte_perm (w[53], w[52], selector); - w[59] = hc_byte_perm (w[52], w[51], selector); - w[58] = hc_byte_perm (w[51], w[50], selector); - w[57] = hc_byte_perm (w[50], w[49], selector); - w[56] = hc_byte_perm (w[49], w[48], selector); - w[55] = hc_byte_perm (w[48], w[47], selector); - w[54] = hc_byte_perm (w[47], w[46], selector); - w[53] = hc_byte_perm (w[46], w[45], selector); - w[52] = hc_byte_perm (w[45], w[44], selector); - w[51] = hc_byte_perm (w[44], w[43], selector); - w[50] = hc_byte_perm (w[43], w[42], selector); - w[49] = hc_byte_perm (w[42], w[41], selector); - w[48] = hc_byte_perm (w[41], w[40], selector); - w[47] = hc_byte_perm (w[40], w[39], selector); - w[46] = hc_byte_perm (w[39], w[38], selector); - w[45] = hc_byte_perm (w[38], w[37], selector); - w[44] = hc_byte_perm (w[37], w[36], selector); - w[43] = hc_byte_perm (w[36], w[35], selector); - w[42] = hc_byte_perm (w[35], w[34], selector); - w[41] = hc_byte_perm (w[34], w[33], selector); - w[40] = hc_byte_perm (w[33], w[32], selector); - w[39] = hc_byte_perm (w[32], w[31], selector); - w[38] = hc_byte_perm (w[31], w[30], selector); - w[37] = hc_byte_perm (w[30], w[29], selector); - w[36] = hc_byte_perm (w[29], w[28], selector); - w[35] = hc_byte_perm (w[28], w[27], selector); - w[34] = hc_byte_perm (w[27], w[26], selector); - w[33] = hc_byte_perm (w[26], w[25], selector); - w[32] = hc_byte_perm (w[25], w[24], selector); - w[31] = hc_byte_perm (w[24], w[23], selector); - w[30] = hc_byte_perm (w[23], w[22], selector); - w[29] = hc_byte_perm (w[22], w[21], selector); - w[28] = hc_byte_perm (w[21], w[20], selector); - w[27] = hc_byte_perm (w[20], w[19], selector); - w[26] = hc_byte_perm (w[19], w[18], selector); - w[25] = hc_byte_perm (w[18], w[17], selector); - w[24] = hc_byte_perm (w[17], w[16], selector); - w[23] = hc_byte_perm (w[16], w[15], selector); - w[22] = hc_byte_perm (w[15], w[14], selector); - w[21] = hc_byte_perm (w[14], w[13], selector); - w[20] = hc_byte_perm (w[13], w[12], selector); - w[19] = hc_byte_perm (w[12], w[11], selector); - w[18] = hc_byte_perm (w[11], w[10], selector); - w[17] = hc_byte_perm (w[10], w[ 9], selector); - w[16] = hc_byte_perm (w[ 9], w[ 8], selector); - w[15] = hc_byte_perm (w[ 8], w[ 7], selector); - w[14] = hc_byte_perm (w[ 7], w[ 6], selector); - w[13] = hc_byte_perm (w[ 6], w[ 5], selector); - w[12] = hc_byte_perm (w[ 5], w[ 4], selector); - w[11] = hc_byte_perm (w[ 4], w[ 3], selector); - w[10] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 9] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 8] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 7] = hc_byte_perm (w[ 0], 0, selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm (w[55], w[54], selector); - w[62] = hc_byte_perm (w[54], w[53], selector); - w[61] = hc_byte_perm (w[53], w[52], selector); - w[60] = hc_byte_perm (w[52], w[51], selector); - w[59] = hc_byte_perm (w[51], w[50], selector); - w[58] = hc_byte_perm (w[50], w[49], selector); - w[57] = hc_byte_perm (w[49], w[48], selector); - w[56] = hc_byte_perm (w[48], w[47], selector); - w[55] = hc_byte_perm (w[47], w[46], selector); - w[54] = hc_byte_perm (w[46], w[45], selector); - w[53] = hc_byte_perm (w[45], w[44], selector); - w[52] = hc_byte_perm (w[44], w[43], selector); - w[51] = hc_byte_perm (w[43], w[42], selector); - w[50] = hc_byte_perm (w[42], w[41], selector); - w[49] = hc_byte_perm (w[41], w[40], selector); - w[48] = hc_byte_perm (w[40], w[39], selector); - w[47] = hc_byte_perm (w[39], w[38], selector); - w[46] = hc_byte_perm (w[38], w[37], selector); - w[45] = hc_byte_perm (w[37], w[36], selector); - w[44] = hc_byte_perm (w[36], w[35], selector); - w[43] = hc_byte_perm (w[35], w[34], selector); - w[42] = hc_byte_perm (w[34], w[33], selector); - w[41] = hc_byte_perm (w[33], w[32], selector); - w[40] = hc_byte_perm (w[32], w[31], selector); - w[39] = hc_byte_perm (w[31], w[30], selector); - w[38] = hc_byte_perm (w[30], w[29], selector); - w[37] = hc_byte_perm (w[29], w[28], selector); - w[36] = hc_byte_perm (w[28], w[27], selector); - w[35] = hc_byte_perm (w[27], w[26], selector); - w[34] = hc_byte_perm (w[26], w[25], selector); - w[33] = hc_byte_perm (w[25], w[24], selector); - w[32] = hc_byte_perm (w[24], w[23], selector); - w[31] = hc_byte_perm (w[23], w[22], selector); - w[30] = hc_byte_perm (w[22], w[21], selector); - w[29] = hc_byte_perm (w[21], w[20], selector); - w[28] = hc_byte_perm (w[20], w[19], selector); - w[27] = hc_byte_perm (w[19], w[18], selector); - w[26] = hc_byte_perm (w[18], w[17], selector); - w[25] = hc_byte_perm (w[17], w[16], selector); - w[24] = hc_byte_perm (w[16], w[15], selector); - w[23] = hc_byte_perm (w[15], w[14], selector); - w[22] = hc_byte_perm (w[14], w[13], selector); - w[21] = hc_byte_perm (w[13], w[12], selector); - w[20] = hc_byte_perm (w[12], w[11], selector); - w[19] = hc_byte_perm (w[11], w[10], selector); - w[18] = hc_byte_perm (w[10], w[ 9], selector); - w[17] = hc_byte_perm (w[ 9], w[ 8], selector); - w[16] = hc_byte_perm (w[ 8], w[ 7], selector); - w[15] = hc_byte_perm (w[ 7], w[ 6], selector); - w[14] = hc_byte_perm (w[ 6], w[ 5], selector); - w[13] = hc_byte_perm (w[ 5], w[ 4], selector); - w[12] = hc_byte_perm (w[ 4], w[ 3], selector); - w[11] = hc_byte_perm (w[ 3], w[ 2], selector); - w[10] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 9] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 8] = hc_byte_perm (w[ 0], 0, selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm (w[54], w[53], selector); - w[62] = hc_byte_perm (w[53], w[52], selector); - w[61] = hc_byte_perm (w[52], w[51], selector); - w[60] = hc_byte_perm (w[51], w[50], selector); - w[59] = hc_byte_perm (w[50], w[49], selector); - w[58] = hc_byte_perm (w[49], w[48], selector); - w[57] = hc_byte_perm (w[48], w[47], selector); - w[56] = hc_byte_perm (w[47], w[46], selector); - w[55] = hc_byte_perm (w[46], w[45], selector); - w[54] = hc_byte_perm (w[45], w[44], selector); - w[53] = hc_byte_perm (w[44], w[43], selector); - w[52] = hc_byte_perm (w[43], w[42], selector); - w[51] = hc_byte_perm (w[42], w[41], selector); - w[50] = hc_byte_perm (w[41], w[40], selector); - w[49] = hc_byte_perm (w[40], w[39], selector); - w[48] = hc_byte_perm (w[39], w[38], selector); - w[47] = hc_byte_perm (w[38], w[37], selector); - w[46] = hc_byte_perm (w[37], w[36], selector); - w[45] = hc_byte_perm (w[36], w[35], selector); - w[44] = hc_byte_perm (w[35], w[34], selector); - w[43] = hc_byte_perm (w[34], w[33], selector); - w[42] = hc_byte_perm (w[33], w[32], selector); - w[41] = hc_byte_perm (w[32], w[31], selector); - w[40] = hc_byte_perm (w[31], w[30], selector); - w[39] = hc_byte_perm (w[30], w[29], selector); - w[38] = hc_byte_perm (w[29], w[28], selector); - w[37] = hc_byte_perm (w[28], w[27], selector); - w[36] = hc_byte_perm (w[27], w[26], selector); - w[35] = hc_byte_perm (w[26], w[25], selector); - w[34] = hc_byte_perm (w[25], w[24], selector); - w[33] = hc_byte_perm (w[24], w[23], selector); - w[32] = hc_byte_perm (w[23], w[22], selector); - w[31] = hc_byte_perm (w[22], w[21], selector); - w[30] = hc_byte_perm (w[21], w[20], selector); - w[29] = hc_byte_perm (w[20], w[19], selector); - w[28] = hc_byte_perm (w[19], w[18], selector); - w[27] = hc_byte_perm (w[18], w[17], selector); - w[26] = hc_byte_perm (w[17], w[16], selector); - w[25] = hc_byte_perm (w[16], w[15], selector); - w[24] = hc_byte_perm (w[15], w[14], selector); - w[23] = hc_byte_perm (w[14], w[13], selector); - w[22] = hc_byte_perm (w[13], w[12], selector); - w[21] = hc_byte_perm (w[12], w[11], selector); - w[20] = hc_byte_perm (w[11], w[10], selector); - w[19] = hc_byte_perm (w[10], w[ 9], selector); - w[18] = hc_byte_perm (w[ 9], w[ 8], selector); - w[17] = hc_byte_perm (w[ 8], w[ 7], selector); - w[16] = hc_byte_perm (w[ 7], w[ 6], selector); - w[15] = hc_byte_perm (w[ 6], w[ 5], selector); - w[14] = hc_byte_perm (w[ 5], w[ 4], selector); - w[13] = hc_byte_perm (w[ 4], w[ 3], selector); - w[12] = hc_byte_perm (w[ 3], w[ 2], selector); - w[11] = hc_byte_perm (w[ 2], w[ 1], selector); - w[10] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 9] = hc_byte_perm (w[ 0], 0, selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm (w[53], w[52], selector); - w[62] = hc_byte_perm (w[52], w[51], selector); - w[61] = hc_byte_perm (w[51], w[50], selector); - w[60] = hc_byte_perm (w[50], w[49], selector); - w[59] = hc_byte_perm (w[49], w[48], selector); - w[58] = hc_byte_perm (w[48], w[47], selector); - w[57] = hc_byte_perm (w[47], w[46], selector); - w[56] = hc_byte_perm (w[46], w[45], selector); - w[55] = hc_byte_perm (w[45], w[44], selector); - w[54] = hc_byte_perm (w[44], w[43], selector); - w[53] = hc_byte_perm (w[43], w[42], selector); - w[52] = hc_byte_perm (w[42], w[41], selector); - w[51] = hc_byte_perm (w[41], w[40], selector); - w[50] = hc_byte_perm (w[40], w[39], selector); - w[49] = hc_byte_perm (w[39], w[38], selector); - w[48] = hc_byte_perm (w[38], w[37], selector); - w[47] = hc_byte_perm (w[37], w[36], selector); - w[46] = hc_byte_perm (w[36], w[35], selector); - w[45] = hc_byte_perm (w[35], w[34], selector); - w[44] = hc_byte_perm (w[34], w[33], selector); - w[43] = hc_byte_perm (w[33], w[32], selector); - w[42] = hc_byte_perm (w[32], w[31], selector); - w[41] = hc_byte_perm (w[31], w[30], selector); - w[40] = hc_byte_perm (w[30], w[29], selector); - w[39] = hc_byte_perm (w[29], w[28], selector); - w[38] = hc_byte_perm (w[28], w[27], selector); - w[37] = hc_byte_perm (w[27], w[26], selector); - w[36] = hc_byte_perm (w[26], w[25], selector); - w[35] = hc_byte_perm (w[25], w[24], selector); - w[34] = hc_byte_perm (w[24], w[23], selector); - w[33] = hc_byte_perm (w[23], w[22], selector); - w[32] = hc_byte_perm (w[22], w[21], selector); - w[31] = hc_byte_perm (w[21], w[20], selector); - w[30] = hc_byte_perm (w[20], w[19], selector); - w[29] = hc_byte_perm (w[19], w[18], selector); - w[28] = hc_byte_perm (w[18], w[17], selector); - w[27] = hc_byte_perm (w[17], w[16], selector); - w[26] = hc_byte_perm (w[16], w[15], selector); - w[25] = hc_byte_perm (w[15], w[14], selector); - w[24] = hc_byte_perm (w[14], w[13], selector); - w[23] = hc_byte_perm (w[13], w[12], selector); - w[22] = hc_byte_perm (w[12], w[11], selector); - w[21] = hc_byte_perm (w[11], w[10], selector); - w[20] = hc_byte_perm (w[10], w[ 9], selector); - w[19] = hc_byte_perm (w[ 9], w[ 8], selector); - w[18] = hc_byte_perm (w[ 8], w[ 7], selector); - w[17] = hc_byte_perm (w[ 7], w[ 6], selector); - w[16] = hc_byte_perm (w[ 6], w[ 5], selector); - w[15] = hc_byte_perm (w[ 5], w[ 4], selector); - w[14] = hc_byte_perm (w[ 4], w[ 3], selector); - w[13] = hc_byte_perm (w[ 3], w[ 2], selector); - w[12] = hc_byte_perm (w[ 2], w[ 1], selector); - w[11] = hc_byte_perm (w[ 1], w[ 0], selector); - w[10] = hc_byte_perm (w[ 0], 0, selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm (w[52], w[51], selector); - w[62] = hc_byte_perm (w[51], w[50], selector); - w[61] = hc_byte_perm (w[50], w[49], selector); - w[60] = hc_byte_perm (w[49], w[48], selector); - w[59] = hc_byte_perm (w[48], w[47], selector); - w[58] = hc_byte_perm (w[47], w[46], selector); - w[57] = hc_byte_perm (w[46], w[45], selector); - w[56] = hc_byte_perm (w[45], w[44], selector); - w[55] = hc_byte_perm (w[44], w[43], selector); - w[54] = hc_byte_perm (w[43], w[42], selector); - w[53] = hc_byte_perm (w[42], w[41], selector); - w[52] = hc_byte_perm (w[41], w[40], selector); - w[51] = hc_byte_perm (w[40], w[39], selector); - w[50] = hc_byte_perm (w[39], w[38], selector); - w[49] = hc_byte_perm (w[38], w[37], selector); - w[48] = hc_byte_perm (w[37], w[36], selector); - w[47] = hc_byte_perm (w[36], w[35], selector); - w[46] = hc_byte_perm (w[35], w[34], selector); - w[45] = hc_byte_perm (w[34], w[33], selector); - w[44] = hc_byte_perm (w[33], w[32], selector); - w[43] = hc_byte_perm (w[32], w[31], selector); - w[42] = hc_byte_perm (w[31], w[30], selector); - w[41] = hc_byte_perm (w[30], w[29], selector); - w[40] = hc_byte_perm (w[29], w[28], selector); - w[39] = hc_byte_perm (w[28], w[27], selector); - w[38] = hc_byte_perm (w[27], w[26], selector); - w[37] = hc_byte_perm (w[26], w[25], selector); - w[36] = hc_byte_perm (w[25], w[24], selector); - w[35] = hc_byte_perm (w[24], w[23], selector); - w[34] = hc_byte_perm (w[23], w[22], selector); - w[33] = hc_byte_perm (w[22], w[21], selector); - w[32] = hc_byte_perm (w[21], w[20], selector); - w[31] = hc_byte_perm (w[20], w[19], selector); - w[30] = hc_byte_perm (w[19], w[18], selector); - w[29] = hc_byte_perm (w[18], w[17], selector); - w[28] = hc_byte_perm (w[17], w[16], selector); - w[27] = hc_byte_perm (w[16], w[15], selector); - w[26] = hc_byte_perm (w[15], w[14], selector); - w[25] = hc_byte_perm (w[14], w[13], selector); - w[24] = hc_byte_perm (w[13], w[12], selector); - w[23] = hc_byte_perm (w[12], w[11], selector); - w[22] = hc_byte_perm (w[11], w[10], selector); - w[21] = hc_byte_perm (w[10], w[ 9], selector); - w[20] = hc_byte_perm (w[ 9], w[ 8], selector); - w[19] = hc_byte_perm (w[ 8], w[ 7], selector); - w[18] = hc_byte_perm (w[ 7], w[ 6], selector); - w[17] = hc_byte_perm (w[ 6], w[ 5], selector); - w[16] = hc_byte_perm (w[ 5], w[ 4], selector); - w[15] = hc_byte_perm (w[ 4], w[ 3], selector); - w[14] = hc_byte_perm (w[ 3], w[ 2], selector); - w[13] = hc_byte_perm (w[ 2], w[ 1], selector); - w[12] = hc_byte_perm (w[ 1], w[ 0], selector); - w[11] = hc_byte_perm (w[ 0], 0, selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm (w[51], w[50], selector); - w[62] = hc_byte_perm (w[50], w[49], selector); - w[61] = hc_byte_perm (w[49], w[48], selector); - w[60] = hc_byte_perm (w[48], w[47], selector); - w[59] = hc_byte_perm (w[47], w[46], selector); - w[58] = hc_byte_perm (w[46], w[45], selector); - w[57] = hc_byte_perm (w[45], w[44], selector); - w[56] = hc_byte_perm (w[44], w[43], selector); - w[55] = hc_byte_perm (w[43], w[42], selector); - w[54] = hc_byte_perm (w[42], w[41], selector); - w[53] = hc_byte_perm (w[41], w[40], selector); - w[52] = hc_byte_perm (w[40], w[39], selector); - w[51] = hc_byte_perm (w[39], w[38], selector); - w[50] = hc_byte_perm (w[38], w[37], selector); - w[49] = hc_byte_perm (w[37], w[36], selector); - w[48] = hc_byte_perm (w[36], w[35], selector); - w[47] = hc_byte_perm (w[35], w[34], selector); - w[46] = hc_byte_perm (w[34], w[33], selector); - w[45] = hc_byte_perm (w[33], w[32], selector); - w[44] = hc_byte_perm (w[32], w[31], selector); - w[43] = hc_byte_perm (w[31], w[30], selector); - w[42] = hc_byte_perm (w[30], w[29], selector); - w[41] = hc_byte_perm (w[29], w[28], selector); - w[40] = hc_byte_perm (w[28], w[27], selector); - w[39] = hc_byte_perm (w[27], w[26], selector); - w[38] = hc_byte_perm (w[26], w[25], selector); - w[37] = hc_byte_perm (w[25], w[24], selector); - w[36] = hc_byte_perm (w[24], w[23], selector); - w[35] = hc_byte_perm (w[23], w[22], selector); - w[34] = hc_byte_perm (w[22], w[21], selector); - w[33] = hc_byte_perm (w[21], w[20], selector); - w[32] = hc_byte_perm (w[20], w[19], selector); - w[31] = hc_byte_perm (w[19], w[18], selector); - w[30] = hc_byte_perm (w[18], w[17], selector); - w[29] = hc_byte_perm (w[17], w[16], selector); - w[28] = hc_byte_perm (w[16], w[15], selector); - w[27] = hc_byte_perm (w[15], w[14], selector); - w[26] = hc_byte_perm (w[14], w[13], selector); - w[25] = hc_byte_perm (w[13], w[12], selector); - w[24] = hc_byte_perm (w[12], w[11], selector); - w[23] = hc_byte_perm (w[11], w[10], selector); - w[22] = hc_byte_perm (w[10], w[ 9], selector); - w[21] = hc_byte_perm (w[ 9], w[ 8], selector); - w[20] = hc_byte_perm (w[ 8], w[ 7], selector); - w[19] = hc_byte_perm (w[ 7], w[ 6], selector); - w[18] = hc_byte_perm (w[ 6], w[ 5], selector); - w[17] = hc_byte_perm (w[ 5], w[ 4], selector); - w[16] = hc_byte_perm (w[ 4], w[ 3], selector); - w[15] = hc_byte_perm (w[ 3], w[ 2], selector); - w[14] = hc_byte_perm (w[ 2], w[ 1], selector); - w[13] = hc_byte_perm (w[ 1], w[ 0], selector); - w[12] = hc_byte_perm (w[ 0], 0, selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm (w[50], w[49], selector); - w[62] = hc_byte_perm (w[49], w[48], selector); - w[61] = hc_byte_perm (w[48], w[47], selector); - w[60] = hc_byte_perm (w[47], w[46], selector); - w[59] = hc_byte_perm (w[46], w[45], selector); - w[58] = hc_byte_perm (w[45], w[44], selector); - w[57] = hc_byte_perm (w[44], w[43], selector); - w[56] = hc_byte_perm (w[43], w[42], selector); - w[55] = hc_byte_perm (w[42], w[41], selector); - w[54] = hc_byte_perm (w[41], w[40], selector); - w[53] = hc_byte_perm (w[40], w[39], selector); - w[52] = hc_byte_perm (w[39], w[38], selector); - w[51] = hc_byte_perm (w[38], w[37], selector); - w[50] = hc_byte_perm (w[37], w[36], selector); - w[49] = hc_byte_perm (w[36], w[35], selector); - w[48] = hc_byte_perm (w[35], w[34], selector); - w[47] = hc_byte_perm (w[34], w[33], selector); - w[46] = hc_byte_perm (w[33], w[32], selector); - w[45] = hc_byte_perm (w[32], w[31], selector); - w[44] = hc_byte_perm (w[31], w[30], selector); - w[43] = hc_byte_perm (w[30], w[29], selector); - w[42] = hc_byte_perm (w[29], w[28], selector); - w[41] = hc_byte_perm (w[28], w[27], selector); - w[40] = hc_byte_perm (w[27], w[26], selector); - w[39] = hc_byte_perm (w[26], w[25], selector); - w[38] = hc_byte_perm (w[25], w[24], selector); - w[37] = hc_byte_perm (w[24], w[23], selector); - w[36] = hc_byte_perm (w[23], w[22], selector); - w[35] = hc_byte_perm (w[22], w[21], selector); - w[34] = hc_byte_perm (w[21], w[20], selector); - w[33] = hc_byte_perm (w[20], w[19], selector); - w[32] = hc_byte_perm (w[19], w[18], selector); - w[31] = hc_byte_perm (w[18], w[17], selector); - w[30] = hc_byte_perm (w[17], w[16], selector); - w[29] = hc_byte_perm (w[16], w[15], selector); - w[28] = hc_byte_perm (w[15], w[14], selector); - w[27] = hc_byte_perm (w[14], w[13], selector); - w[26] = hc_byte_perm (w[13], w[12], selector); - w[25] = hc_byte_perm (w[12], w[11], selector); - w[24] = hc_byte_perm (w[11], w[10], selector); - w[23] = hc_byte_perm (w[10], w[ 9], selector); - w[22] = hc_byte_perm (w[ 9], w[ 8], selector); - w[21] = hc_byte_perm (w[ 8], w[ 7], selector); - w[20] = hc_byte_perm (w[ 7], w[ 6], selector); - w[19] = hc_byte_perm (w[ 6], w[ 5], selector); - w[18] = hc_byte_perm (w[ 5], w[ 4], selector); - w[17] = hc_byte_perm (w[ 4], w[ 3], selector); - w[16] = hc_byte_perm (w[ 3], w[ 2], selector); - w[15] = hc_byte_perm (w[ 2], w[ 1], selector); - w[14] = hc_byte_perm (w[ 1], w[ 0], selector); - w[13] = hc_byte_perm (w[ 0], 0, selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm (w[49], w[48], selector); - w[62] = hc_byte_perm (w[48], w[47], selector); - w[61] = hc_byte_perm (w[47], w[46], selector); - w[60] = hc_byte_perm (w[46], w[45], selector); - w[59] = hc_byte_perm (w[45], w[44], selector); - w[58] = hc_byte_perm (w[44], w[43], selector); - w[57] = hc_byte_perm (w[43], w[42], selector); - w[56] = hc_byte_perm (w[42], w[41], selector); - w[55] = hc_byte_perm (w[41], w[40], selector); - w[54] = hc_byte_perm (w[40], w[39], selector); - w[53] = hc_byte_perm (w[39], w[38], selector); - w[52] = hc_byte_perm (w[38], w[37], selector); - w[51] = hc_byte_perm (w[37], w[36], selector); - w[50] = hc_byte_perm (w[36], w[35], selector); - w[49] = hc_byte_perm (w[35], w[34], selector); - w[48] = hc_byte_perm (w[34], w[33], selector); - w[47] = hc_byte_perm (w[33], w[32], selector); - w[46] = hc_byte_perm (w[32], w[31], selector); - w[45] = hc_byte_perm (w[31], w[30], selector); - w[44] = hc_byte_perm (w[30], w[29], selector); - w[43] = hc_byte_perm (w[29], w[28], selector); - w[42] = hc_byte_perm (w[28], w[27], selector); - w[41] = hc_byte_perm (w[27], w[26], selector); - w[40] = hc_byte_perm (w[26], w[25], selector); - w[39] = hc_byte_perm (w[25], w[24], selector); - w[38] = hc_byte_perm (w[24], w[23], selector); - w[37] = hc_byte_perm (w[23], w[22], selector); - w[36] = hc_byte_perm (w[22], w[21], selector); - w[35] = hc_byte_perm (w[21], w[20], selector); - w[34] = hc_byte_perm (w[20], w[19], selector); - w[33] = hc_byte_perm (w[19], w[18], selector); - w[32] = hc_byte_perm (w[18], w[17], selector); - w[31] = hc_byte_perm (w[17], w[16], selector); - w[30] = hc_byte_perm (w[16], w[15], selector); - w[29] = hc_byte_perm (w[15], w[14], selector); - w[28] = hc_byte_perm (w[14], w[13], selector); - w[27] = hc_byte_perm (w[13], w[12], selector); - w[26] = hc_byte_perm (w[12], w[11], selector); - w[25] = hc_byte_perm (w[11], w[10], selector); - w[24] = hc_byte_perm (w[10], w[ 9], selector); - w[23] = hc_byte_perm (w[ 9], w[ 8], selector); - w[22] = hc_byte_perm (w[ 8], w[ 7], selector); - w[21] = hc_byte_perm (w[ 7], w[ 6], selector); - w[20] = hc_byte_perm (w[ 6], w[ 5], selector); - w[19] = hc_byte_perm (w[ 5], w[ 4], selector); - w[18] = hc_byte_perm (w[ 4], w[ 3], selector); - w[17] = hc_byte_perm (w[ 3], w[ 2], selector); - w[16] = hc_byte_perm (w[ 2], w[ 1], selector); - w[15] = hc_byte_perm (w[ 1], w[ 0], selector); - w[14] = hc_byte_perm (w[ 0], 0, selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm (w[48], w[47], selector); - w[62] = hc_byte_perm (w[47], w[46], selector); - w[61] = hc_byte_perm (w[46], w[45], selector); - w[60] = hc_byte_perm (w[45], w[44], selector); - w[59] = hc_byte_perm (w[44], w[43], selector); - w[58] = hc_byte_perm (w[43], w[42], selector); - w[57] = hc_byte_perm (w[42], w[41], selector); - w[56] = hc_byte_perm (w[41], w[40], selector); - w[55] = hc_byte_perm (w[40], w[39], selector); - w[54] = hc_byte_perm (w[39], w[38], selector); - w[53] = hc_byte_perm (w[38], w[37], selector); - w[52] = hc_byte_perm (w[37], w[36], selector); - w[51] = hc_byte_perm (w[36], w[35], selector); - w[50] = hc_byte_perm (w[35], w[34], selector); - w[49] = hc_byte_perm (w[34], w[33], selector); - w[48] = hc_byte_perm (w[33], w[32], selector); - w[47] = hc_byte_perm (w[32], w[31], selector); - w[46] = hc_byte_perm (w[31], w[30], selector); - w[45] = hc_byte_perm (w[30], w[29], selector); - w[44] = hc_byte_perm (w[29], w[28], selector); - w[43] = hc_byte_perm (w[28], w[27], selector); - w[42] = hc_byte_perm (w[27], w[26], selector); - w[41] = hc_byte_perm (w[26], w[25], selector); - w[40] = hc_byte_perm (w[25], w[24], selector); - w[39] = hc_byte_perm (w[24], w[23], selector); - w[38] = hc_byte_perm (w[23], w[22], selector); - w[37] = hc_byte_perm (w[22], w[21], selector); - w[36] = hc_byte_perm (w[21], w[20], selector); - w[35] = hc_byte_perm (w[20], w[19], selector); - w[34] = hc_byte_perm (w[19], w[18], selector); - w[33] = hc_byte_perm (w[18], w[17], selector); - w[32] = hc_byte_perm (w[17], w[16], selector); - w[31] = hc_byte_perm (w[16], w[15], selector); - w[30] = hc_byte_perm (w[15], w[14], selector); - w[29] = hc_byte_perm (w[14], w[13], selector); - w[28] = hc_byte_perm (w[13], w[12], selector); - w[27] = hc_byte_perm (w[12], w[11], selector); - w[26] = hc_byte_perm (w[11], w[10], selector); - w[25] = hc_byte_perm (w[10], w[ 9], selector); - w[24] = hc_byte_perm (w[ 9], w[ 8], selector); - w[23] = hc_byte_perm (w[ 8], w[ 7], selector); - w[22] = hc_byte_perm (w[ 7], w[ 6], selector); - w[21] = hc_byte_perm (w[ 6], w[ 5], selector); - w[20] = hc_byte_perm (w[ 5], w[ 4], selector); - w[19] = hc_byte_perm (w[ 4], w[ 3], selector); - w[18] = hc_byte_perm (w[ 3], w[ 2], selector); - w[17] = hc_byte_perm (w[ 2], w[ 1], selector); - w[16] = hc_byte_perm (w[ 1], w[ 0], selector); - w[15] = hc_byte_perm (w[ 0], 0, selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm (w[47], w[46], selector); - w[62] = hc_byte_perm (w[46], w[45], selector); - w[61] = hc_byte_perm (w[45], w[44], selector); - w[60] = hc_byte_perm (w[44], w[43], selector); - w[59] = hc_byte_perm (w[43], w[42], selector); - w[58] = hc_byte_perm (w[42], w[41], selector); - w[57] = hc_byte_perm (w[41], w[40], selector); - w[56] = hc_byte_perm (w[40], w[39], selector); - w[55] = hc_byte_perm (w[39], w[38], selector); - w[54] = hc_byte_perm (w[38], w[37], selector); - w[53] = hc_byte_perm (w[37], w[36], selector); - w[52] = hc_byte_perm (w[36], w[35], selector); - w[51] = hc_byte_perm (w[35], w[34], selector); - w[50] = hc_byte_perm (w[34], w[33], selector); - w[49] = hc_byte_perm (w[33], w[32], selector); - w[48] = hc_byte_perm (w[32], w[31], selector); - w[47] = hc_byte_perm (w[31], w[30], selector); - w[46] = hc_byte_perm (w[30], w[29], selector); - w[45] = hc_byte_perm (w[29], w[28], selector); - w[44] = hc_byte_perm (w[28], w[27], selector); - w[43] = hc_byte_perm (w[27], w[26], selector); - w[42] = hc_byte_perm (w[26], w[25], selector); - w[41] = hc_byte_perm (w[25], w[24], selector); - w[40] = hc_byte_perm (w[24], w[23], selector); - w[39] = hc_byte_perm (w[23], w[22], selector); - w[38] = hc_byte_perm (w[22], w[21], selector); - w[37] = hc_byte_perm (w[21], w[20], selector); - w[36] = hc_byte_perm (w[20], w[19], selector); - w[35] = hc_byte_perm (w[19], w[18], selector); - w[34] = hc_byte_perm (w[18], w[17], selector); - w[33] = hc_byte_perm (w[17], w[16], selector); - w[32] = hc_byte_perm (w[16], w[15], selector); - w[31] = hc_byte_perm (w[15], w[14], selector); - w[30] = hc_byte_perm (w[14], w[13], selector); - w[29] = hc_byte_perm (w[13], w[12], selector); - w[28] = hc_byte_perm (w[12], w[11], selector); - w[27] = hc_byte_perm (w[11], w[10], selector); - w[26] = hc_byte_perm (w[10], w[ 9], selector); - w[25] = hc_byte_perm (w[ 9], w[ 8], selector); - w[24] = hc_byte_perm (w[ 8], w[ 7], selector); - w[23] = hc_byte_perm (w[ 7], w[ 6], selector); - w[22] = hc_byte_perm (w[ 6], w[ 5], selector); - w[21] = hc_byte_perm (w[ 5], w[ 4], selector); - w[20] = hc_byte_perm (w[ 4], w[ 3], selector); - w[19] = hc_byte_perm (w[ 3], w[ 2], selector); - w[18] = hc_byte_perm (w[ 2], w[ 1], selector); - w[17] = hc_byte_perm (w[ 1], w[ 0], selector); - w[16] = hc_byte_perm (w[ 0], 0, selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm (w[46], w[45], selector); - w[62] = hc_byte_perm (w[45], w[44], selector); - w[61] = hc_byte_perm (w[44], w[43], selector); - w[60] = hc_byte_perm (w[43], w[42], selector); - w[59] = hc_byte_perm (w[42], w[41], selector); - w[58] = hc_byte_perm (w[41], w[40], selector); - w[57] = hc_byte_perm (w[40], w[39], selector); - w[56] = hc_byte_perm (w[39], w[38], selector); - w[55] = hc_byte_perm (w[38], w[37], selector); - w[54] = hc_byte_perm (w[37], w[36], selector); - w[53] = hc_byte_perm (w[36], w[35], selector); - w[52] = hc_byte_perm (w[35], w[34], selector); - w[51] = hc_byte_perm (w[34], w[33], selector); - w[50] = hc_byte_perm (w[33], w[32], selector); - w[49] = hc_byte_perm (w[32], w[31], selector); - w[48] = hc_byte_perm (w[31], w[30], selector); - w[47] = hc_byte_perm (w[30], w[29], selector); - w[46] = hc_byte_perm (w[29], w[28], selector); - w[45] = hc_byte_perm (w[28], w[27], selector); - w[44] = hc_byte_perm (w[27], w[26], selector); - w[43] = hc_byte_perm (w[26], w[25], selector); - w[42] = hc_byte_perm (w[25], w[24], selector); - w[41] = hc_byte_perm (w[24], w[23], selector); - w[40] = hc_byte_perm (w[23], w[22], selector); - w[39] = hc_byte_perm (w[22], w[21], selector); - w[38] = hc_byte_perm (w[21], w[20], selector); - w[37] = hc_byte_perm (w[20], w[19], selector); - w[36] = hc_byte_perm (w[19], w[18], selector); - w[35] = hc_byte_perm (w[18], w[17], selector); - w[34] = hc_byte_perm (w[17], w[16], selector); - w[33] = hc_byte_perm (w[16], w[15], selector); - w[32] = hc_byte_perm (w[15], w[14], selector); - w[31] = hc_byte_perm (w[14], w[13], selector); - w[30] = hc_byte_perm (w[13], w[12], selector); - w[29] = hc_byte_perm (w[12], w[11], selector); - w[28] = hc_byte_perm (w[11], w[10], selector); - w[27] = hc_byte_perm (w[10], w[ 9], selector); - w[26] = hc_byte_perm (w[ 9], w[ 8], selector); - w[25] = hc_byte_perm (w[ 8], w[ 7], selector); - w[24] = hc_byte_perm (w[ 7], w[ 6], selector); - w[23] = hc_byte_perm (w[ 6], w[ 5], selector); - w[22] = hc_byte_perm (w[ 5], w[ 4], selector); - w[21] = hc_byte_perm (w[ 4], w[ 3], selector); - w[20] = hc_byte_perm (w[ 3], w[ 2], selector); - w[19] = hc_byte_perm (w[ 2], w[ 1], selector); - w[18] = hc_byte_perm (w[ 1], w[ 0], selector); - w[17] = hc_byte_perm (w[ 0], 0, selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm (w[45], w[44], selector); - w[62] = hc_byte_perm (w[44], w[43], selector); - w[61] = hc_byte_perm (w[43], w[42], selector); - w[60] = hc_byte_perm (w[42], w[41], selector); - w[59] = hc_byte_perm (w[41], w[40], selector); - w[58] = hc_byte_perm (w[40], w[39], selector); - w[57] = hc_byte_perm (w[39], w[38], selector); - w[56] = hc_byte_perm (w[38], w[37], selector); - w[55] = hc_byte_perm (w[37], w[36], selector); - w[54] = hc_byte_perm (w[36], w[35], selector); - w[53] = hc_byte_perm (w[35], w[34], selector); - w[52] = hc_byte_perm (w[34], w[33], selector); - w[51] = hc_byte_perm (w[33], w[32], selector); - w[50] = hc_byte_perm (w[32], w[31], selector); - w[49] = hc_byte_perm (w[31], w[30], selector); - w[48] = hc_byte_perm (w[30], w[29], selector); - w[47] = hc_byte_perm (w[29], w[28], selector); - w[46] = hc_byte_perm (w[28], w[27], selector); - w[45] = hc_byte_perm (w[27], w[26], selector); - w[44] = hc_byte_perm (w[26], w[25], selector); - w[43] = hc_byte_perm (w[25], w[24], selector); - w[42] = hc_byte_perm (w[24], w[23], selector); - w[41] = hc_byte_perm (w[23], w[22], selector); - w[40] = hc_byte_perm (w[22], w[21], selector); - w[39] = hc_byte_perm (w[21], w[20], selector); - w[38] = hc_byte_perm (w[20], w[19], selector); - w[37] = hc_byte_perm (w[19], w[18], selector); - w[36] = hc_byte_perm (w[18], w[17], selector); - w[35] = hc_byte_perm (w[17], w[16], selector); - w[34] = hc_byte_perm (w[16], w[15], selector); - w[33] = hc_byte_perm (w[15], w[14], selector); - w[32] = hc_byte_perm (w[14], w[13], selector); - w[31] = hc_byte_perm (w[13], w[12], selector); - w[30] = hc_byte_perm (w[12], w[11], selector); - w[29] = hc_byte_perm (w[11], w[10], selector); - w[28] = hc_byte_perm (w[10], w[ 9], selector); - w[27] = hc_byte_perm (w[ 9], w[ 8], selector); - w[26] = hc_byte_perm (w[ 8], w[ 7], selector); - w[25] = hc_byte_perm (w[ 7], w[ 6], selector); - w[24] = hc_byte_perm (w[ 6], w[ 5], selector); - w[23] = hc_byte_perm (w[ 5], w[ 4], selector); - w[22] = hc_byte_perm (w[ 4], w[ 3], selector); - w[21] = hc_byte_perm (w[ 3], w[ 2], selector); - w[20] = hc_byte_perm (w[ 2], w[ 1], selector); - w[19] = hc_byte_perm (w[ 1], w[ 0], selector); - w[18] = hc_byte_perm (w[ 0], 0, selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm (w[44], w[43], selector); - w[62] = hc_byte_perm (w[43], w[42], selector); - w[61] = hc_byte_perm (w[42], w[41], selector); - w[60] = hc_byte_perm (w[41], w[40], selector); - w[59] = hc_byte_perm (w[40], w[39], selector); - w[58] = hc_byte_perm (w[39], w[38], selector); - w[57] = hc_byte_perm (w[38], w[37], selector); - w[56] = hc_byte_perm (w[37], w[36], selector); - w[55] = hc_byte_perm (w[36], w[35], selector); - w[54] = hc_byte_perm (w[35], w[34], selector); - w[53] = hc_byte_perm (w[34], w[33], selector); - w[52] = hc_byte_perm (w[33], w[32], selector); - w[51] = hc_byte_perm (w[32], w[31], selector); - w[50] = hc_byte_perm (w[31], w[30], selector); - w[49] = hc_byte_perm (w[30], w[29], selector); - w[48] = hc_byte_perm (w[29], w[28], selector); - w[47] = hc_byte_perm (w[28], w[27], selector); - w[46] = hc_byte_perm (w[27], w[26], selector); - w[45] = hc_byte_perm (w[26], w[25], selector); - w[44] = hc_byte_perm (w[25], w[24], selector); - w[43] = hc_byte_perm (w[24], w[23], selector); - w[42] = hc_byte_perm (w[23], w[22], selector); - w[41] = hc_byte_perm (w[22], w[21], selector); - w[40] = hc_byte_perm (w[21], w[20], selector); - w[39] = hc_byte_perm (w[20], w[19], selector); - w[38] = hc_byte_perm (w[19], w[18], selector); - w[37] = hc_byte_perm (w[18], w[17], selector); - w[36] = hc_byte_perm (w[17], w[16], selector); - w[35] = hc_byte_perm (w[16], w[15], selector); - w[34] = hc_byte_perm (w[15], w[14], selector); - w[33] = hc_byte_perm (w[14], w[13], selector); - w[32] = hc_byte_perm (w[13], w[12], selector); - w[31] = hc_byte_perm (w[12], w[11], selector); - w[30] = hc_byte_perm (w[11], w[10], selector); - w[29] = hc_byte_perm (w[10], w[ 9], selector); - w[28] = hc_byte_perm (w[ 9], w[ 8], selector); - w[27] = hc_byte_perm (w[ 8], w[ 7], selector); - w[26] = hc_byte_perm (w[ 7], w[ 6], selector); - w[25] = hc_byte_perm (w[ 6], w[ 5], selector); - w[24] = hc_byte_perm (w[ 5], w[ 4], selector); - w[23] = hc_byte_perm (w[ 4], w[ 3], selector); - w[22] = hc_byte_perm (w[ 3], w[ 2], selector); - w[21] = hc_byte_perm (w[ 2], w[ 1], selector); - w[20] = hc_byte_perm (w[ 1], w[ 0], selector); - w[19] = hc_byte_perm (w[ 0], 0, selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm (w[43], w[42], selector); - w[62] = hc_byte_perm (w[42], w[41], selector); - w[61] = hc_byte_perm (w[41], w[40], selector); - w[60] = hc_byte_perm (w[40], w[39], selector); - w[59] = hc_byte_perm (w[39], w[38], selector); - w[58] = hc_byte_perm (w[38], w[37], selector); - w[57] = hc_byte_perm (w[37], w[36], selector); - w[56] = hc_byte_perm (w[36], w[35], selector); - w[55] = hc_byte_perm (w[35], w[34], selector); - w[54] = hc_byte_perm (w[34], w[33], selector); - w[53] = hc_byte_perm (w[33], w[32], selector); - w[52] = hc_byte_perm (w[32], w[31], selector); - w[51] = hc_byte_perm (w[31], w[30], selector); - w[50] = hc_byte_perm (w[30], w[29], selector); - w[49] = hc_byte_perm (w[29], w[28], selector); - w[48] = hc_byte_perm (w[28], w[27], selector); - w[47] = hc_byte_perm (w[27], w[26], selector); - w[46] = hc_byte_perm (w[26], w[25], selector); - w[45] = hc_byte_perm (w[25], w[24], selector); - w[44] = hc_byte_perm (w[24], w[23], selector); - w[43] = hc_byte_perm (w[23], w[22], selector); - w[42] = hc_byte_perm (w[22], w[21], selector); - w[41] = hc_byte_perm (w[21], w[20], selector); - w[40] = hc_byte_perm (w[20], w[19], selector); - w[39] = hc_byte_perm (w[19], w[18], selector); - w[38] = hc_byte_perm (w[18], w[17], selector); - w[37] = hc_byte_perm (w[17], w[16], selector); - w[36] = hc_byte_perm (w[16], w[15], selector); - w[35] = hc_byte_perm (w[15], w[14], selector); - w[34] = hc_byte_perm (w[14], w[13], selector); - w[33] = hc_byte_perm (w[13], w[12], selector); - w[32] = hc_byte_perm (w[12], w[11], selector); - w[31] = hc_byte_perm (w[11], w[10], selector); - w[30] = hc_byte_perm (w[10], w[ 9], selector); - w[29] = hc_byte_perm (w[ 9], w[ 8], selector); - w[28] = hc_byte_perm (w[ 8], w[ 7], selector); - w[27] = hc_byte_perm (w[ 7], w[ 6], selector); - w[26] = hc_byte_perm (w[ 6], w[ 5], selector); - w[25] = hc_byte_perm (w[ 5], w[ 4], selector); - w[24] = hc_byte_perm (w[ 4], w[ 3], selector); - w[23] = hc_byte_perm (w[ 3], w[ 2], selector); - w[22] = hc_byte_perm (w[ 2], w[ 1], selector); - w[21] = hc_byte_perm (w[ 1], w[ 0], selector); - w[20] = hc_byte_perm (w[ 0], 0, selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm (w[42], w[41], selector); - w[62] = hc_byte_perm (w[41], w[40], selector); - w[61] = hc_byte_perm (w[40], w[39], selector); - w[60] = hc_byte_perm (w[39], w[38], selector); - w[59] = hc_byte_perm (w[38], w[37], selector); - w[58] = hc_byte_perm (w[37], w[36], selector); - w[57] = hc_byte_perm (w[36], w[35], selector); - w[56] = hc_byte_perm (w[35], w[34], selector); - w[55] = hc_byte_perm (w[34], w[33], selector); - w[54] = hc_byte_perm (w[33], w[32], selector); - w[53] = hc_byte_perm (w[32], w[31], selector); - w[52] = hc_byte_perm (w[31], w[30], selector); - w[51] = hc_byte_perm (w[30], w[29], selector); - w[50] = hc_byte_perm (w[29], w[28], selector); - w[49] = hc_byte_perm (w[28], w[27], selector); - w[48] = hc_byte_perm (w[27], w[26], selector); - w[47] = hc_byte_perm (w[26], w[25], selector); - w[46] = hc_byte_perm (w[25], w[24], selector); - w[45] = hc_byte_perm (w[24], w[23], selector); - w[44] = hc_byte_perm (w[23], w[22], selector); - w[43] = hc_byte_perm (w[22], w[21], selector); - w[42] = hc_byte_perm (w[21], w[20], selector); - w[41] = hc_byte_perm (w[20], w[19], selector); - w[40] = hc_byte_perm (w[19], w[18], selector); - w[39] = hc_byte_perm (w[18], w[17], selector); - w[38] = hc_byte_perm (w[17], w[16], selector); - w[37] = hc_byte_perm (w[16], w[15], selector); - w[36] = hc_byte_perm (w[15], w[14], selector); - w[35] = hc_byte_perm (w[14], w[13], selector); - w[34] = hc_byte_perm (w[13], w[12], selector); - w[33] = hc_byte_perm (w[12], w[11], selector); - w[32] = hc_byte_perm (w[11], w[10], selector); - w[31] = hc_byte_perm (w[10], w[ 9], selector); - w[30] = hc_byte_perm (w[ 9], w[ 8], selector); - w[29] = hc_byte_perm (w[ 8], w[ 7], selector); - w[28] = hc_byte_perm (w[ 7], w[ 6], selector); - w[27] = hc_byte_perm (w[ 6], w[ 5], selector); - w[26] = hc_byte_perm (w[ 5], w[ 4], selector); - w[25] = hc_byte_perm (w[ 4], w[ 3], selector); - w[24] = hc_byte_perm (w[ 3], w[ 2], selector); - w[23] = hc_byte_perm (w[ 2], w[ 1], selector); - w[22] = hc_byte_perm (w[ 1], w[ 0], selector); - w[21] = hc_byte_perm (w[ 0], 0, selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm (w[41], w[40], selector); - w[62] = hc_byte_perm (w[40], w[39], selector); - w[61] = hc_byte_perm (w[39], w[38], selector); - w[60] = hc_byte_perm (w[38], w[37], selector); - w[59] = hc_byte_perm (w[37], w[36], selector); - w[58] = hc_byte_perm (w[36], w[35], selector); - w[57] = hc_byte_perm (w[35], w[34], selector); - w[56] = hc_byte_perm (w[34], w[33], selector); - w[55] = hc_byte_perm (w[33], w[32], selector); - w[54] = hc_byte_perm (w[32], w[31], selector); - w[53] = hc_byte_perm (w[31], w[30], selector); - w[52] = hc_byte_perm (w[30], w[29], selector); - w[51] = hc_byte_perm (w[29], w[28], selector); - w[50] = hc_byte_perm (w[28], w[27], selector); - w[49] = hc_byte_perm (w[27], w[26], selector); - w[48] = hc_byte_perm (w[26], w[25], selector); - w[47] = hc_byte_perm (w[25], w[24], selector); - w[46] = hc_byte_perm (w[24], w[23], selector); - w[45] = hc_byte_perm (w[23], w[22], selector); - w[44] = hc_byte_perm (w[22], w[21], selector); - w[43] = hc_byte_perm (w[21], w[20], selector); - w[42] = hc_byte_perm (w[20], w[19], selector); - w[41] = hc_byte_perm (w[19], w[18], selector); - w[40] = hc_byte_perm (w[18], w[17], selector); - w[39] = hc_byte_perm (w[17], w[16], selector); - w[38] = hc_byte_perm (w[16], w[15], selector); - w[37] = hc_byte_perm (w[15], w[14], selector); - w[36] = hc_byte_perm (w[14], w[13], selector); - w[35] = hc_byte_perm (w[13], w[12], selector); - w[34] = hc_byte_perm (w[12], w[11], selector); - w[33] = hc_byte_perm (w[11], w[10], selector); - w[32] = hc_byte_perm (w[10], w[ 9], selector); - w[31] = hc_byte_perm (w[ 9], w[ 8], selector); - w[30] = hc_byte_perm (w[ 8], w[ 7], selector); - w[29] = hc_byte_perm (w[ 7], w[ 6], selector); - w[28] = hc_byte_perm (w[ 6], w[ 5], selector); - w[27] = hc_byte_perm (w[ 5], w[ 4], selector); - w[26] = hc_byte_perm (w[ 4], w[ 3], selector); - w[25] = hc_byte_perm (w[ 3], w[ 2], selector); - w[24] = hc_byte_perm (w[ 2], w[ 1], selector); - w[23] = hc_byte_perm (w[ 1], w[ 0], selector); - w[22] = hc_byte_perm (w[ 0], 0, selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm (w[40], w[39], selector); - w[62] = hc_byte_perm (w[39], w[38], selector); - w[61] = hc_byte_perm (w[38], w[37], selector); - w[60] = hc_byte_perm (w[37], w[36], selector); - w[59] = hc_byte_perm (w[36], w[35], selector); - w[58] = hc_byte_perm (w[35], w[34], selector); - w[57] = hc_byte_perm (w[34], w[33], selector); - w[56] = hc_byte_perm (w[33], w[32], selector); - w[55] = hc_byte_perm (w[32], w[31], selector); - w[54] = hc_byte_perm (w[31], w[30], selector); - w[53] = hc_byte_perm (w[30], w[29], selector); - w[52] = hc_byte_perm (w[29], w[28], selector); - w[51] = hc_byte_perm (w[28], w[27], selector); - w[50] = hc_byte_perm (w[27], w[26], selector); - w[49] = hc_byte_perm (w[26], w[25], selector); - w[48] = hc_byte_perm (w[25], w[24], selector); - w[47] = hc_byte_perm (w[24], w[23], selector); - w[46] = hc_byte_perm (w[23], w[22], selector); - w[45] = hc_byte_perm (w[22], w[21], selector); - w[44] = hc_byte_perm (w[21], w[20], selector); - w[43] = hc_byte_perm (w[20], w[19], selector); - w[42] = hc_byte_perm (w[19], w[18], selector); - w[41] = hc_byte_perm (w[18], w[17], selector); - w[40] = hc_byte_perm (w[17], w[16], selector); - w[39] = hc_byte_perm (w[16], w[15], selector); - w[38] = hc_byte_perm (w[15], w[14], selector); - w[37] = hc_byte_perm (w[14], w[13], selector); - w[36] = hc_byte_perm (w[13], w[12], selector); - w[35] = hc_byte_perm (w[12], w[11], selector); - w[34] = hc_byte_perm (w[11], w[10], selector); - w[33] = hc_byte_perm (w[10], w[ 9], selector); - w[32] = hc_byte_perm (w[ 9], w[ 8], selector); - w[31] = hc_byte_perm (w[ 8], w[ 7], selector); - w[30] = hc_byte_perm (w[ 7], w[ 6], selector); - w[29] = hc_byte_perm (w[ 6], w[ 5], selector); - w[28] = hc_byte_perm (w[ 5], w[ 4], selector); - w[27] = hc_byte_perm (w[ 4], w[ 3], selector); - w[26] = hc_byte_perm (w[ 3], w[ 2], selector); - w[25] = hc_byte_perm (w[ 2], w[ 1], selector); - w[24] = hc_byte_perm (w[ 1], w[ 0], selector); - w[23] = hc_byte_perm (w[ 0], 0, selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm (w[39], w[38], selector); - w[62] = hc_byte_perm (w[38], w[37], selector); - w[61] = hc_byte_perm (w[37], w[36], selector); - w[60] = hc_byte_perm (w[36], w[35], selector); - w[59] = hc_byte_perm (w[35], w[34], selector); - w[58] = hc_byte_perm (w[34], w[33], selector); - w[57] = hc_byte_perm (w[33], w[32], selector); - w[56] = hc_byte_perm (w[32], w[31], selector); - w[55] = hc_byte_perm (w[31], w[30], selector); - w[54] = hc_byte_perm (w[30], w[29], selector); - w[53] = hc_byte_perm (w[29], w[28], selector); - w[52] = hc_byte_perm (w[28], w[27], selector); - w[51] = hc_byte_perm (w[27], w[26], selector); - w[50] = hc_byte_perm (w[26], w[25], selector); - w[49] = hc_byte_perm (w[25], w[24], selector); - w[48] = hc_byte_perm (w[24], w[23], selector); - w[47] = hc_byte_perm (w[23], w[22], selector); - w[46] = hc_byte_perm (w[22], w[21], selector); - w[45] = hc_byte_perm (w[21], w[20], selector); - w[44] = hc_byte_perm (w[20], w[19], selector); - w[43] = hc_byte_perm (w[19], w[18], selector); - w[42] = hc_byte_perm (w[18], w[17], selector); - w[41] = hc_byte_perm (w[17], w[16], selector); - w[40] = hc_byte_perm (w[16], w[15], selector); - w[39] = hc_byte_perm (w[15], w[14], selector); - w[38] = hc_byte_perm (w[14], w[13], selector); - w[37] = hc_byte_perm (w[13], w[12], selector); - w[36] = hc_byte_perm (w[12], w[11], selector); - w[35] = hc_byte_perm (w[11], w[10], selector); - w[34] = hc_byte_perm (w[10], w[ 9], selector); - w[33] = hc_byte_perm (w[ 9], w[ 8], selector); - w[32] = hc_byte_perm (w[ 8], w[ 7], selector); - w[31] = hc_byte_perm (w[ 7], w[ 6], selector); - w[30] = hc_byte_perm (w[ 6], w[ 5], selector); - w[29] = hc_byte_perm (w[ 5], w[ 4], selector); - w[28] = hc_byte_perm (w[ 4], w[ 3], selector); - w[27] = hc_byte_perm (w[ 3], w[ 2], selector); - w[26] = hc_byte_perm (w[ 2], w[ 1], selector); - w[25] = hc_byte_perm (w[ 1], w[ 0], selector); - w[24] = hc_byte_perm (w[ 0], 0, selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm (w[38], w[37], selector); - w[62] = hc_byte_perm (w[37], w[36], selector); - w[61] = hc_byte_perm (w[36], w[35], selector); - w[60] = hc_byte_perm (w[35], w[34], selector); - w[59] = hc_byte_perm (w[34], w[33], selector); - w[58] = hc_byte_perm (w[33], w[32], selector); - w[57] = hc_byte_perm (w[32], w[31], selector); - w[56] = hc_byte_perm (w[31], w[30], selector); - w[55] = hc_byte_perm (w[30], w[29], selector); - w[54] = hc_byte_perm (w[29], w[28], selector); - w[53] = hc_byte_perm (w[28], w[27], selector); - w[52] = hc_byte_perm (w[27], w[26], selector); - w[51] = hc_byte_perm (w[26], w[25], selector); - w[50] = hc_byte_perm (w[25], w[24], selector); - w[49] = hc_byte_perm (w[24], w[23], selector); - w[48] = hc_byte_perm (w[23], w[22], selector); - w[47] = hc_byte_perm (w[22], w[21], selector); - w[46] = hc_byte_perm (w[21], w[20], selector); - w[45] = hc_byte_perm (w[20], w[19], selector); - w[44] = hc_byte_perm (w[19], w[18], selector); - w[43] = hc_byte_perm (w[18], w[17], selector); - w[42] = hc_byte_perm (w[17], w[16], selector); - w[41] = hc_byte_perm (w[16], w[15], selector); - w[40] = hc_byte_perm (w[15], w[14], selector); - w[39] = hc_byte_perm (w[14], w[13], selector); - w[38] = hc_byte_perm (w[13], w[12], selector); - w[37] = hc_byte_perm (w[12], w[11], selector); - w[36] = hc_byte_perm (w[11], w[10], selector); - w[35] = hc_byte_perm (w[10], w[ 9], selector); - w[34] = hc_byte_perm (w[ 9], w[ 8], selector); - w[33] = hc_byte_perm (w[ 8], w[ 7], selector); - w[32] = hc_byte_perm (w[ 7], w[ 6], selector); - w[31] = hc_byte_perm (w[ 6], w[ 5], selector); - w[30] = hc_byte_perm (w[ 5], w[ 4], selector); - w[29] = hc_byte_perm (w[ 4], w[ 3], selector); - w[28] = hc_byte_perm (w[ 3], w[ 2], selector); - w[27] = hc_byte_perm (w[ 2], w[ 1], selector); - w[26] = hc_byte_perm (w[ 1], w[ 0], selector); - w[25] = hc_byte_perm (w[ 0], 0, selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm (w[37], w[36], selector); - w[62] = hc_byte_perm (w[36], w[35], selector); - w[61] = hc_byte_perm (w[35], w[34], selector); - w[60] = hc_byte_perm (w[34], w[33], selector); - w[59] = hc_byte_perm (w[33], w[32], selector); - w[58] = hc_byte_perm (w[32], w[31], selector); - w[57] = hc_byte_perm (w[31], w[30], selector); - w[56] = hc_byte_perm (w[30], w[29], selector); - w[55] = hc_byte_perm (w[29], w[28], selector); - w[54] = hc_byte_perm (w[28], w[27], selector); - w[53] = hc_byte_perm (w[27], w[26], selector); - w[52] = hc_byte_perm (w[26], w[25], selector); - w[51] = hc_byte_perm (w[25], w[24], selector); - w[50] = hc_byte_perm (w[24], w[23], selector); - w[49] = hc_byte_perm (w[23], w[22], selector); - w[48] = hc_byte_perm (w[22], w[21], selector); - w[47] = hc_byte_perm (w[21], w[20], selector); - w[46] = hc_byte_perm (w[20], w[19], selector); - w[45] = hc_byte_perm (w[19], w[18], selector); - w[44] = hc_byte_perm (w[18], w[17], selector); - w[43] = hc_byte_perm (w[17], w[16], selector); - w[42] = hc_byte_perm (w[16], w[15], selector); - w[41] = hc_byte_perm (w[15], w[14], selector); - w[40] = hc_byte_perm (w[14], w[13], selector); - w[39] = hc_byte_perm (w[13], w[12], selector); - w[38] = hc_byte_perm (w[12], w[11], selector); - w[37] = hc_byte_perm (w[11], w[10], selector); - w[36] = hc_byte_perm (w[10], w[ 9], selector); - w[35] = hc_byte_perm (w[ 9], w[ 8], selector); - w[34] = hc_byte_perm (w[ 8], w[ 7], selector); - w[33] = hc_byte_perm (w[ 7], w[ 6], selector); - w[32] = hc_byte_perm (w[ 6], w[ 5], selector); - w[31] = hc_byte_perm (w[ 5], w[ 4], selector); - w[30] = hc_byte_perm (w[ 4], w[ 3], selector); - w[29] = hc_byte_perm (w[ 3], w[ 2], selector); - w[28] = hc_byte_perm (w[ 2], w[ 1], selector); - w[27] = hc_byte_perm (w[ 1], w[ 0], selector); - w[26] = hc_byte_perm (w[ 0], 0, selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm (w[36], w[35], selector); - w[62] = hc_byte_perm (w[35], w[34], selector); - w[61] = hc_byte_perm (w[34], w[33], selector); - w[60] = hc_byte_perm (w[33], w[32], selector); - w[59] = hc_byte_perm (w[32], w[31], selector); - w[58] = hc_byte_perm (w[31], w[30], selector); - w[57] = hc_byte_perm (w[30], w[29], selector); - w[56] = hc_byte_perm (w[29], w[28], selector); - w[55] = hc_byte_perm (w[28], w[27], selector); - w[54] = hc_byte_perm (w[27], w[26], selector); - w[53] = hc_byte_perm (w[26], w[25], selector); - w[52] = hc_byte_perm (w[25], w[24], selector); - w[51] = hc_byte_perm (w[24], w[23], selector); - w[50] = hc_byte_perm (w[23], w[22], selector); - w[49] = hc_byte_perm (w[22], w[21], selector); - w[48] = hc_byte_perm (w[21], w[20], selector); - w[47] = hc_byte_perm (w[20], w[19], selector); - w[46] = hc_byte_perm (w[19], w[18], selector); - w[45] = hc_byte_perm (w[18], w[17], selector); - w[44] = hc_byte_perm (w[17], w[16], selector); - w[43] = hc_byte_perm (w[16], w[15], selector); - w[42] = hc_byte_perm (w[15], w[14], selector); - w[41] = hc_byte_perm (w[14], w[13], selector); - w[40] = hc_byte_perm (w[13], w[12], selector); - w[39] = hc_byte_perm (w[12], w[11], selector); - w[38] = hc_byte_perm (w[11], w[10], selector); - w[37] = hc_byte_perm (w[10], w[ 9], selector); - w[36] = hc_byte_perm (w[ 9], w[ 8], selector); - w[35] = hc_byte_perm (w[ 8], w[ 7], selector); - w[34] = hc_byte_perm (w[ 7], w[ 6], selector); - w[33] = hc_byte_perm (w[ 6], w[ 5], selector); - w[32] = hc_byte_perm (w[ 5], w[ 4], selector); - w[31] = hc_byte_perm (w[ 4], w[ 3], selector); - w[30] = hc_byte_perm (w[ 3], w[ 2], selector); - w[29] = hc_byte_perm (w[ 2], w[ 1], selector); - w[28] = hc_byte_perm (w[ 1], w[ 0], selector); - w[27] = hc_byte_perm (w[ 0], 0, selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm (w[35], w[34], selector); - w[62] = hc_byte_perm (w[34], w[33], selector); - w[61] = hc_byte_perm (w[33], w[32], selector); - w[60] = hc_byte_perm (w[32], w[31], selector); - w[59] = hc_byte_perm (w[31], w[30], selector); - w[58] = hc_byte_perm (w[30], w[29], selector); - w[57] = hc_byte_perm (w[29], w[28], selector); - w[56] = hc_byte_perm (w[28], w[27], selector); - w[55] = hc_byte_perm (w[27], w[26], selector); - w[54] = hc_byte_perm (w[26], w[25], selector); - w[53] = hc_byte_perm (w[25], w[24], selector); - w[52] = hc_byte_perm (w[24], w[23], selector); - w[51] = hc_byte_perm (w[23], w[22], selector); - w[50] = hc_byte_perm (w[22], w[21], selector); - w[49] = hc_byte_perm (w[21], w[20], selector); - w[48] = hc_byte_perm (w[20], w[19], selector); - w[47] = hc_byte_perm (w[19], w[18], selector); - w[46] = hc_byte_perm (w[18], w[17], selector); - w[45] = hc_byte_perm (w[17], w[16], selector); - w[44] = hc_byte_perm (w[16], w[15], selector); - w[43] = hc_byte_perm (w[15], w[14], selector); - w[42] = hc_byte_perm (w[14], w[13], selector); - w[41] = hc_byte_perm (w[13], w[12], selector); - w[40] = hc_byte_perm (w[12], w[11], selector); - w[39] = hc_byte_perm (w[11], w[10], selector); - w[38] = hc_byte_perm (w[10], w[ 9], selector); - w[37] = hc_byte_perm (w[ 9], w[ 8], selector); - w[36] = hc_byte_perm (w[ 8], w[ 7], selector); - w[35] = hc_byte_perm (w[ 7], w[ 6], selector); - w[34] = hc_byte_perm (w[ 6], w[ 5], selector); - w[33] = hc_byte_perm (w[ 5], w[ 4], selector); - w[32] = hc_byte_perm (w[ 4], w[ 3], selector); - w[31] = hc_byte_perm (w[ 3], w[ 2], selector); - w[30] = hc_byte_perm (w[ 2], w[ 1], selector); - w[29] = hc_byte_perm (w[ 1], w[ 0], selector); - w[28] = hc_byte_perm (w[ 0], 0, selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm (w[34], w[33], selector); - w[62] = hc_byte_perm (w[33], w[32], selector); - w[61] = hc_byte_perm (w[32], w[31], selector); - w[60] = hc_byte_perm (w[31], w[30], selector); - w[59] = hc_byte_perm (w[30], w[29], selector); - w[58] = hc_byte_perm (w[29], w[28], selector); - w[57] = hc_byte_perm (w[28], w[27], selector); - w[56] = hc_byte_perm (w[27], w[26], selector); - w[55] = hc_byte_perm (w[26], w[25], selector); - w[54] = hc_byte_perm (w[25], w[24], selector); - w[53] = hc_byte_perm (w[24], w[23], selector); - w[52] = hc_byte_perm (w[23], w[22], selector); - w[51] = hc_byte_perm (w[22], w[21], selector); - w[50] = hc_byte_perm (w[21], w[20], selector); - w[49] = hc_byte_perm (w[20], w[19], selector); - w[48] = hc_byte_perm (w[19], w[18], selector); - w[47] = hc_byte_perm (w[18], w[17], selector); - w[46] = hc_byte_perm (w[17], w[16], selector); - w[45] = hc_byte_perm (w[16], w[15], selector); - w[44] = hc_byte_perm (w[15], w[14], selector); - w[43] = hc_byte_perm (w[14], w[13], selector); - w[42] = hc_byte_perm (w[13], w[12], selector); - w[41] = hc_byte_perm (w[12], w[11], selector); - w[40] = hc_byte_perm (w[11], w[10], selector); - w[39] = hc_byte_perm (w[10], w[ 9], selector); - w[38] = hc_byte_perm (w[ 9], w[ 8], selector); - w[37] = hc_byte_perm (w[ 8], w[ 7], selector); - w[36] = hc_byte_perm (w[ 7], w[ 6], selector); - w[35] = hc_byte_perm (w[ 6], w[ 5], selector); - w[34] = hc_byte_perm (w[ 5], w[ 4], selector); - w[33] = hc_byte_perm (w[ 4], w[ 3], selector); - w[32] = hc_byte_perm (w[ 3], w[ 2], selector); - w[31] = hc_byte_perm (w[ 2], w[ 1], selector); - w[30] = hc_byte_perm (w[ 1], w[ 0], selector); - w[29] = hc_byte_perm (w[ 0], 0, selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm (w[33], w[32], selector); - w[62] = hc_byte_perm (w[32], w[31], selector); - w[61] = hc_byte_perm (w[31], w[30], selector); - w[60] = hc_byte_perm (w[30], w[29], selector); - w[59] = hc_byte_perm (w[29], w[28], selector); - w[58] = hc_byte_perm (w[28], w[27], selector); - w[57] = hc_byte_perm (w[27], w[26], selector); - w[56] = hc_byte_perm (w[26], w[25], selector); - w[55] = hc_byte_perm (w[25], w[24], selector); - w[54] = hc_byte_perm (w[24], w[23], selector); - w[53] = hc_byte_perm (w[23], w[22], selector); - w[52] = hc_byte_perm (w[22], w[21], selector); - w[51] = hc_byte_perm (w[21], w[20], selector); - w[50] = hc_byte_perm (w[20], w[19], selector); - w[49] = hc_byte_perm (w[19], w[18], selector); - w[48] = hc_byte_perm (w[18], w[17], selector); - w[47] = hc_byte_perm (w[17], w[16], selector); - w[46] = hc_byte_perm (w[16], w[15], selector); - w[45] = hc_byte_perm (w[15], w[14], selector); - w[44] = hc_byte_perm (w[14], w[13], selector); - w[43] = hc_byte_perm (w[13], w[12], selector); - w[42] = hc_byte_perm (w[12], w[11], selector); - w[41] = hc_byte_perm (w[11], w[10], selector); - w[40] = hc_byte_perm (w[10], w[ 9], selector); - w[39] = hc_byte_perm (w[ 9], w[ 8], selector); - w[38] = hc_byte_perm (w[ 8], w[ 7], selector); - w[37] = hc_byte_perm (w[ 7], w[ 6], selector); - w[36] = hc_byte_perm (w[ 6], w[ 5], selector); - w[35] = hc_byte_perm (w[ 5], w[ 4], selector); - w[34] = hc_byte_perm (w[ 4], w[ 3], selector); - w[33] = hc_byte_perm (w[ 3], w[ 2], selector); - w[32] = hc_byte_perm (w[ 2], w[ 1], selector); - w[31] = hc_byte_perm (w[ 1], w[ 0], selector); - w[30] = hc_byte_perm (w[ 0], 0, selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm (w[32], w[31], selector); - w[62] = hc_byte_perm (w[31], w[30], selector); - w[61] = hc_byte_perm (w[30], w[29], selector); - w[60] = hc_byte_perm (w[29], w[28], selector); - w[59] = hc_byte_perm (w[28], w[27], selector); - w[58] = hc_byte_perm (w[27], w[26], selector); - w[57] = hc_byte_perm (w[26], w[25], selector); - w[56] = hc_byte_perm (w[25], w[24], selector); - w[55] = hc_byte_perm (w[24], w[23], selector); - w[54] = hc_byte_perm (w[23], w[22], selector); - w[53] = hc_byte_perm (w[22], w[21], selector); - w[52] = hc_byte_perm (w[21], w[20], selector); - w[51] = hc_byte_perm (w[20], w[19], selector); - w[50] = hc_byte_perm (w[19], w[18], selector); - w[49] = hc_byte_perm (w[18], w[17], selector); - w[48] = hc_byte_perm (w[17], w[16], selector); - w[47] = hc_byte_perm (w[16], w[15], selector); - w[46] = hc_byte_perm (w[15], w[14], selector); - w[45] = hc_byte_perm (w[14], w[13], selector); - w[44] = hc_byte_perm (w[13], w[12], selector); - w[43] = hc_byte_perm (w[12], w[11], selector); - w[42] = hc_byte_perm (w[11], w[10], selector); - w[41] = hc_byte_perm (w[10], w[ 9], selector); - w[40] = hc_byte_perm (w[ 9], w[ 8], selector); - w[39] = hc_byte_perm (w[ 8], w[ 7], selector); - w[38] = hc_byte_perm (w[ 7], w[ 6], selector); - w[37] = hc_byte_perm (w[ 6], w[ 5], selector); - w[36] = hc_byte_perm (w[ 5], w[ 4], selector); - w[35] = hc_byte_perm (w[ 4], w[ 3], selector); - w[34] = hc_byte_perm (w[ 3], w[ 2], selector); - w[33] = hc_byte_perm (w[ 2], w[ 1], selector); - w[32] = hc_byte_perm (w[ 1], w[ 0], selector); - w[31] = hc_byte_perm (w[ 0], 0, selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm (w[31], w[30], selector); - w[62] = hc_byte_perm (w[30], w[29], selector); - w[61] = hc_byte_perm (w[29], w[28], selector); - w[60] = hc_byte_perm (w[28], w[27], selector); - w[59] = hc_byte_perm (w[27], w[26], selector); - w[58] = hc_byte_perm (w[26], w[25], selector); - w[57] = hc_byte_perm (w[25], w[24], selector); - w[56] = hc_byte_perm (w[24], w[23], selector); - w[55] = hc_byte_perm (w[23], w[22], selector); - w[54] = hc_byte_perm (w[22], w[21], selector); - w[53] = hc_byte_perm (w[21], w[20], selector); - w[52] = hc_byte_perm (w[20], w[19], selector); - w[51] = hc_byte_perm (w[19], w[18], selector); - w[50] = hc_byte_perm (w[18], w[17], selector); - w[49] = hc_byte_perm (w[17], w[16], selector); - w[48] = hc_byte_perm (w[16], w[15], selector); - w[47] = hc_byte_perm (w[15], w[14], selector); - w[46] = hc_byte_perm (w[14], w[13], selector); - w[45] = hc_byte_perm (w[13], w[12], selector); - w[44] = hc_byte_perm (w[12], w[11], selector); - w[43] = hc_byte_perm (w[11], w[10], selector); - w[42] = hc_byte_perm (w[10], w[ 9], selector); - w[41] = hc_byte_perm (w[ 9], w[ 8], selector); - w[40] = hc_byte_perm (w[ 8], w[ 7], selector); - w[39] = hc_byte_perm (w[ 7], w[ 6], selector); - w[38] = hc_byte_perm (w[ 6], w[ 5], selector); - w[37] = hc_byte_perm (w[ 5], w[ 4], selector); - w[36] = hc_byte_perm (w[ 4], w[ 3], selector); - w[35] = hc_byte_perm (w[ 3], w[ 2], selector); - w[34] = hc_byte_perm (w[ 2], w[ 1], selector); - w[33] = hc_byte_perm (w[ 1], w[ 0], selector); - w[32] = hc_byte_perm (w[ 0], 0, selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm (w[30], w[29], selector); - w[62] = hc_byte_perm (w[29], w[28], selector); - w[61] = hc_byte_perm (w[28], w[27], selector); - w[60] = hc_byte_perm (w[27], w[26], selector); - w[59] = hc_byte_perm (w[26], w[25], selector); - w[58] = hc_byte_perm (w[25], w[24], selector); - w[57] = hc_byte_perm (w[24], w[23], selector); - w[56] = hc_byte_perm (w[23], w[22], selector); - w[55] = hc_byte_perm (w[22], w[21], selector); - w[54] = hc_byte_perm (w[21], w[20], selector); - w[53] = hc_byte_perm (w[20], w[19], selector); - w[52] = hc_byte_perm (w[19], w[18], selector); - w[51] = hc_byte_perm (w[18], w[17], selector); - w[50] = hc_byte_perm (w[17], w[16], selector); - w[49] = hc_byte_perm (w[16], w[15], selector); - w[48] = hc_byte_perm (w[15], w[14], selector); - w[47] = hc_byte_perm (w[14], w[13], selector); - w[46] = hc_byte_perm (w[13], w[12], selector); - w[45] = hc_byte_perm (w[12], w[11], selector); - w[44] = hc_byte_perm (w[11], w[10], selector); - w[43] = hc_byte_perm (w[10], w[ 9], selector); - w[42] = hc_byte_perm (w[ 9], w[ 8], selector); - w[41] = hc_byte_perm (w[ 8], w[ 7], selector); - w[40] = hc_byte_perm (w[ 7], w[ 6], selector); - w[39] = hc_byte_perm (w[ 6], w[ 5], selector); - w[38] = hc_byte_perm (w[ 5], w[ 4], selector); - w[37] = hc_byte_perm (w[ 4], w[ 3], selector); - w[36] = hc_byte_perm (w[ 3], w[ 2], selector); - w[35] = hc_byte_perm (w[ 2], w[ 1], selector); - w[34] = hc_byte_perm (w[ 1], w[ 0], selector); - w[33] = hc_byte_perm (w[ 0], 0, selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm (w[29], w[28], selector); - w[62] = hc_byte_perm (w[28], w[27], selector); - w[61] = hc_byte_perm (w[27], w[26], selector); - w[60] = hc_byte_perm (w[26], w[25], selector); - w[59] = hc_byte_perm (w[25], w[24], selector); - w[58] = hc_byte_perm (w[24], w[23], selector); - w[57] = hc_byte_perm (w[23], w[22], selector); - w[56] = hc_byte_perm (w[22], w[21], selector); - w[55] = hc_byte_perm (w[21], w[20], selector); - w[54] = hc_byte_perm (w[20], w[19], selector); - w[53] = hc_byte_perm (w[19], w[18], selector); - w[52] = hc_byte_perm (w[18], w[17], selector); - w[51] = hc_byte_perm (w[17], w[16], selector); - w[50] = hc_byte_perm (w[16], w[15], selector); - w[49] = hc_byte_perm (w[15], w[14], selector); - w[48] = hc_byte_perm (w[14], w[13], selector); - w[47] = hc_byte_perm (w[13], w[12], selector); - w[46] = hc_byte_perm (w[12], w[11], selector); - w[45] = hc_byte_perm (w[11], w[10], selector); - w[44] = hc_byte_perm (w[10], w[ 9], selector); - w[43] = hc_byte_perm (w[ 9], w[ 8], selector); - w[42] = hc_byte_perm (w[ 8], w[ 7], selector); - w[41] = hc_byte_perm (w[ 7], w[ 6], selector); - w[40] = hc_byte_perm (w[ 6], w[ 5], selector); - w[39] = hc_byte_perm (w[ 5], w[ 4], selector); - w[38] = hc_byte_perm (w[ 4], w[ 3], selector); - w[37] = hc_byte_perm (w[ 3], w[ 2], selector); - w[36] = hc_byte_perm (w[ 2], w[ 1], selector); - w[35] = hc_byte_perm (w[ 1], w[ 0], selector); - w[34] = hc_byte_perm (w[ 0], 0, selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm (w[28], w[27], selector); - w[62] = hc_byte_perm (w[27], w[26], selector); - w[61] = hc_byte_perm (w[26], w[25], selector); - w[60] = hc_byte_perm (w[25], w[24], selector); - w[59] = hc_byte_perm (w[24], w[23], selector); - w[58] = hc_byte_perm (w[23], w[22], selector); - w[57] = hc_byte_perm (w[22], w[21], selector); - w[56] = hc_byte_perm (w[21], w[20], selector); - w[55] = hc_byte_perm (w[20], w[19], selector); - w[54] = hc_byte_perm (w[19], w[18], selector); - w[53] = hc_byte_perm (w[18], w[17], selector); - w[52] = hc_byte_perm (w[17], w[16], selector); - w[51] = hc_byte_perm (w[16], w[15], selector); - w[50] = hc_byte_perm (w[15], w[14], selector); - w[49] = hc_byte_perm (w[14], w[13], selector); - w[48] = hc_byte_perm (w[13], w[12], selector); - w[47] = hc_byte_perm (w[12], w[11], selector); - w[46] = hc_byte_perm (w[11], w[10], selector); - w[45] = hc_byte_perm (w[10], w[ 9], selector); - w[44] = hc_byte_perm (w[ 9], w[ 8], selector); - w[43] = hc_byte_perm (w[ 8], w[ 7], selector); - w[42] = hc_byte_perm (w[ 7], w[ 6], selector); - w[41] = hc_byte_perm (w[ 6], w[ 5], selector); - w[40] = hc_byte_perm (w[ 5], w[ 4], selector); - w[39] = hc_byte_perm (w[ 4], w[ 3], selector); - w[38] = hc_byte_perm (w[ 3], w[ 2], selector); - w[37] = hc_byte_perm (w[ 2], w[ 1], selector); - w[36] = hc_byte_perm (w[ 1], w[ 0], selector); - w[35] = hc_byte_perm (w[ 0], 0, selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm (w[27], w[26], selector); - w[62] = hc_byte_perm (w[26], w[25], selector); - w[61] = hc_byte_perm (w[25], w[24], selector); - w[60] = hc_byte_perm (w[24], w[23], selector); - w[59] = hc_byte_perm (w[23], w[22], selector); - w[58] = hc_byte_perm (w[22], w[21], selector); - w[57] = hc_byte_perm (w[21], w[20], selector); - w[56] = hc_byte_perm (w[20], w[19], selector); - w[55] = hc_byte_perm (w[19], w[18], selector); - w[54] = hc_byte_perm (w[18], w[17], selector); - w[53] = hc_byte_perm (w[17], w[16], selector); - w[52] = hc_byte_perm (w[16], w[15], selector); - w[51] = hc_byte_perm (w[15], w[14], selector); - w[50] = hc_byte_perm (w[14], w[13], selector); - w[49] = hc_byte_perm (w[13], w[12], selector); - w[48] = hc_byte_perm (w[12], w[11], selector); - w[47] = hc_byte_perm (w[11], w[10], selector); - w[46] = hc_byte_perm (w[10], w[ 9], selector); - w[45] = hc_byte_perm (w[ 9], w[ 8], selector); - w[44] = hc_byte_perm (w[ 8], w[ 7], selector); - w[43] = hc_byte_perm (w[ 7], w[ 6], selector); - w[42] = hc_byte_perm (w[ 6], w[ 5], selector); - w[41] = hc_byte_perm (w[ 5], w[ 4], selector); - w[40] = hc_byte_perm (w[ 4], w[ 3], selector); - w[39] = hc_byte_perm (w[ 3], w[ 2], selector); - w[38] = hc_byte_perm (w[ 2], w[ 1], selector); - w[37] = hc_byte_perm (w[ 1], w[ 0], selector); - w[36] = hc_byte_perm (w[ 0], 0, selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm (w[26], w[25], selector); - w[62] = hc_byte_perm (w[25], w[24], selector); - w[61] = hc_byte_perm (w[24], w[23], selector); - w[60] = hc_byte_perm (w[23], w[22], selector); - w[59] = hc_byte_perm (w[22], w[21], selector); - w[58] = hc_byte_perm (w[21], w[20], selector); - w[57] = hc_byte_perm (w[20], w[19], selector); - w[56] = hc_byte_perm (w[19], w[18], selector); - w[55] = hc_byte_perm (w[18], w[17], selector); - w[54] = hc_byte_perm (w[17], w[16], selector); - w[53] = hc_byte_perm (w[16], w[15], selector); - w[52] = hc_byte_perm (w[15], w[14], selector); - w[51] = hc_byte_perm (w[14], w[13], selector); - w[50] = hc_byte_perm (w[13], w[12], selector); - w[49] = hc_byte_perm (w[12], w[11], selector); - w[48] = hc_byte_perm (w[11], w[10], selector); - w[47] = hc_byte_perm (w[10], w[ 9], selector); - w[46] = hc_byte_perm (w[ 9], w[ 8], selector); - w[45] = hc_byte_perm (w[ 8], w[ 7], selector); - w[44] = hc_byte_perm (w[ 7], w[ 6], selector); - w[43] = hc_byte_perm (w[ 6], w[ 5], selector); - w[42] = hc_byte_perm (w[ 5], w[ 4], selector); - w[41] = hc_byte_perm (w[ 4], w[ 3], selector); - w[40] = hc_byte_perm (w[ 3], w[ 2], selector); - w[39] = hc_byte_perm (w[ 2], w[ 1], selector); - w[38] = hc_byte_perm (w[ 1], w[ 0], selector); - w[37] = hc_byte_perm (w[ 0], 0, selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm (w[25], w[24], selector); - w[62] = hc_byte_perm (w[24], w[23], selector); - w[61] = hc_byte_perm (w[23], w[22], selector); - w[60] = hc_byte_perm (w[22], w[21], selector); - w[59] = hc_byte_perm (w[21], w[20], selector); - w[58] = hc_byte_perm (w[20], w[19], selector); - w[57] = hc_byte_perm (w[19], w[18], selector); - w[56] = hc_byte_perm (w[18], w[17], selector); - w[55] = hc_byte_perm (w[17], w[16], selector); - w[54] = hc_byte_perm (w[16], w[15], selector); - w[53] = hc_byte_perm (w[15], w[14], selector); - w[52] = hc_byte_perm (w[14], w[13], selector); - w[51] = hc_byte_perm (w[13], w[12], selector); - w[50] = hc_byte_perm (w[12], w[11], selector); - w[49] = hc_byte_perm (w[11], w[10], selector); - w[48] = hc_byte_perm (w[10], w[ 9], selector); - w[47] = hc_byte_perm (w[ 9], w[ 8], selector); - w[46] = hc_byte_perm (w[ 8], w[ 7], selector); - w[45] = hc_byte_perm (w[ 7], w[ 6], selector); - w[44] = hc_byte_perm (w[ 6], w[ 5], selector); - w[43] = hc_byte_perm (w[ 5], w[ 4], selector); - w[42] = hc_byte_perm (w[ 4], w[ 3], selector); - w[41] = hc_byte_perm (w[ 3], w[ 2], selector); - w[40] = hc_byte_perm (w[ 2], w[ 1], selector); - w[39] = hc_byte_perm (w[ 1], w[ 0], selector); - w[38] = hc_byte_perm (w[ 0], 0, selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm (w[24], w[23], selector); - w[62] = hc_byte_perm (w[23], w[22], selector); - w[61] = hc_byte_perm (w[22], w[21], selector); - w[60] = hc_byte_perm (w[21], w[20], selector); - w[59] = hc_byte_perm (w[20], w[19], selector); - w[58] = hc_byte_perm (w[19], w[18], selector); - w[57] = hc_byte_perm (w[18], w[17], selector); - w[56] = hc_byte_perm (w[17], w[16], selector); - w[55] = hc_byte_perm (w[16], w[15], selector); - w[54] = hc_byte_perm (w[15], w[14], selector); - w[53] = hc_byte_perm (w[14], w[13], selector); - w[52] = hc_byte_perm (w[13], w[12], selector); - w[51] = hc_byte_perm (w[12], w[11], selector); - w[50] = hc_byte_perm (w[11], w[10], selector); - w[49] = hc_byte_perm (w[10], w[ 9], selector); - w[48] = hc_byte_perm (w[ 9], w[ 8], selector); - w[47] = hc_byte_perm (w[ 8], w[ 7], selector); - w[46] = hc_byte_perm (w[ 7], w[ 6], selector); - w[45] = hc_byte_perm (w[ 6], w[ 5], selector); - w[44] = hc_byte_perm (w[ 5], w[ 4], selector); - w[43] = hc_byte_perm (w[ 4], w[ 3], selector); - w[42] = hc_byte_perm (w[ 3], w[ 2], selector); - w[41] = hc_byte_perm (w[ 2], w[ 1], selector); - w[40] = hc_byte_perm (w[ 1], w[ 0], selector); - w[39] = hc_byte_perm (w[ 0], 0, selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm (w[23], w[22], selector); - w[62] = hc_byte_perm (w[22], w[21], selector); - w[61] = hc_byte_perm (w[21], w[20], selector); - w[60] = hc_byte_perm (w[20], w[19], selector); - w[59] = hc_byte_perm (w[19], w[18], selector); - w[58] = hc_byte_perm (w[18], w[17], selector); - w[57] = hc_byte_perm (w[17], w[16], selector); - w[56] = hc_byte_perm (w[16], w[15], selector); - w[55] = hc_byte_perm (w[15], w[14], selector); - w[54] = hc_byte_perm (w[14], w[13], selector); - w[53] = hc_byte_perm (w[13], w[12], selector); - w[52] = hc_byte_perm (w[12], w[11], selector); - w[51] = hc_byte_perm (w[11], w[10], selector); - w[50] = hc_byte_perm (w[10], w[ 9], selector); - w[49] = hc_byte_perm (w[ 9], w[ 8], selector); - w[48] = hc_byte_perm (w[ 8], w[ 7], selector); - w[47] = hc_byte_perm (w[ 7], w[ 6], selector); - w[46] = hc_byte_perm (w[ 6], w[ 5], selector); - w[45] = hc_byte_perm (w[ 5], w[ 4], selector); - w[44] = hc_byte_perm (w[ 4], w[ 3], selector); - w[43] = hc_byte_perm (w[ 3], w[ 2], selector); - w[42] = hc_byte_perm (w[ 2], w[ 1], selector); - w[41] = hc_byte_perm (w[ 1], w[ 0], selector); - w[40] = hc_byte_perm (w[ 0], 0, selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm (w[22], w[21], selector); - w[62] = hc_byte_perm (w[21], w[20], selector); - w[61] = hc_byte_perm (w[20], w[19], selector); - w[60] = hc_byte_perm (w[19], w[18], selector); - w[59] = hc_byte_perm (w[18], w[17], selector); - w[58] = hc_byte_perm (w[17], w[16], selector); - w[57] = hc_byte_perm (w[16], w[15], selector); - w[56] = hc_byte_perm (w[15], w[14], selector); - w[55] = hc_byte_perm (w[14], w[13], selector); - w[54] = hc_byte_perm (w[13], w[12], selector); - w[53] = hc_byte_perm (w[12], w[11], selector); - w[52] = hc_byte_perm (w[11], w[10], selector); - w[51] = hc_byte_perm (w[10], w[ 9], selector); - w[50] = hc_byte_perm (w[ 9], w[ 8], selector); - w[49] = hc_byte_perm (w[ 8], w[ 7], selector); - w[48] = hc_byte_perm (w[ 7], w[ 6], selector); - w[47] = hc_byte_perm (w[ 6], w[ 5], selector); - w[46] = hc_byte_perm (w[ 5], w[ 4], selector); - w[45] = hc_byte_perm (w[ 4], w[ 3], selector); - w[44] = hc_byte_perm (w[ 3], w[ 2], selector); - w[43] = hc_byte_perm (w[ 2], w[ 1], selector); - w[42] = hc_byte_perm (w[ 1], w[ 0], selector); - w[41] = hc_byte_perm (w[ 0], 0, selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm (w[21], w[20], selector); - w[62] = hc_byte_perm (w[20], w[19], selector); - w[61] = hc_byte_perm (w[19], w[18], selector); - w[60] = hc_byte_perm (w[18], w[17], selector); - w[59] = hc_byte_perm (w[17], w[16], selector); - w[58] = hc_byte_perm (w[16], w[15], selector); - w[57] = hc_byte_perm (w[15], w[14], selector); - w[56] = hc_byte_perm (w[14], w[13], selector); - w[55] = hc_byte_perm (w[13], w[12], selector); - w[54] = hc_byte_perm (w[12], w[11], selector); - w[53] = hc_byte_perm (w[11], w[10], selector); - w[52] = hc_byte_perm (w[10], w[ 9], selector); - w[51] = hc_byte_perm (w[ 9], w[ 8], selector); - w[50] = hc_byte_perm (w[ 8], w[ 7], selector); - w[49] = hc_byte_perm (w[ 7], w[ 6], selector); - w[48] = hc_byte_perm (w[ 6], w[ 5], selector); - w[47] = hc_byte_perm (w[ 5], w[ 4], selector); - w[46] = hc_byte_perm (w[ 4], w[ 3], selector); - w[45] = hc_byte_perm (w[ 3], w[ 2], selector); - w[44] = hc_byte_perm (w[ 2], w[ 1], selector); - w[43] = hc_byte_perm (w[ 1], w[ 0], selector); - w[42] = hc_byte_perm (w[ 0], 0, selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm (w[20], w[19], selector); - w[62] = hc_byte_perm (w[19], w[18], selector); - w[61] = hc_byte_perm (w[18], w[17], selector); - w[60] = hc_byte_perm (w[17], w[16], selector); - w[59] = hc_byte_perm (w[16], w[15], selector); - w[58] = hc_byte_perm (w[15], w[14], selector); - w[57] = hc_byte_perm (w[14], w[13], selector); - w[56] = hc_byte_perm (w[13], w[12], selector); - w[55] = hc_byte_perm (w[12], w[11], selector); - w[54] = hc_byte_perm (w[11], w[10], selector); - w[53] = hc_byte_perm (w[10], w[ 9], selector); - w[52] = hc_byte_perm (w[ 9], w[ 8], selector); - w[51] = hc_byte_perm (w[ 8], w[ 7], selector); - w[50] = hc_byte_perm (w[ 7], w[ 6], selector); - w[49] = hc_byte_perm (w[ 6], w[ 5], selector); - w[48] = hc_byte_perm (w[ 5], w[ 4], selector); - w[47] = hc_byte_perm (w[ 4], w[ 3], selector); - w[46] = hc_byte_perm (w[ 3], w[ 2], selector); - w[45] = hc_byte_perm (w[ 2], w[ 1], selector); - w[44] = hc_byte_perm (w[ 1], w[ 0], selector); - w[43] = hc_byte_perm (w[ 0], 0, selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm (w[19], w[18], selector); - w[62] = hc_byte_perm (w[18], w[17], selector); - w[61] = hc_byte_perm (w[17], w[16], selector); - w[60] = hc_byte_perm (w[16], w[15], selector); - w[59] = hc_byte_perm (w[15], w[14], selector); - w[58] = hc_byte_perm (w[14], w[13], selector); - w[57] = hc_byte_perm (w[13], w[12], selector); - w[56] = hc_byte_perm (w[12], w[11], selector); - w[55] = hc_byte_perm (w[11], w[10], selector); - w[54] = hc_byte_perm (w[10], w[ 9], selector); - w[53] = hc_byte_perm (w[ 9], w[ 8], selector); - w[52] = hc_byte_perm (w[ 8], w[ 7], selector); - w[51] = hc_byte_perm (w[ 7], w[ 6], selector); - w[50] = hc_byte_perm (w[ 6], w[ 5], selector); - w[49] = hc_byte_perm (w[ 5], w[ 4], selector); - w[48] = hc_byte_perm (w[ 4], w[ 3], selector); - w[47] = hc_byte_perm (w[ 3], w[ 2], selector); - w[46] = hc_byte_perm (w[ 2], w[ 1], selector); - w[45] = hc_byte_perm (w[ 1], w[ 0], selector); - w[44] = hc_byte_perm (w[ 0], 0, selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm (w[18], w[17], selector); - w[62] = hc_byte_perm (w[17], w[16], selector); - w[61] = hc_byte_perm (w[16], w[15], selector); - w[60] = hc_byte_perm (w[15], w[14], selector); - w[59] = hc_byte_perm (w[14], w[13], selector); - w[58] = hc_byte_perm (w[13], w[12], selector); - w[57] = hc_byte_perm (w[12], w[11], selector); - w[56] = hc_byte_perm (w[11], w[10], selector); - w[55] = hc_byte_perm (w[10], w[ 9], selector); - w[54] = hc_byte_perm (w[ 9], w[ 8], selector); - w[53] = hc_byte_perm (w[ 8], w[ 7], selector); - w[52] = hc_byte_perm (w[ 7], w[ 6], selector); - w[51] = hc_byte_perm (w[ 6], w[ 5], selector); - w[50] = hc_byte_perm (w[ 5], w[ 4], selector); - w[49] = hc_byte_perm (w[ 4], w[ 3], selector); - w[48] = hc_byte_perm (w[ 3], w[ 2], selector); - w[47] = hc_byte_perm (w[ 2], w[ 1], selector); - w[46] = hc_byte_perm (w[ 1], w[ 0], selector); - w[45] = hc_byte_perm (w[ 0], 0, selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm (w[17], w[16], selector); - w[62] = hc_byte_perm (w[16], w[15], selector); - w[61] = hc_byte_perm (w[15], w[14], selector); - w[60] = hc_byte_perm (w[14], w[13], selector); - w[59] = hc_byte_perm (w[13], w[12], selector); - w[58] = hc_byte_perm (w[12], w[11], selector); - w[57] = hc_byte_perm (w[11], w[10], selector); - w[56] = hc_byte_perm (w[10], w[ 9], selector); - w[55] = hc_byte_perm (w[ 9], w[ 8], selector); - w[54] = hc_byte_perm (w[ 8], w[ 7], selector); - w[53] = hc_byte_perm (w[ 7], w[ 6], selector); - w[52] = hc_byte_perm (w[ 6], w[ 5], selector); - w[51] = hc_byte_perm (w[ 5], w[ 4], selector); - w[50] = hc_byte_perm (w[ 4], w[ 3], selector); - w[49] = hc_byte_perm (w[ 3], w[ 2], selector); - w[48] = hc_byte_perm (w[ 2], w[ 1], selector); - w[47] = hc_byte_perm (w[ 1], w[ 0], selector); - w[46] = hc_byte_perm (w[ 0], 0, selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm (w[16], w[15], selector); - w[62] = hc_byte_perm (w[15], w[14], selector); - w[61] = hc_byte_perm (w[14], w[13], selector); - w[60] = hc_byte_perm (w[13], w[12], selector); - w[59] = hc_byte_perm (w[12], w[11], selector); - w[58] = hc_byte_perm (w[11], w[10], selector); - w[57] = hc_byte_perm (w[10], w[ 9], selector); - w[56] = hc_byte_perm (w[ 9], w[ 8], selector); - w[55] = hc_byte_perm (w[ 8], w[ 7], selector); - w[54] = hc_byte_perm (w[ 7], w[ 6], selector); - w[53] = hc_byte_perm (w[ 6], w[ 5], selector); - w[52] = hc_byte_perm (w[ 5], w[ 4], selector); - w[51] = hc_byte_perm (w[ 4], w[ 3], selector); - w[50] = hc_byte_perm (w[ 3], w[ 2], selector); - w[49] = hc_byte_perm (w[ 2], w[ 1], selector); - w[48] = hc_byte_perm (w[ 1], w[ 0], selector); - w[47] = hc_byte_perm (w[ 0], 0, selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm (w[15], w[14], selector); - w[62] = hc_byte_perm (w[14], w[13], selector); - w[61] = hc_byte_perm (w[13], w[12], selector); - w[60] = hc_byte_perm (w[12], w[11], selector); - w[59] = hc_byte_perm (w[11], w[10], selector); - w[58] = hc_byte_perm (w[10], w[ 9], selector); - w[57] = hc_byte_perm (w[ 9], w[ 8], selector); - w[56] = hc_byte_perm (w[ 8], w[ 7], selector); - w[55] = hc_byte_perm (w[ 7], w[ 6], selector); - w[54] = hc_byte_perm (w[ 6], w[ 5], selector); - w[53] = hc_byte_perm (w[ 5], w[ 4], selector); - w[52] = hc_byte_perm (w[ 4], w[ 3], selector); - w[51] = hc_byte_perm (w[ 3], w[ 2], selector); - w[50] = hc_byte_perm (w[ 2], w[ 1], selector); - w[49] = hc_byte_perm (w[ 1], w[ 0], selector); - w[48] = hc_byte_perm (w[ 0], 0, selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm (w[14], w[13], selector); - w[62] = hc_byte_perm (w[13], w[12], selector); - w[61] = hc_byte_perm (w[12], w[11], selector); - w[60] = hc_byte_perm (w[11], w[10], selector); - w[59] = hc_byte_perm (w[10], w[ 9], selector); - w[58] = hc_byte_perm (w[ 9], w[ 8], selector); - w[57] = hc_byte_perm (w[ 8], w[ 7], selector); - w[56] = hc_byte_perm (w[ 7], w[ 6], selector); - w[55] = hc_byte_perm (w[ 6], w[ 5], selector); - w[54] = hc_byte_perm (w[ 5], w[ 4], selector); - w[53] = hc_byte_perm (w[ 4], w[ 3], selector); - w[52] = hc_byte_perm (w[ 3], w[ 2], selector); - w[51] = hc_byte_perm (w[ 2], w[ 1], selector); - w[50] = hc_byte_perm (w[ 1], w[ 0], selector); - w[49] = hc_byte_perm (w[ 0], 0, selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm (w[13], w[12], selector); - w[62] = hc_byte_perm (w[12], w[11], selector); - w[61] = hc_byte_perm (w[11], w[10], selector); - w[60] = hc_byte_perm (w[10], w[ 9], selector); - w[59] = hc_byte_perm (w[ 9], w[ 8], selector); - w[58] = hc_byte_perm (w[ 8], w[ 7], selector); - w[57] = hc_byte_perm (w[ 7], w[ 6], selector); - w[56] = hc_byte_perm (w[ 6], w[ 5], selector); - w[55] = hc_byte_perm (w[ 5], w[ 4], selector); - w[54] = hc_byte_perm (w[ 4], w[ 3], selector); - w[53] = hc_byte_perm (w[ 3], w[ 2], selector); - w[52] = hc_byte_perm (w[ 2], w[ 1], selector); - w[51] = hc_byte_perm (w[ 1], w[ 0], selector); - w[50] = hc_byte_perm (w[ 0], 0, selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm (w[12], w[11], selector); - w[62] = hc_byte_perm (w[11], w[10], selector); - w[61] = hc_byte_perm (w[10], w[ 9], selector); - w[60] = hc_byte_perm (w[ 9], w[ 8], selector); - w[59] = hc_byte_perm (w[ 8], w[ 7], selector); - w[58] = hc_byte_perm (w[ 7], w[ 6], selector); - w[57] = hc_byte_perm (w[ 6], w[ 5], selector); - w[56] = hc_byte_perm (w[ 5], w[ 4], selector); - w[55] = hc_byte_perm (w[ 4], w[ 3], selector); - w[54] = hc_byte_perm (w[ 3], w[ 2], selector); - w[53] = hc_byte_perm (w[ 2], w[ 1], selector); - w[52] = hc_byte_perm (w[ 1], w[ 0], selector); - w[51] = hc_byte_perm (w[ 0], 0, selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm (w[11], w[10], selector); - w[62] = hc_byte_perm (w[10], w[ 9], selector); - w[61] = hc_byte_perm (w[ 9], w[ 8], selector); - w[60] = hc_byte_perm (w[ 8], w[ 7], selector); - w[59] = hc_byte_perm (w[ 7], w[ 6], selector); - w[58] = hc_byte_perm (w[ 6], w[ 5], selector); - w[57] = hc_byte_perm (w[ 5], w[ 4], selector); - w[56] = hc_byte_perm (w[ 4], w[ 3], selector); - w[55] = hc_byte_perm (w[ 3], w[ 2], selector); - w[54] = hc_byte_perm (w[ 2], w[ 1], selector); - w[53] = hc_byte_perm (w[ 1], w[ 0], selector); - w[52] = hc_byte_perm (w[ 0], 0, selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm (w[10], w[ 9], selector); - w[62] = hc_byte_perm (w[ 9], w[ 8], selector); - w[61] = hc_byte_perm (w[ 8], w[ 7], selector); - w[60] = hc_byte_perm (w[ 7], w[ 6], selector); - w[59] = hc_byte_perm (w[ 6], w[ 5], selector); - w[58] = hc_byte_perm (w[ 5], w[ 4], selector); - w[57] = hc_byte_perm (w[ 4], w[ 3], selector); - w[56] = hc_byte_perm (w[ 3], w[ 2], selector); - w[55] = hc_byte_perm (w[ 2], w[ 1], selector); - w[54] = hc_byte_perm (w[ 1], w[ 0], selector); - w[53] = hc_byte_perm (w[ 0], 0, selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm (w[ 9], w[ 8], selector); - w[62] = hc_byte_perm (w[ 8], w[ 7], selector); - w[61] = hc_byte_perm (w[ 7], w[ 6], selector); - w[60] = hc_byte_perm (w[ 6], w[ 5], selector); - w[59] = hc_byte_perm (w[ 5], w[ 4], selector); - w[58] = hc_byte_perm (w[ 4], w[ 3], selector); - w[57] = hc_byte_perm (w[ 3], w[ 2], selector); - w[56] = hc_byte_perm (w[ 2], w[ 1], selector); - w[55] = hc_byte_perm (w[ 1], w[ 0], selector); - w[54] = hc_byte_perm (w[ 0], 0, selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm (w[ 8], w[ 7], selector); - w[62] = hc_byte_perm (w[ 7], w[ 6], selector); - w[61] = hc_byte_perm (w[ 6], w[ 5], selector); - w[60] = hc_byte_perm (w[ 5], w[ 4], selector); - w[59] = hc_byte_perm (w[ 4], w[ 3], selector); - w[58] = hc_byte_perm (w[ 3], w[ 2], selector); - w[57] = hc_byte_perm (w[ 2], w[ 1], selector); - w[56] = hc_byte_perm (w[ 1], w[ 0], selector); - w[55] = hc_byte_perm (w[ 0], 0, selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm (w[ 7], w[ 6], selector); - w[62] = hc_byte_perm (w[ 6], w[ 5], selector); - w[61] = hc_byte_perm (w[ 5], w[ 4], selector); - w[60] = hc_byte_perm (w[ 4], w[ 3], selector); - w[59] = hc_byte_perm (w[ 3], w[ 2], selector); - w[58] = hc_byte_perm (w[ 2], w[ 1], selector); - w[57] = hc_byte_perm (w[ 1], w[ 0], selector); - w[56] = hc_byte_perm (w[ 0], 0, selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm (w[ 6], w[ 5], selector); - w[62] = hc_byte_perm (w[ 5], w[ 4], selector); - w[61] = hc_byte_perm (w[ 4], w[ 3], selector); - w[60] = hc_byte_perm (w[ 3], w[ 2], selector); - w[59] = hc_byte_perm (w[ 2], w[ 1], selector); - w[58] = hc_byte_perm (w[ 1], w[ 0], selector); - w[57] = hc_byte_perm (w[ 0], 0, selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm (w[ 5], w[ 4], selector); - w[62] = hc_byte_perm (w[ 4], w[ 3], selector); - w[61] = hc_byte_perm (w[ 3], w[ 2], selector); - w[60] = hc_byte_perm (w[ 2], w[ 1], selector); - w[59] = hc_byte_perm (w[ 1], w[ 0], selector); - w[58] = hc_byte_perm (w[ 0], 0, selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm (w[ 4], w[ 3], selector); - w[62] = hc_byte_perm (w[ 3], w[ 2], selector); - w[61] = hc_byte_perm (w[ 2], w[ 1], selector); - w[60] = hc_byte_perm (w[ 1], w[ 0], selector); - w[59] = hc_byte_perm (w[ 0], 0, selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm (w[ 3], w[ 2], selector); - w[62] = hc_byte_perm (w[ 2], w[ 1], selector); - w[61] = hc_byte_perm (w[ 1], w[ 0], selector); - w[60] = hc_byte_perm (w[ 0], 0, selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm (w[ 2], w[ 1], selector); - w[62] = hc_byte_perm (w[ 1], w[ 0], selector); - w[61] = hc_byte_perm (w[ 0], 0, selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm (w[ 1], w[ 0], selector); - w[62] = hc_byte_perm (w[ 0], 0, selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm (w[ 0], 0, selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } /** @@ -36899,7 +21520,7 @@ DECLSPEC void make_utf16be_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PR out1[1] = hc_byte_perm_S (in[0], 0, 0x3727); out1[0] = hc_byte_perm_S (in[0], 0, 0x1707); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207); out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007); @@ -36937,7 +21558,7 @@ DECLSPEC void make_utf16beN_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, P out1[1] = hc_byte_perm_S (in[0], 0, 0x1707); out1[0] = hc_byte_perm_S (in[0], 0, 0x3727); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x01070007); out2[2] = hc_byte_perm_S (in[3], 0, 0x03070207); @@ -36975,7 +21596,7 @@ DECLSPEC void make_utf16le_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PR out1[1] = hc_byte_perm_S (in[0], 0, 0x7372); out1[0] = hc_byte_perm_S (in[0], 0, 0x7170); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702); out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700); @@ -37009,7 +21630,7 @@ DECLSPEC void undo_utf16be_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *i out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002); @@ -37039,7 +21660,7 @@ DECLSPEC void undo_utf16le_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *i out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200); @@ -37064,7 +21685,6 @@ DECLSPEC void switch_buffer_by_offset_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -37387,352 +22007,12 @@ DECLSPEC void switch_buffer_by_offset_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = hc_byte_perm_S ( 0, w0[0], selector); - - break; - - case 1: - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] = hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38191,476 +22471,12 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS break; } - #endif - - #ifdef IS_NV - // could be improved, too - switch (offset_switch) - { - case 0: - c0[0] = hc_bytealign_S (w3[3], 0, offset); - w3[3] = hc_bytealign_S (w3[2], w3[3], offset); - w3[2] = hc_bytealign_S (w3[1], w3[2], offset); - w3[1] = hc_bytealign_S (w3[0], w3[1], offset); - w3[0] = hc_bytealign_S (w2[3], w3[0], offset); - w2[3] = hc_bytealign_S (w2[2], w2[3], offset); - w2[2] = hc_bytealign_S (w2[1], w2[2], offset); - w2[1] = hc_bytealign_S (w2[0], w2[1], offset); - w2[0] = hc_bytealign_S (w1[3], w2[0], offset); - w1[3] = hc_bytealign_S (w1[2], w1[3], offset); - w1[2] = hc_bytealign_S (w1[1], w1[2], offset); - w1[1] = hc_bytealign_S (w1[0], w1[1], offset); - w1[0] = hc_bytealign_S (w0[3], w1[0], offset); - w0[3] = hc_bytealign_S (w0[2], w0[3], offset); - w0[2] = hc_bytealign_S (w0[1], w0[2], offset); - w0[1] = hc_bytealign_S (w0[0], w0[1], offset); - w0[0] = hc_bytealign_S ( 0, w0[0], offset); - - break; - - case 1: - c0[1] = hc_bytealign_S (w3[3], 0, offset); - c0[0] = hc_bytealign_S (w3[2], w3[3], offset); - w3[3] = hc_bytealign_S (w3[1], w3[2], offset); - w3[2] = hc_bytealign_S (w3[0], w3[1], offset); - w3[1] = hc_bytealign_S (w2[3], w3[0], offset); - w3[0] = hc_bytealign_S (w2[2], w2[3], offset); - w2[3] = hc_bytealign_S (w2[1], w2[2], offset); - w2[2] = hc_bytealign_S (w2[0], w2[1], offset); - w2[1] = hc_bytealign_S (w1[3], w2[0], offset); - w2[0] = hc_bytealign_S (w1[2], w1[3], offset); - w1[3] = hc_bytealign_S (w1[1], w1[2], offset); - w1[2] = hc_bytealign_S (w1[0], w1[1], offset); - w1[1] = hc_bytealign_S (w0[3], w1[0], offset); - w1[0] = hc_bytealign_S (w0[2], w0[3], offset); - w0[3] = hc_bytealign_S (w0[1], w0[2], offset); - w0[2] = hc_bytealign_S (w0[0], w0[1], offset); - w0[1] = hc_bytealign_S ( 0, w0[0], offset); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_bytealign_S (w3[3], 0, offset); - c0[1] = hc_bytealign_S (w3[2], w3[3], offset); - c0[0] = hc_bytealign_S (w3[1], w3[2], offset); - w3[3] = hc_bytealign_S (w3[0], w3[1], offset); - w3[2] = hc_bytealign_S (w2[3], w3[0], offset); - w3[1] = hc_bytealign_S (w2[2], w2[3], offset); - w3[0] = hc_bytealign_S (w2[1], w2[2], offset); - w2[3] = hc_bytealign_S (w2[0], w2[1], offset); - w2[2] = hc_bytealign_S (w1[3], w2[0], offset); - w2[1] = hc_bytealign_S (w1[2], w1[3], offset); - w2[0] = hc_bytealign_S (w1[1], w1[2], offset); - w1[3] = hc_bytealign_S (w1[0], w1[1], offset); - w1[2] = hc_bytealign_S (w0[3], w1[0], offset); - w1[1] = hc_bytealign_S (w0[2], w0[3], offset); - w1[0] = hc_bytealign_S (w0[1], w0[2], offset); - w0[3] = hc_bytealign_S (w0[0], w0[1], offset); - w0[2] = hc_bytealign_S ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_bytealign_S (w3[3], 0, offset); - c0[2] = hc_bytealign_S (w3[2], w3[3], offset); - c0[1] = hc_bytealign_S (w3[1], w3[2], offset); - c0[0] = hc_bytealign_S (w3[0], w3[1], offset); - w3[3] = hc_bytealign_S (w2[3], w3[0], offset); - w3[2] = hc_bytealign_S (w2[2], w2[3], offset); - w3[1] = hc_bytealign_S (w2[1], w2[2], offset); - w3[0] = hc_bytealign_S (w2[0], w2[1], offset); - w2[3] = hc_bytealign_S (w1[3], w2[0], offset); - w2[2] = hc_bytealign_S (w1[2], w1[3], offset); - w2[1] = hc_bytealign_S (w1[1], w1[2], offset); - w2[0] = hc_bytealign_S (w1[0], w1[1], offset); - w1[3] = hc_bytealign_S (w0[3], w1[0], offset); - w1[2] = hc_bytealign_S (w0[2], w0[3], offset); - w1[1] = hc_bytealign_S (w0[1], w0[2], offset); - w1[0] = hc_bytealign_S (w0[0], w0[1], offset); - w0[3] = hc_bytealign_S ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_bytealign_S (w3[3], 0, offset); - c0[3] = hc_bytealign_S (w3[2], w3[3], offset); - c0[2] = hc_bytealign_S (w3[1], w3[2], offset); - c0[1] = hc_bytealign_S (w3[0], w3[1], offset); - c0[0] = hc_bytealign_S (w2[3], w3[0], offset); - w3[3] = hc_bytealign_S (w2[2], w2[3], offset); - w3[2] = hc_bytealign_S (w2[1], w2[2], offset); - w3[1] = hc_bytealign_S (w2[0], w2[1], offset); - w3[0] = hc_bytealign_S (w1[3], w2[0], offset); - w2[3] = hc_bytealign_S (w1[2], w1[3], offset); - w2[2] = hc_bytealign_S (w1[1], w1[2], offset); - w2[1] = hc_bytealign_S (w1[0], w1[1], offset); - w2[0] = hc_bytealign_S (w0[3], w1[0], offset); - w1[3] = hc_bytealign_S (w0[2], w0[3], offset); - w1[2] = hc_bytealign_S (w0[1], w0[2], offset); - w1[1] = hc_bytealign_S (w0[0], w0[1], offset); - w1[0] = hc_bytealign_S ( 0, w0[0], offset); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_bytealign_S (w3[3], 0, offset); - c1[0] = hc_bytealign_S (w3[2], w3[3], offset); - c0[3] = hc_bytealign_S (w3[1], w3[2], offset); - c0[2] = hc_bytealign_S (w3[0], w3[1], offset); - c0[1] = hc_bytealign_S (w2[3], w3[0], offset); - c0[0] = hc_bytealign_S (w2[2], w2[3], offset); - w3[3] = hc_bytealign_S (w2[1], w2[2], offset); - w3[2] = hc_bytealign_S (w2[0], w2[1], offset); - w3[1] = hc_bytealign_S (w1[3], w2[0], offset); - w3[0] = hc_bytealign_S (w1[2], w1[3], offset); - w2[3] = hc_bytealign_S (w1[1], w1[2], offset); - w2[2] = hc_bytealign_S (w1[0], w1[1], offset); - w2[1] = hc_bytealign_S (w0[3], w1[0], offset); - w2[0] = hc_bytealign_S (w0[2], w0[3], offset); - w1[3] = hc_bytealign_S (w0[1], w0[2], offset); - w1[2] = hc_bytealign_S (w0[0], w0[1], offset); - w1[1] = hc_bytealign_S ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_bytealign_S (w3[3], 0, offset); - c1[1] = hc_bytealign_S (w3[2], w3[3], offset); - c1[0] = hc_bytealign_S (w3[1], w3[2], offset); - c0[3] = hc_bytealign_S (w3[0], w3[1], offset); - c0[2] = hc_bytealign_S (w2[3], w3[0], offset); - c0[1] = hc_bytealign_S (w2[2], w2[3], offset); - c0[0] = hc_bytealign_S (w2[1], w2[2], offset); - w3[3] = hc_bytealign_S (w2[0], w2[1], offset); - w3[2] = hc_bytealign_S (w1[3], w2[0], offset); - w3[1] = hc_bytealign_S (w1[2], w1[3], offset); - w3[0] = hc_bytealign_S (w1[1], w1[2], offset); - w2[3] = hc_bytealign_S (w1[0], w1[1], offset); - w2[2] = hc_bytealign_S (w0[3], w1[0], offset); - w2[1] = hc_bytealign_S (w0[2], w0[3], offset); - w2[0] = hc_bytealign_S (w0[1], w0[2], offset); - w1[3] = hc_bytealign_S (w0[0], w0[1], offset); - w1[2] = hc_bytealign_S ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_bytealign_S (w3[3], 0, offset); - c1[2] = hc_bytealign_S (w3[2], w3[3], offset); - c1[1] = hc_bytealign_S (w3[1], w3[2], offset); - c1[0] = hc_bytealign_S (w3[0], w3[1], offset); - c0[3] = hc_bytealign_S (w2[3], w3[0], offset); - c0[2] = hc_bytealign_S (w2[2], w2[3], offset); - c0[1] = hc_bytealign_S (w2[1], w2[2], offset); - c0[0] = hc_bytealign_S (w2[0], w2[1], offset); - w3[3] = hc_bytealign_S (w1[3], w2[0], offset); - w3[2] = hc_bytealign_S (w1[2], w1[3], offset); - w3[1] = hc_bytealign_S (w1[1], w1[2], offset); - w3[0] = hc_bytealign_S (w1[0], w1[1], offset); - w2[3] = hc_bytealign_S (w0[3], w1[0], offset); - w2[2] = hc_bytealign_S (w0[2], w0[3], offset); - w2[1] = hc_bytealign_S (w0[1], w0[2], offset); - w2[0] = hc_bytealign_S (w0[0], w0[1], offset); - w1[3] = hc_bytealign_S ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_bytealign_S (w3[3], 0, offset); - c1[3] = hc_bytealign_S (w3[2], w3[3], offset); - c1[2] = hc_bytealign_S (w3[1], w3[2], offset); - c1[1] = hc_bytealign_S (w3[0], w3[1], offset); - c1[0] = hc_bytealign_S (w2[3], w3[0], offset); - c0[3] = hc_bytealign_S (w2[2], w2[3], offset); - c0[2] = hc_bytealign_S (w2[1], w2[2], offset); - c0[1] = hc_bytealign_S (w2[0], w2[1], offset); - c0[0] = hc_bytealign_S (w1[3], w2[0], offset); - w3[3] = hc_bytealign_S (w1[2], w1[3], offset); - w3[2] = hc_bytealign_S (w1[1], w1[2], offset); - w3[1] = hc_bytealign_S (w1[0], w1[1], offset); - w3[0] = hc_bytealign_S (w0[3], w1[0], offset); - w2[3] = hc_bytealign_S (w0[2], w0[3], offset); - w2[2] = hc_bytealign_S (w0[1], w0[2], offset); - w2[1] = hc_bytealign_S (w0[0], w0[1], offset); - w2[0] = hc_bytealign_S ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_bytealign_S (w3[3], 0, offset); - c2[0] = hc_bytealign_S (w3[2], w3[3], offset); - c1[3] = hc_bytealign_S (w3[1], w3[2], offset); - c1[2] = hc_bytealign_S (w3[0], w3[1], offset); - c1[1] = hc_bytealign_S (w2[3], w3[0], offset); - c1[0] = hc_bytealign_S (w2[2], w2[3], offset); - c0[3] = hc_bytealign_S (w2[1], w2[2], offset); - c0[2] = hc_bytealign_S (w2[0], w2[1], offset); - c0[1] = hc_bytealign_S (w1[3], w2[0], offset); - c0[0] = hc_bytealign_S (w1[2], w1[3], offset); - w3[3] = hc_bytealign_S (w1[1], w1[2], offset); - w3[2] = hc_bytealign_S (w1[0], w1[1], offset); - w3[1] = hc_bytealign_S (w0[3], w1[0], offset); - w3[0] = hc_bytealign_S (w0[2], w0[3], offset); - w2[3] = hc_bytealign_S (w0[1], w0[2], offset); - w2[2] = hc_bytealign_S (w0[0], w0[1], offset); - w2[1] = hc_bytealign_S ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_bytealign_S (w3[3], 0, offset); - c2[1] = hc_bytealign_S (w3[2], w3[3], offset); - c2[0] = hc_bytealign_S (w3[1], w3[2], offset); - c1[3] = hc_bytealign_S (w3[0], w3[1], offset); - c1[2] = hc_bytealign_S (w2[3], w3[0], offset); - c1[1] = hc_bytealign_S (w2[2], w2[3], offset); - c1[0] = hc_bytealign_S (w2[1], w2[2], offset); - c0[3] = hc_bytealign_S (w2[0], w2[1], offset); - c0[2] = hc_bytealign_S (w1[3], w2[0], offset); - c0[1] = hc_bytealign_S (w1[2], w1[3], offset); - c0[0] = hc_bytealign_S (w1[1], w1[2], offset); - w3[3] = hc_bytealign_S (w1[0], w1[1], offset); - w3[2] = hc_bytealign_S (w0[3], w1[0], offset); - w3[1] = hc_bytealign_S (w0[2], w0[3], offset); - w3[0] = hc_bytealign_S (w0[1], w0[2], offset); - w2[3] = hc_bytealign_S (w0[0], w0[1], offset); - w2[2] = hc_bytealign_S ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_bytealign_S (w3[3], 0, offset); - c2[2] = hc_bytealign_S (w3[2], w3[3], offset); - c2[1] = hc_bytealign_S (w3[1], w3[2], offset); - c2[0] = hc_bytealign_S (w3[0], w3[1], offset); - c1[3] = hc_bytealign_S (w2[3], w3[0], offset); - c1[2] = hc_bytealign_S (w2[2], w2[3], offset); - c1[1] = hc_bytealign_S (w2[1], w2[2], offset); - c1[0] = hc_bytealign_S (w2[0], w2[1], offset); - c0[3] = hc_bytealign_S (w1[3], w2[0], offset); - c0[2] = hc_bytealign_S (w1[2], w1[3], offset); - c0[1] = hc_bytealign_S (w1[1], w1[2], offset); - c0[0] = hc_bytealign_S (w1[0], w1[1], offset); - w3[3] = hc_bytealign_S (w0[3], w1[0], offset); - w3[2] = hc_bytealign_S (w0[2], w0[3], offset); - w3[1] = hc_bytealign_S (w0[1], w0[2], offset); - w3[0] = hc_bytealign_S (w0[0], w0[1], offset); - w2[3] = hc_bytealign_S ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_bytealign_S (w3[3], 0, offset); - c2[3] = hc_bytealign_S (w3[2], w3[3], offset); - c2[2] = hc_bytealign_S (w3[1], w3[2], offset); - c2[1] = hc_bytealign_S (w3[0], w3[1], offset); - c2[0] = hc_bytealign_S (w2[3], w3[0], offset); - c1[3] = hc_bytealign_S (w2[2], w2[3], offset); - c1[2] = hc_bytealign_S (w2[1], w2[2], offset); - c1[1] = hc_bytealign_S (w2[0], w2[1], offset); - c1[0] = hc_bytealign_S (w1[3], w2[0], offset); - c0[3] = hc_bytealign_S (w1[2], w1[3], offset); - c0[2] = hc_bytealign_S (w1[1], w1[2], offset); - c0[1] = hc_bytealign_S (w1[0], w1[1], offset); - c0[0] = hc_bytealign_S (w0[3], w1[0], offset); - w3[3] = hc_bytealign_S (w0[2], w0[3], offset); - w3[2] = hc_bytealign_S (w0[1], w0[2], offset); - w3[1] = hc_bytealign_S (w0[0], w0[1], offset); - w3[0] = hc_bytealign_S ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_bytealign_S (w3[3], 0, offset); - c3[0] = hc_bytealign_S (w3[2], w3[3], offset); - c2[3] = hc_bytealign_S (w3[1], w3[2], offset); - c2[2] = hc_bytealign_S (w3[0], w3[1], offset); - c2[1] = hc_bytealign_S (w2[3], w3[0], offset); - c2[0] = hc_bytealign_S (w2[2], w2[3], offset); - c1[3] = hc_bytealign_S (w2[1], w2[2], offset); - c1[2] = hc_bytealign_S (w2[0], w2[1], offset); - c1[1] = hc_bytealign_S (w1[3], w2[0], offset); - c1[0] = hc_bytealign_S (w1[2], w1[3], offset); - c0[3] = hc_bytealign_S (w1[1], w1[2], offset); - c0[2] = hc_bytealign_S (w1[0], w1[1], offset); - c0[1] = hc_bytealign_S (w0[3], w1[0], offset); - c0[0] = hc_bytealign_S (w0[2], w0[3], offset); - w3[3] = hc_bytealign_S (w0[1], w0[2], offset); - w3[2] = hc_bytealign_S (w0[0], w0[1], offset); - w3[1] = hc_bytealign_S ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_bytealign_S (w3[3], 0, offset); - c3[1] = hc_bytealign_S (w3[2], w3[3], offset); - c3[0] = hc_bytealign_S (w3[1], w3[2], offset); - c2[3] = hc_bytealign_S (w3[0], w3[1], offset); - c2[2] = hc_bytealign_S (w2[3], w3[0], offset); - c2[1] = hc_bytealign_S (w2[2], w2[3], offset); - c2[0] = hc_bytealign_S (w2[1], w2[2], offset); - c1[3] = hc_bytealign_S (w2[0], w2[1], offset); - c1[2] = hc_bytealign_S (w1[3], w2[0], offset); - c1[1] = hc_bytealign_S (w1[2], w1[3], offset); - c1[0] = hc_bytealign_S (w1[1], w1[2], offset); - c0[3] = hc_bytealign_S (w1[0], w1[1], offset); - c0[2] = hc_bytealign_S (w0[3], w1[0], offset); - c0[1] = hc_bytealign_S (w0[2], w0[3], offset); - c0[0] = hc_bytealign_S (w0[1], w0[2], offset); - w3[3] = hc_bytealign_S (w0[0], w0[1], offset); - w3[2] = hc_bytealign_S ( 0, w0[0], offset); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_bytealign_S (w3[3], 0, offset); - c3[2] = hc_bytealign_S (w3[2], w3[3], offset); - c3[1] = hc_bytealign_S (w3[1], w3[2], offset); - c3[0] = hc_bytealign_S (w3[0], w3[1], offset); - c2[3] = hc_bytealign_S (w2[3], w3[0], offset); - c2[2] = hc_bytealign_S (w2[2], w2[3], offset); - c2[1] = hc_bytealign_S (w2[1], w2[2], offset); - c2[0] = hc_bytealign_S (w2[0], w2[1], offset); - c1[3] = hc_bytealign_S (w1[3], w2[0], offset); - c1[2] = hc_bytealign_S (w1[2], w1[3], offset); - c1[1] = hc_bytealign_S (w1[1], w1[2], offset); - c1[0] = hc_bytealign_S (w1[0], w1[1], offset); - c0[3] = hc_bytealign_S (w0[3], w1[0], offset); - c0[2] = hc_bytealign_S (w0[2], w0[3], offset); - c0[1] = hc_bytealign_S (w0[1], w0[2], offset); - c0[0] = hc_bytealign_S (w0[0], w0[1], offset); - w3[3] = hc_bytealign_S ( 0, w0[0], offset); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38983,348 +22799,12 @@ DECLSPEC void switch_buffer_by_offset_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -39783,484 +23263,12 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S ( 0, w3[3], selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S ( 0, w3[3], selector); - c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S ( 0, w3[3], selector); - c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S ( 0, w3[3], selector); - c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S ( 0, w3[3], selector); - c0[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S ( 0, w3[3], selector); - c1[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S ( 0, w3[3], selector); - c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S ( 0, w3[3], selector); - c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S ( 0, w3[3], selector); - c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S ( 0, w3[3], selector); - c2[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S ( 0, w3[3], selector); - c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S ( 0, w3[3], selector); - c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S ( 0, w3[3], selector); - c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S ( 0, w3[3], selector); - c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S ( 0, w3[3], selector); - c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S ( 0, w3[3], selector); - c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[3] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -41415,1167 +24423,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = hc_byte_perm_S ( 0, w0[0], selector); - break; - - case 1: - w7[3] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - break; - - case 2: - w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w7[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] = hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 14: - w7[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 15: - w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 16: - w7[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[0] = hc_byte_perm_S ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[1] = hc_byte_perm_S ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - w7[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[2] = hc_byte_perm_S ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[3] = hc_byte_perm_S ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[0] = hc_byte_perm_S ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[1] = hc_byte_perm_S ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[2] = hc_byte_perm_S ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[3] = hc_byte_perm_S ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[0] = hc_byte_perm_S ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - w7[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[1] = hc_byte_perm_S ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[2] = hc_byte_perm_S ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[3] = hc_byte_perm_S ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[0] = hc_byte_perm_S ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[1] = hc_byte_perm_S ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[2] = hc_byte_perm_S ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm_S ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -44258,1712 +26111,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (PRIVATE_AS u32 *w0, PRIVAT break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S (w7[3], 0, selector); - w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = hc_byte_perm_S ( 0, w0[0], selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S (w7[3], 0, selector); - c0[0] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[3] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S (w7[3], 0, selector); - c0[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[0] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S (w7[3], 0, selector); - c0[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[0] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S (w7[3], 0, selector); - c0[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S (w7[3], 0, selector); - c1[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S (w7[3], 0, selector); - c1[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S (w7[3], 0, selector); - c1[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[0] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] = hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S (w7[3], 0, selector); - c1[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S (w7[3], 0, selector); - c2[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S (w7[3], 0, selector); - c2[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S (w7[3], 0, selector); - c2[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S (w7[3], 0, selector); - c2[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S (w7[3], 0, selector); - c3[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S (w7[3], 0, selector); - c3[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S (w7[3], 0, selector); - c3[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm_S (w7[3], 0, selector); - c3[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[0] = hc_byte_perm_S ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm_S (w7[3], 0, selector); - c4[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[1] = hc_byte_perm_S ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm_S (w7[3], 0, selector); - c4[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[2] = hc_byte_perm_S ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm_S (w7[3], 0, selector); - c4[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[3] = hc_byte_perm_S ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm_S (w7[3], 0, selector); - c4[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[0] = hc_byte_perm_S ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm_S (w7[3], 0, selector); - c5[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[1] = hc_byte_perm_S ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm_S (w7[3], 0, selector); - c5[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[2] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[2] = hc_byte_perm_S ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm_S (w7[3], 0, selector); - c5[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[3] = hc_byte_perm_S ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm_S (w7[3], 0, selector); - c5[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[0] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[0] = hc_byte_perm_S ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm_S (w7[3], 0, selector); - c6[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[1] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[1] = hc_byte_perm_S ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm_S (w7[3], 0, selector); - c6[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[2] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[2] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[1] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[2] = hc_byte_perm_S ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm_S (w7[3], 0, selector); - c6[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[3] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[3] = hc_byte_perm_S ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm_S (w7[3], 0, selector); - c6[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[0] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[1] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[0] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[0] = hc_byte_perm_S ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm_S (w7[3], 0, selector); - c7[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[3] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[1] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[1] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[1] = hc_byte_perm_S ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm_S (w7[3], 0, selector); - c7[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c7[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c6[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c5[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c4[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[2] = hc_byte_perm_S (w2[3], w3[0], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[2] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[2] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[2] = hc_byte_perm_S ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm_S (w7[3], 0, selector); - c7[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c7[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c7[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c6[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c6[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c5[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c5[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c4[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c4[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c3[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c3[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c2[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[3] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[2] = hc_byte_perm_S (w1[2], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[1], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[3] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[2] = hc_byte_perm_S (w0[2], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[1], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[3] = hc_byte_perm_S ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -47118,1180 +27271,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w7[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - w7[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[0] = hc_byte_perm_S (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[1] = hc_byte_perm_S (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[2] = hc_byte_perm_S (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[3] = hc_byte_perm_S (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[0] = hc_byte_perm_S (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[1] = hc_byte_perm_S (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[2] = hc_byte_perm_S (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[3] = hc_byte_perm_S (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[0] = hc_byte_perm_S (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - w7[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[1] = hc_byte_perm_S (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[2] = hc_byte_perm_S (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[3] = hc_byte_perm_S (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[0] = hc_byte_perm_S (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[1] = hc_byte_perm_S (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[2] = hc_byte_perm_S (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm_S (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -49974,1708 +28959,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (PRIVATE_AS u32 *w0, PRIVAT break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S ( 0, w7[3], selector); - w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S ( 0, w7[3], selector); - c0[0] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S ( 0, w7[3], selector); - c0[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[0] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S ( 0, w7[3], selector); - c0[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[0] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S ( 0, w7[3], selector); - c0[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S ( 0, w7[3], selector); - c1[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S ( 0, w7[3], selector); - c1[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S ( 0, w7[3], selector); - c1[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S ( 0, w7[3], selector); - c1[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S ( 0, w7[3], selector); - c2[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S ( 0, w7[3], selector); - c2[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S ( 0, w7[3], selector); - c2[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S ( 0, w7[3], selector); - c2[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S ( 0, w7[3], selector); - c3[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S ( 0, w7[3], selector); - c3[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S ( 0, w7[3], selector); - c3[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm_S ( 0, w7[3], selector); - c3[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[0] = hc_byte_perm_S (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm_S ( 0, w7[3], selector); - c4[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[1] = hc_byte_perm_S (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm_S ( 0, w7[3], selector); - c4[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[2] = hc_byte_perm_S (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm_S ( 0, w7[3], selector); - c4[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[3] = hc_byte_perm_S (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm_S ( 0, w7[3], selector); - c4[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[0] = hc_byte_perm_S (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm_S ( 0, w7[3], selector); - c5[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[1] = hc_byte_perm_S (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm_S ( 0, w7[3], selector); - c5[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[2] = hc_byte_perm_S (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm_S ( 0, w7[3], selector); - c5[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[3] = hc_byte_perm_S (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm_S ( 0, w7[3], selector); - c5[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[0] = hc_byte_perm_S (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm_S ( 0, w7[3], selector); - c6[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[1] = hc_byte_perm_S (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm_S ( 0, w7[3], selector); - c6[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[2] = hc_byte_perm_S (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm_S ( 0, w7[3], selector); - c6[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[3] = hc_byte_perm_S (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm_S ( 0, w7[3], selector); - c6[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[0] = hc_byte_perm_S (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm_S ( 0, w7[3], selector); - c7[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[1] = hc_byte_perm_S (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm_S ( 0, w7[3], selector); - c7[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c7[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c6[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c5[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c4[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[2] = hc_byte_perm_S (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm_S ( 0, w7[3], selector); - c7[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c7[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c7[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c6[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c6[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c5[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c5[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c4[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c4[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[3] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[3] = hc_byte_perm_S (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_le_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -56030,4384 +33319,12 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (PRIVATE_AS u32 *w, const u32 of break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm_S (w[62], w[63], selector); - w[62] = hc_byte_perm_S (w[61], w[62], selector); - w[61] = hc_byte_perm_S (w[60], w[61], selector); - w[60] = hc_byte_perm_S (w[59], w[60], selector); - w[59] = hc_byte_perm_S (w[58], w[59], selector); - w[58] = hc_byte_perm_S (w[57], w[58], selector); - w[57] = hc_byte_perm_S (w[56], w[57], selector); - w[56] = hc_byte_perm_S (w[55], w[56], selector); - w[55] = hc_byte_perm_S (w[54], w[55], selector); - w[54] = hc_byte_perm_S (w[53], w[54], selector); - w[53] = hc_byte_perm_S (w[52], w[53], selector); - w[52] = hc_byte_perm_S (w[51], w[52], selector); - w[51] = hc_byte_perm_S (w[50], w[51], selector); - w[50] = hc_byte_perm_S (w[49], w[50], selector); - w[49] = hc_byte_perm_S (w[48], w[49], selector); - w[48] = hc_byte_perm_S (w[47], w[48], selector); - w[47] = hc_byte_perm_S (w[46], w[47], selector); - w[46] = hc_byte_perm_S (w[45], w[46], selector); - w[45] = hc_byte_perm_S (w[44], w[45], selector); - w[44] = hc_byte_perm_S (w[43], w[44], selector); - w[43] = hc_byte_perm_S (w[42], w[43], selector); - w[42] = hc_byte_perm_S (w[41], w[42], selector); - w[41] = hc_byte_perm_S (w[40], w[41], selector); - w[40] = hc_byte_perm_S (w[39], w[40], selector); - w[39] = hc_byte_perm_S (w[38], w[39], selector); - w[38] = hc_byte_perm_S (w[37], w[38], selector); - w[37] = hc_byte_perm_S (w[36], w[37], selector); - w[36] = hc_byte_perm_S (w[35], w[36], selector); - w[35] = hc_byte_perm_S (w[34], w[35], selector); - w[34] = hc_byte_perm_S (w[33], w[34], selector); - w[33] = hc_byte_perm_S (w[32], w[33], selector); - w[32] = hc_byte_perm_S (w[31], w[32], selector); - w[31] = hc_byte_perm_S (w[30], w[31], selector); - w[30] = hc_byte_perm_S (w[29], w[30], selector); - w[29] = hc_byte_perm_S (w[28], w[29], selector); - w[28] = hc_byte_perm_S (w[27], w[28], selector); - w[27] = hc_byte_perm_S (w[26], w[27], selector); - w[26] = hc_byte_perm_S (w[25], w[26], selector); - w[25] = hc_byte_perm_S (w[24], w[25], selector); - w[24] = hc_byte_perm_S (w[23], w[24], selector); - w[23] = hc_byte_perm_S (w[22], w[23], selector); - w[22] = hc_byte_perm_S (w[21], w[22], selector); - w[21] = hc_byte_perm_S (w[20], w[21], selector); - w[20] = hc_byte_perm_S (w[19], w[20], selector); - w[19] = hc_byte_perm_S (w[18], w[19], selector); - w[18] = hc_byte_perm_S (w[17], w[18], selector); - w[17] = hc_byte_perm_S (w[16], w[17], selector); - w[16] = hc_byte_perm_S (w[15], w[16], selector); - w[15] = hc_byte_perm_S (w[14], w[15], selector); - w[14] = hc_byte_perm_S (w[13], w[14], selector); - w[13] = hc_byte_perm_S (w[12], w[13], selector); - w[12] = hc_byte_perm_S (w[11], w[12], selector); - w[11] = hc_byte_perm_S (w[10], w[11], selector); - w[10] = hc_byte_perm_S (w[ 9], w[10], selector); - w[ 9] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[ 8] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 7] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 6] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 5] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 4] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 3] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 2] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 1] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 0] = hc_byte_perm_S ( 0, w[ 0], selector); - - break; - - case 1: - w[63] = hc_byte_perm_S (w[61], w[62], selector); - w[62] = hc_byte_perm_S (w[60], w[61], selector); - w[61] = hc_byte_perm_S (w[59], w[60], selector); - w[60] = hc_byte_perm_S (w[58], w[59], selector); - w[59] = hc_byte_perm_S (w[57], w[58], selector); - w[58] = hc_byte_perm_S (w[56], w[57], selector); - w[57] = hc_byte_perm_S (w[55], w[56], selector); - w[56] = hc_byte_perm_S (w[54], w[55], selector); - w[55] = hc_byte_perm_S (w[53], w[54], selector); - w[54] = hc_byte_perm_S (w[52], w[53], selector); - w[53] = hc_byte_perm_S (w[51], w[52], selector); - w[52] = hc_byte_perm_S (w[50], w[51], selector); - w[51] = hc_byte_perm_S (w[49], w[50], selector); - w[50] = hc_byte_perm_S (w[48], w[49], selector); - w[49] = hc_byte_perm_S (w[47], w[48], selector); - w[48] = hc_byte_perm_S (w[46], w[47], selector); - w[47] = hc_byte_perm_S (w[45], w[46], selector); - w[46] = hc_byte_perm_S (w[44], w[45], selector); - w[45] = hc_byte_perm_S (w[43], w[44], selector); - w[44] = hc_byte_perm_S (w[42], w[43], selector); - w[43] = hc_byte_perm_S (w[41], w[42], selector); - w[42] = hc_byte_perm_S (w[40], w[41], selector); - w[41] = hc_byte_perm_S (w[39], w[40], selector); - w[40] = hc_byte_perm_S (w[38], w[39], selector); - w[39] = hc_byte_perm_S (w[37], w[38], selector); - w[38] = hc_byte_perm_S (w[36], w[37], selector); - w[37] = hc_byte_perm_S (w[35], w[36], selector); - w[36] = hc_byte_perm_S (w[34], w[35], selector); - w[35] = hc_byte_perm_S (w[33], w[34], selector); - w[34] = hc_byte_perm_S (w[32], w[33], selector); - w[33] = hc_byte_perm_S (w[31], w[32], selector); - w[32] = hc_byte_perm_S (w[30], w[31], selector); - w[31] = hc_byte_perm_S (w[29], w[30], selector); - w[30] = hc_byte_perm_S (w[28], w[29], selector); - w[29] = hc_byte_perm_S (w[27], w[28], selector); - w[28] = hc_byte_perm_S (w[26], w[27], selector); - w[27] = hc_byte_perm_S (w[25], w[26], selector); - w[26] = hc_byte_perm_S (w[24], w[25], selector); - w[25] = hc_byte_perm_S (w[23], w[24], selector); - w[24] = hc_byte_perm_S (w[22], w[23], selector); - w[23] = hc_byte_perm_S (w[21], w[22], selector); - w[22] = hc_byte_perm_S (w[20], w[21], selector); - w[21] = hc_byte_perm_S (w[19], w[20], selector); - w[20] = hc_byte_perm_S (w[18], w[19], selector); - w[19] = hc_byte_perm_S (w[17], w[18], selector); - w[18] = hc_byte_perm_S (w[16], w[17], selector); - w[17] = hc_byte_perm_S (w[15], w[16], selector); - w[16] = hc_byte_perm_S (w[14], w[15], selector); - w[15] = hc_byte_perm_S (w[13], w[14], selector); - w[14] = hc_byte_perm_S (w[12], w[13], selector); - w[13] = hc_byte_perm_S (w[11], w[12], selector); - w[12] = hc_byte_perm_S (w[10], w[11], selector); - w[11] = hc_byte_perm_S (w[ 9], w[10], selector); - w[10] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[ 9] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 8] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 7] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 6] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 5] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 4] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 3] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 2] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 1] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm_S (w[60], w[61], selector); - w[62] = hc_byte_perm_S (w[59], w[60], selector); - w[61] = hc_byte_perm_S (w[58], w[59], selector); - w[60] = hc_byte_perm_S (w[57], w[58], selector); - w[59] = hc_byte_perm_S (w[56], w[57], selector); - w[58] = hc_byte_perm_S (w[55], w[56], selector); - w[57] = hc_byte_perm_S (w[54], w[55], selector); - w[56] = hc_byte_perm_S (w[53], w[54], selector); - w[55] = hc_byte_perm_S (w[52], w[53], selector); - w[54] = hc_byte_perm_S (w[51], w[52], selector); - w[53] = hc_byte_perm_S (w[50], w[51], selector); - w[52] = hc_byte_perm_S (w[49], w[50], selector); - w[51] = hc_byte_perm_S (w[48], w[49], selector); - w[50] = hc_byte_perm_S (w[47], w[48], selector); - w[49] = hc_byte_perm_S (w[46], w[47], selector); - w[48] = hc_byte_perm_S (w[45], w[46], selector); - w[47] = hc_byte_perm_S (w[44], w[45], selector); - w[46] = hc_byte_perm_S (w[43], w[44], selector); - w[45] = hc_byte_perm_S (w[42], w[43], selector); - w[44] = hc_byte_perm_S (w[41], w[42], selector); - w[43] = hc_byte_perm_S (w[40], w[41], selector); - w[42] = hc_byte_perm_S (w[39], w[40], selector); - w[41] = hc_byte_perm_S (w[38], w[39], selector); - w[40] = hc_byte_perm_S (w[37], w[38], selector); - w[39] = hc_byte_perm_S (w[36], w[37], selector); - w[38] = hc_byte_perm_S (w[35], w[36], selector); - w[37] = hc_byte_perm_S (w[34], w[35], selector); - w[36] = hc_byte_perm_S (w[33], w[34], selector); - w[35] = hc_byte_perm_S (w[32], w[33], selector); - w[34] = hc_byte_perm_S (w[31], w[32], selector); - w[33] = hc_byte_perm_S (w[30], w[31], selector); - w[32] = hc_byte_perm_S (w[29], w[30], selector); - w[31] = hc_byte_perm_S (w[28], w[29], selector); - w[30] = hc_byte_perm_S (w[27], w[28], selector); - w[29] = hc_byte_perm_S (w[26], w[27], selector); - w[28] = hc_byte_perm_S (w[25], w[26], selector); - w[27] = hc_byte_perm_S (w[24], w[25], selector); - w[26] = hc_byte_perm_S (w[23], w[24], selector); - w[25] = hc_byte_perm_S (w[22], w[23], selector); - w[24] = hc_byte_perm_S (w[21], w[22], selector); - w[23] = hc_byte_perm_S (w[20], w[21], selector); - w[22] = hc_byte_perm_S (w[19], w[20], selector); - w[21] = hc_byte_perm_S (w[18], w[19], selector); - w[20] = hc_byte_perm_S (w[17], w[18], selector); - w[19] = hc_byte_perm_S (w[16], w[17], selector); - w[18] = hc_byte_perm_S (w[15], w[16], selector); - w[17] = hc_byte_perm_S (w[14], w[15], selector); - w[16] = hc_byte_perm_S (w[13], w[14], selector); - w[15] = hc_byte_perm_S (w[12], w[13], selector); - w[14] = hc_byte_perm_S (w[11], w[12], selector); - w[13] = hc_byte_perm_S (w[10], w[11], selector); - w[12] = hc_byte_perm_S (w[ 9], w[10], selector); - w[11] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[10] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 9] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 8] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 7] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 6] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 5] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 4] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 3] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 2] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm_S (w[59], w[60], selector); - w[62] = hc_byte_perm_S (w[58], w[59], selector); - w[61] = hc_byte_perm_S (w[57], w[58], selector); - w[60] = hc_byte_perm_S (w[56], w[57], selector); - w[59] = hc_byte_perm_S (w[55], w[56], selector); - w[58] = hc_byte_perm_S (w[54], w[55], selector); - w[57] = hc_byte_perm_S (w[53], w[54], selector); - w[56] = hc_byte_perm_S (w[52], w[53], selector); - w[55] = hc_byte_perm_S (w[51], w[52], selector); - w[54] = hc_byte_perm_S (w[50], w[51], selector); - w[53] = hc_byte_perm_S (w[49], w[50], selector); - w[52] = hc_byte_perm_S (w[48], w[49], selector); - w[51] = hc_byte_perm_S (w[47], w[48], selector); - w[50] = hc_byte_perm_S (w[46], w[47], selector); - w[49] = hc_byte_perm_S (w[45], w[46], selector); - w[48] = hc_byte_perm_S (w[44], w[45], selector); - w[47] = hc_byte_perm_S (w[43], w[44], selector); - w[46] = hc_byte_perm_S (w[42], w[43], selector); - w[45] = hc_byte_perm_S (w[41], w[42], selector); - w[44] = hc_byte_perm_S (w[40], w[41], selector); - w[43] = hc_byte_perm_S (w[39], w[40], selector); - w[42] = hc_byte_perm_S (w[38], w[39], selector); - w[41] = hc_byte_perm_S (w[37], w[38], selector); - w[40] = hc_byte_perm_S (w[36], w[37], selector); - w[39] = hc_byte_perm_S (w[35], w[36], selector); - w[38] = hc_byte_perm_S (w[34], w[35], selector); - w[37] = hc_byte_perm_S (w[33], w[34], selector); - w[36] = hc_byte_perm_S (w[32], w[33], selector); - w[35] = hc_byte_perm_S (w[31], w[32], selector); - w[34] = hc_byte_perm_S (w[30], w[31], selector); - w[33] = hc_byte_perm_S (w[29], w[30], selector); - w[32] = hc_byte_perm_S (w[28], w[29], selector); - w[31] = hc_byte_perm_S (w[27], w[28], selector); - w[30] = hc_byte_perm_S (w[26], w[27], selector); - w[29] = hc_byte_perm_S (w[25], w[26], selector); - w[28] = hc_byte_perm_S (w[24], w[25], selector); - w[27] = hc_byte_perm_S (w[23], w[24], selector); - w[26] = hc_byte_perm_S (w[22], w[23], selector); - w[25] = hc_byte_perm_S (w[21], w[22], selector); - w[24] = hc_byte_perm_S (w[20], w[21], selector); - w[23] = hc_byte_perm_S (w[19], w[20], selector); - w[22] = hc_byte_perm_S (w[18], w[19], selector); - w[21] = hc_byte_perm_S (w[17], w[18], selector); - w[20] = hc_byte_perm_S (w[16], w[17], selector); - w[19] = hc_byte_perm_S (w[15], w[16], selector); - w[18] = hc_byte_perm_S (w[14], w[15], selector); - w[17] = hc_byte_perm_S (w[13], w[14], selector); - w[16] = hc_byte_perm_S (w[12], w[13], selector); - w[15] = hc_byte_perm_S (w[11], w[12], selector); - w[14] = hc_byte_perm_S (w[10], w[11], selector); - w[13] = hc_byte_perm_S (w[ 9], w[10], selector); - w[12] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[11] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[10] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 9] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 8] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 7] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 6] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 5] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 4] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 3] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm_S (w[58], w[59], selector); - w[62] = hc_byte_perm_S (w[57], w[58], selector); - w[61] = hc_byte_perm_S (w[56], w[57], selector); - w[60] = hc_byte_perm_S (w[55], w[56], selector); - w[59] = hc_byte_perm_S (w[54], w[55], selector); - w[58] = hc_byte_perm_S (w[53], w[54], selector); - w[57] = hc_byte_perm_S (w[52], w[53], selector); - w[56] = hc_byte_perm_S (w[51], w[52], selector); - w[55] = hc_byte_perm_S (w[50], w[51], selector); - w[54] = hc_byte_perm_S (w[49], w[50], selector); - w[53] = hc_byte_perm_S (w[48], w[49], selector); - w[52] = hc_byte_perm_S (w[47], w[48], selector); - w[51] = hc_byte_perm_S (w[46], w[47], selector); - w[50] = hc_byte_perm_S (w[45], w[46], selector); - w[49] = hc_byte_perm_S (w[44], w[45], selector); - w[48] = hc_byte_perm_S (w[43], w[44], selector); - w[47] = hc_byte_perm_S (w[42], w[43], selector); - w[46] = hc_byte_perm_S (w[41], w[42], selector); - w[45] = hc_byte_perm_S (w[40], w[41], selector); - w[44] = hc_byte_perm_S (w[39], w[40], selector); - w[43] = hc_byte_perm_S (w[38], w[39], selector); - w[42] = hc_byte_perm_S (w[37], w[38], selector); - w[41] = hc_byte_perm_S (w[36], w[37], selector); - w[40] = hc_byte_perm_S (w[35], w[36], selector); - w[39] = hc_byte_perm_S (w[34], w[35], selector); - w[38] = hc_byte_perm_S (w[33], w[34], selector); - w[37] = hc_byte_perm_S (w[32], w[33], selector); - w[36] = hc_byte_perm_S (w[31], w[32], selector); - w[35] = hc_byte_perm_S (w[30], w[31], selector); - w[34] = hc_byte_perm_S (w[29], w[30], selector); - w[33] = hc_byte_perm_S (w[28], w[29], selector); - w[32] = hc_byte_perm_S (w[27], w[28], selector); - w[31] = hc_byte_perm_S (w[26], w[27], selector); - w[30] = hc_byte_perm_S (w[25], w[26], selector); - w[29] = hc_byte_perm_S (w[24], w[25], selector); - w[28] = hc_byte_perm_S (w[23], w[24], selector); - w[27] = hc_byte_perm_S (w[22], w[23], selector); - w[26] = hc_byte_perm_S (w[21], w[22], selector); - w[25] = hc_byte_perm_S (w[20], w[21], selector); - w[24] = hc_byte_perm_S (w[19], w[20], selector); - w[23] = hc_byte_perm_S (w[18], w[19], selector); - w[22] = hc_byte_perm_S (w[17], w[18], selector); - w[21] = hc_byte_perm_S (w[16], w[17], selector); - w[20] = hc_byte_perm_S (w[15], w[16], selector); - w[19] = hc_byte_perm_S (w[14], w[15], selector); - w[18] = hc_byte_perm_S (w[13], w[14], selector); - w[17] = hc_byte_perm_S (w[12], w[13], selector); - w[16] = hc_byte_perm_S (w[11], w[12], selector); - w[15] = hc_byte_perm_S (w[10], w[11], selector); - w[14] = hc_byte_perm_S (w[ 9], w[10], selector); - w[13] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[12] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[11] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[10] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 9] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 8] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 7] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 6] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 5] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 4] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm_S (w[57], w[58], selector); - w[62] = hc_byte_perm_S (w[56], w[57], selector); - w[61] = hc_byte_perm_S (w[55], w[56], selector); - w[60] = hc_byte_perm_S (w[54], w[55], selector); - w[59] = hc_byte_perm_S (w[53], w[54], selector); - w[58] = hc_byte_perm_S (w[52], w[53], selector); - w[57] = hc_byte_perm_S (w[51], w[52], selector); - w[56] = hc_byte_perm_S (w[50], w[51], selector); - w[55] = hc_byte_perm_S (w[49], w[50], selector); - w[54] = hc_byte_perm_S (w[48], w[49], selector); - w[53] = hc_byte_perm_S (w[47], w[48], selector); - w[52] = hc_byte_perm_S (w[46], w[47], selector); - w[51] = hc_byte_perm_S (w[45], w[46], selector); - w[50] = hc_byte_perm_S (w[44], w[45], selector); - w[49] = hc_byte_perm_S (w[43], w[44], selector); - w[48] = hc_byte_perm_S (w[42], w[43], selector); - w[47] = hc_byte_perm_S (w[41], w[42], selector); - w[46] = hc_byte_perm_S (w[40], w[41], selector); - w[45] = hc_byte_perm_S (w[39], w[40], selector); - w[44] = hc_byte_perm_S (w[38], w[39], selector); - w[43] = hc_byte_perm_S (w[37], w[38], selector); - w[42] = hc_byte_perm_S (w[36], w[37], selector); - w[41] = hc_byte_perm_S (w[35], w[36], selector); - w[40] = hc_byte_perm_S (w[34], w[35], selector); - w[39] = hc_byte_perm_S (w[33], w[34], selector); - w[38] = hc_byte_perm_S (w[32], w[33], selector); - w[37] = hc_byte_perm_S (w[31], w[32], selector); - w[36] = hc_byte_perm_S (w[30], w[31], selector); - w[35] = hc_byte_perm_S (w[29], w[30], selector); - w[34] = hc_byte_perm_S (w[28], w[29], selector); - w[33] = hc_byte_perm_S (w[27], w[28], selector); - w[32] = hc_byte_perm_S (w[26], w[27], selector); - w[31] = hc_byte_perm_S (w[25], w[26], selector); - w[30] = hc_byte_perm_S (w[24], w[25], selector); - w[29] = hc_byte_perm_S (w[23], w[24], selector); - w[28] = hc_byte_perm_S (w[22], w[23], selector); - w[27] = hc_byte_perm_S (w[21], w[22], selector); - w[26] = hc_byte_perm_S (w[20], w[21], selector); - w[25] = hc_byte_perm_S (w[19], w[20], selector); - w[24] = hc_byte_perm_S (w[18], w[19], selector); - w[23] = hc_byte_perm_S (w[17], w[18], selector); - w[22] = hc_byte_perm_S (w[16], w[17], selector); - w[21] = hc_byte_perm_S (w[15], w[16], selector); - w[20] = hc_byte_perm_S (w[14], w[15], selector); - w[19] = hc_byte_perm_S (w[13], w[14], selector); - w[18] = hc_byte_perm_S (w[12], w[13], selector); - w[17] = hc_byte_perm_S (w[11], w[12], selector); - w[16] = hc_byte_perm_S (w[10], w[11], selector); - w[15] = hc_byte_perm_S (w[ 9], w[10], selector); - w[14] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[13] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[12] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[11] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[10] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 9] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 8] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 7] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 6] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 5] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm_S (w[56], w[57], selector); - w[62] = hc_byte_perm_S (w[55], w[56], selector); - w[61] = hc_byte_perm_S (w[54], w[55], selector); - w[60] = hc_byte_perm_S (w[53], w[54], selector); - w[59] = hc_byte_perm_S (w[52], w[53], selector); - w[58] = hc_byte_perm_S (w[51], w[52], selector); - w[57] = hc_byte_perm_S (w[50], w[51], selector); - w[56] = hc_byte_perm_S (w[49], w[50], selector); - w[55] = hc_byte_perm_S (w[48], w[49], selector); - w[54] = hc_byte_perm_S (w[47], w[48], selector); - w[53] = hc_byte_perm_S (w[46], w[47], selector); - w[52] = hc_byte_perm_S (w[45], w[46], selector); - w[51] = hc_byte_perm_S (w[44], w[45], selector); - w[50] = hc_byte_perm_S (w[43], w[44], selector); - w[49] = hc_byte_perm_S (w[42], w[43], selector); - w[48] = hc_byte_perm_S (w[41], w[42], selector); - w[47] = hc_byte_perm_S (w[40], w[41], selector); - w[46] = hc_byte_perm_S (w[39], w[40], selector); - w[45] = hc_byte_perm_S (w[38], w[39], selector); - w[44] = hc_byte_perm_S (w[37], w[38], selector); - w[43] = hc_byte_perm_S (w[36], w[37], selector); - w[42] = hc_byte_perm_S (w[35], w[36], selector); - w[41] = hc_byte_perm_S (w[34], w[35], selector); - w[40] = hc_byte_perm_S (w[33], w[34], selector); - w[39] = hc_byte_perm_S (w[32], w[33], selector); - w[38] = hc_byte_perm_S (w[31], w[32], selector); - w[37] = hc_byte_perm_S (w[30], w[31], selector); - w[36] = hc_byte_perm_S (w[29], w[30], selector); - w[35] = hc_byte_perm_S (w[28], w[29], selector); - w[34] = hc_byte_perm_S (w[27], w[28], selector); - w[33] = hc_byte_perm_S (w[26], w[27], selector); - w[32] = hc_byte_perm_S (w[25], w[26], selector); - w[31] = hc_byte_perm_S (w[24], w[25], selector); - w[30] = hc_byte_perm_S (w[23], w[24], selector); - w[29] = hc_byte_perm_S (w[22], w[23], selector); - w[28] = hc_byte_perm_S (w[21], w[22], selector); - w[27] = hc_byte_perm_S (w[20], w[21], selector); - w[26] = hc_byte_perm_S (w[19], w[20], selector); - w[25] = hc_byte_perm_S (w[18], w[19], selector); - w[24] = hc_byte_perm_S (w[17], w[18], selector); - w[23] = hc_byte_perm_S (w[16], w[17], selector); - w[22] = hc_byte_perm_S (w[15], w[16], selector); - w[21] = hc_byte_perm_S (w[14], w[15], selector); - w[20] = hc_byte_perm_S (w[13], w[14], selector); - w[19] = hc_byte_perm_S (w[12], w[13], selector); - w[18] = hc_byte_perm_S (w[11], w[12], selector); - w[17] = hc_byte_perm_S (w[10], w[11], selector); - w[16] = hc_byte_perm_S (w[ 9], w[10], selector); - w[15] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[14] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[13] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[12] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[11] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[10] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 9] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 8] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 7] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 6] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm_S (w[55], w[56], selector); - w[62] = hc_byte_perm_S (w[54], w[55], selector); - w[61] = hc_byte_perm_S (w[53], w[54], selector); - w[60] = hc_byte_perm_S (w[52], w[53], selector); - w[59] = hc_byte_perm_S (w[51], w[52], selector); - w[58] = hc_byte_perm_S (w[50], w[51], selector); - w[57] = hc_byte_perm_S (w[49], w[50], selector); - w[56] = hc_byte_perm_S (w[48], w[49], selector); - w[55] = hc_byte_perm_S (w[47], w[48], selector); - w[54] = hc_byte_perm_S (w[46], w[47], selector); - w[53] = hc_byte_perm_S (w[45], w[46], selector); - w[52] = hc_byte_perm_S (w[44], w[45], selector); - w[51] = hc_byte_perm_S (w[43], w[44], selector); - w[50] = hc_byte_perm_S (w[42], w[43], selector); - w[49] = hc_byte_perm_S (w[41], w[42], selector); - w[48] = hc_byte_perm_S (w[40], w[41], selector); - w[47] = hc_byte_perm_S (w[39], w[40], selector); - w[46] = hc_byte_perm_S (w[38], w[39], selector); - w[45] = hc_byte_perm_S (w[37], w[38], selector); - w[44] = hc_byte_perm_S (w[36], w[37], selector); - w[43] = hc_byte_perm_S (w[35], w[36], selector); - w[42] = hc_byte_perm_S (w[34], w[35], selector); - w[41] = hc_byte_perm_S (w[33], w[34], selector); - w[40] = hc_byte_perm_S (w[32], w[33], selector); - w[39] = hc_byte_perm_S (w[31], w[32], selector); - w[38] = hc_byte_perm_S (w[30], w[31], selector); - w[37] = hc_byte_perm_S (w[29], w[30], selector); - w[36] = hc_byte_perm_S (w[28], w[29], selector); - w[35] = hc_byte_perm_S (w[27], w[28], selector); - w[34] = hc_byte_perm_S (w[26], w[27], selector); - w[33] = hc_byte_perm_S (w[25], w[26], selector); - w[32] = hc_byte_perm_S (w[24], w[25], selector); - w[31] = hc_byte_perm_S (w[23], w[24], selector); - w[30] = hc_byte_perm_S (w[22], w[23], selector); - w[29] = hc_byte_perm_S (w[21], w[22], selector); - w[28] = hc_byte_perm_S (w[20], w[21], selector); - w[27] = hc_byte_perm_S (w[19], w[20], selector); - w[26] = hc_byte_perm_S (w[18], w[19], selector); - w[25] = hc_byte_perm_S (w[17], w[18], selector); - w[24] = hc_byte_perm_S (w[16], w[17], selector); - w[23] = hc_byte_perm_S (w[15], w[16], selector); - w[22] = hc_byte_perm_S (w[14], w[15], selector); - w[21] = hc_byte_perm_S (w[13], w[14], selector); - w[20] = hc_byte_perm_S (w[12], w[13], selector); - w[19] = hc_byte_perm_S (w[11], w[12], selector); - w[18] = hc_byte_perm_S (w[10], w[11], selector); - w[17] = hc_byte_perm_S (w[ 9], w[10], selector); - w[16] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[15] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[14] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[13] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[12] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[11] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[10] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 9] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 8] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 7] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm_S (w[54], w[55], selector); - w[62] = hc_byte_perm_S (w[53], w[54], selector); - w[61] = hc_byte_perm_S (w[52], w[53], selector); - w[60] = hc_byte_perm_S (w[51], w[52], selector); - w[59] = hc_byte_perm_S (w[50], w[51], selector); - w[58] = hc_byte_perm_S (w[49], w[50], selector); - w[57] = hc_byte_perm_S (w[48], w[49], selector); - w[56] = hc_byte_perm_S (w[47], w[48], selector); - w[55] = hc_byte_perm_S (w[46], w[47], selector); - w[54] = hc_byte_perm_S (w[45], w[46], selector); - w[53] = hc_byte_perm_S (w[44], w[45], selector); - w[52] = hc_byte_perm_S (w[43], w[44], selector); - w[51] = hc_byte_perm_S (w[42], w[43], selector); - w[50] = hc_byte_perm_S (w[41], w[42], selector); - w[49] = hc_byte_perm_S (w[40], w[41], selector); - w[48] = hc_byte_perm_S (w[39], w[40], selector); - w[47] = hc_byte_perm_S (w[38], w[39], selector); - w[46] = hc_byte_perm_S (w[37], w[38], selector); - w[45] = hc_byte_perm_S (w[36], w[37], selector); - w[44] = hc_byte_perm_S (w[35], w[36], selector); - w[43] = hc_byte_perm_S (w[34], w[35], selector); - w[42] = hc_byte_perm_S (w[33], w[34], selector); - w[41] = hc_byte_perm_S (w[32], w[33], selector); - w[40] = hc_byte_perm_S (w[31], w[32], selector); - w[39] = hc_byte_perm_S (w[30], w[31], selector); - w[38] = hc_byte_perm_S (w[29], w[30], selector); - w[37] = hc_byte_perm_S (w[28], w[29], selector); - w[36] = hc_byte_perm_S (w[27], w[28], selector); - w[35] = hc_byte_perm_S (w[26], w[27], selector); - w[34] = hc_byte_perm_S (w[25], w[26], selector); - w[33] = hc_byte_perm_S (w[24], w[25], selector); - w[32] = hc_byte_perm_S (w[23], w[24], selector); - w[31] = hc_byte_perm_S (w[22], w[23], selector); - w[30] = hc_byte_perm_S (w[21], w[22], selector); - w[29] = hc_byte_perm_S (w[20], w[21], selector); - w[28] = hc_byte_perm_S (w[19], w[20], selector); - w[27] = hc_byte_perm_S (w[18], w[19], selector); - w[26] = hc_byte_perm_S (w[17], w[18], selector); - w[25] = hc_byte_perm_S (w[16], w[17], selector); - w[24] = hc_byte_perm_S (w[15], w[16], selector); - w[23] = hc_byte_perm_S (w[14], w[15], selector); - w[22] = hc_byte_perm_S (w[13], w[14], selector); - w[21] = hc_byte_perm_S (w[12], w[13], selector); - w[20] = hc_byte_perm_S (w[11], w[12], selector); - w[19] = hc_byte_perm_S (w[10], w[11], selector); - w[18] = hc_byte_perm_S (w[ 9], w[10], selector); - w[17] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[16] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[15] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[14] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[13] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[12] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[11] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[10] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 9] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 8] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm_S (w[53], w[54], selector); - w[62] = hc_byte_perm_S (w[52], w[53], selector); - w[61] = hc_byte_perm_S (w[51], w[52], selector); - w[60] = hc_byte_perm_S (w[50], w[51], selector); - w[59] = hc_byte_perm_S (w[49], w[50], selector); - w[58] = hc_byte_perm_S (w[48], w[49], selector); - w[57] = hc_byte_perm_S (w[47], w[48], selector); - w[56] = hc_byte_perm_S (w[46], w[47], selector); - w[55] = hc_byte_perm_S (w[45], w[46], selector); - w[54] = hc_byte_perm_S (w[44], w[45], selector); - w[53] = hc_byte_perm_S (w[43], w[44], selector); - w[52] = hc_byte_perm_S (w[42], w[43], selector); - w[51] = hc_byte_perm_S (w[41], w[42], selector); - w[50] = hc_byte_perm_S (w[40], w[41], selector); - w[49] = hc_byte_perm_S (w[39], w[40], selector); - w[48] = hc_byte_perm_S (w[38], w[39], selector); - w[47] = hc_byte_perm_S (w[37], w[38], selector); - w[46] = hc_byte_perm_S (w[36], w[37], selector); - w[45] = hc_byte_perm_S (w[35], w[36], selector); - w[44] = hc_byte_perm_S (w[34], w[35], selector); - w[43] = hc_byte_perm_S (w[33], w[34], selector); - w[42] = hc_byte_perm_S (w[32], w[33], selector); - w[41] = hc_byte_perm_S (w[31], w[32], selector); - w[40] = hc_byte_perm_S (w[30], w[31], selector); - w[39] = hc_byte_perm_S (w[29], w[30], selector); - w[38] = hc_byte_perm_S (w[28], w[29], selector); - w[37] = hc_byte_perm_S (w[27], w[28], selector); - w[36] = hc_byte_perm_S (w[26], w[27], selector); - w[35] = hc_byte_perm_S (w[25], w[26], selector); - w[34] = hc_byte_perm_S (w[24], w[25], selector); - w[33] = hc_byte_perm_S (w[23], w[24], selector); - w[32] = hc_byte_perm_S (w[22], w[23], selector); - w[31] = hc_byte_perm_S (w[21], w[22], selector); - w[30] = hc_byte_perm_S (w[20], w[21], selector); - w[29] = hc_byte_perm_S (w[19], w[20], selector); - w[28] = hc_byte_perm_S (w[18], w[19], selector); - w[27] = hc_byte_perm_S (w[17], w[18], selector); - w[26] = hc_byte_perm_S (w[16], w[17], selector); - w[25] = hc_byte_perm_S (w[15], w[16], selector); - w[24] = hc_byte_perm_S (w[14], w[15], selector); - w[23] = hc_byte_perm_S (w[13], w[14], selector); - w[22] = hc_byte_perm_S (w[12], w[13], selector); - w[21] = hc_byte_perm_S (w[11], w[12], selector); - w[20] = hc_byte_perm_S (w[10], w[11], selector); - w[19] = hc_byte_perm_S (w[ 9], w[10], selector); - w[18] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[17] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[16] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[15] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[14] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[13] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[12] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[11] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[10] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 9] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm_S (w[52], w[53], selector); - w[62] = hc_byte_perm_S (w[51], w[52], selector); - w[61] = hc_byte_perm_S (w[50], w[51], selector); - w[60] = hc_byte_perm_S (w[49], w[50], selector); - w[59] = hc_byte_perm_S (w[48], w[49], selector); - w[58] = hc_byte_perm_S (w[47], w[48], selector); - w[57] = hc_byte_perm_S (w[46], w[47], selector); - w[56] = hc_byte_perm_S (w[45], w[46], selector); - w[55] = hc_byte_perm_S (w[44], w[45], selector); - w[54] = hc_byte_perm_S (w[43], w[44], selector); - w[53] = hc_byte_perm_S (w[42], w[43], selector); - w[52] = hc_byte_perm_S (w[41], w[42], selector); - w[51] = hc_byte_perm_S (w[40], w[41], selector); - w[50] = hc_byte_perm_S (w[39], w[40], selector); - w[49] = hc_byte_perm_S (w[38], w[39], selector); - w[48] = hc_byte_perm_S (w[37], w[38], selector); - w[47] = hc_byte_perm_S (w[36], w[37], selector); - w[46] = hc_byte_perm_S (w[35], w[36], selector); - w[45] = hc_byte_perm_S (w[34], w[35], selector); - w[44] = hc_byte_perm_S (w[33], w[34], selector); - w[43] = hc_byte_perm_S (w[32], w[33], selector); - w[42] = hc_byte_perm_S (w[31], w[32], selector); - w[41] = hc_byte_perm_S (w[30], w[31], selector); - w[40] = hc_byte_perm_S (w[29], w[30], selector); - w[39] = hc_byte_perm_S (w[28], w[29], selector); - w[38] = hc_byte_perm_S (w[27], w[28], selector); - w[37] = hc_byte_perm_S (w[26], w[27], selector); - w[36] = hc_byte_perm_S (w[25], w[26], selector); - w[35] = hc_byte_perm_S (w[24], w[25], selector); - w[34] = hc_byte_perm_S (w[23], w[24], selector); - w[33] = hc_byte_perm_S (w[22], w[23], selector); - w[32] = hc_byte_perm_S (w[21], w[22], selector); - w[31] = hc_byte_perm_S (w[20], w[21], selector); - w[30] = hc_byte_perm_S (w[19], w[20], selector); - w[29] = hc_byte_perm_S (w[18], w[19], selector); - w[28] = hc_byte_perm_S (w[17], w[18], selector); - w[27] = hc_byte_perm_S (w[16], w[17], selector); - w[26] = hc_byte_perm_S (w[15], w[16], selector); - w[25] = hc_byte_perm_S (w[14], w[15], selector); - w[24] = hc_byte_perm_S (w[13], w[14], selector); - w[23] = hc_byte_perm_S (w[12], w[13], selector); - w[22] = hc_byte_perm_S (w[11], w[12], selector); - w[21] = hc_byte_perm_S (w[10], w[11], selector); - w[20] = hc_byte_perm_S (w[ 9], w[10], selector); - w[19] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[18] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[17] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[16] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[15] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[14] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[13] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[12] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[11] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[10] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm_S (w[51], w[52], selector); - w[62] = hc_byte_perm_S (w[50], w[51], selector); - w[61] = hc_byte_perm_S (w[49], w[50], selector); - w[60] = hc_byte_perm_S (w[48], w[49], selector); - w[59] = hc_byte_perm_S (w[47], w[48], selector); - w[58] = hc_byte_perm_S (w[46], w[47], selector); - w[57] = hc_byte_perm_S (w[45], w[46], selector); - w[56] = hc_byte_perm_S (w[44], w[45], selector); - w[55] = hc_byte_perm_S (w[43], w[44], selector); - w[54] = hc_byte_perm_S (w[42], w[43], selector); - w[53] = hc_byte_perm_S (w[41], w[42], selector); - w[52] = hc_byte_perm_S (w[40], w[41], selector); - w[51] = hc_byte_perm_S (w[39], w[40], selector); - w[50] = hc_byte_perm_S (w[38], w[39], selector); - w[49] = hc_byte_perm_S (w[37], w[38], selector); - w[48] = hc_byte_perm_S (w[36], w[37], selector); - w[47] = hc_byte_perm_S (w[35], w[36], selector); - w[46] = hc_byte_perm_S (w[34], w[35], selector); - w[45] = hc_byte_perm_S (w[33], w[34], selector); - w[44] = hc_byte_perm_S (w[32], w[33], selector); - w[43] = hc_byte_perm_S (w[31], w[32], selector); - w[42] = hc_byte_perm_S (w[30], w[31], selector); - w[41] = hc_byte_perm_S (w[29], w[30], selector); - w[40] = hc_byte_perm_S (w[28], w[29], selector); - w[39] = hc_byte_perm_S (w[27], w[28], selector); - w[38] = hc_byte_perm_S (w[26], w[27], selector); - w[37] = hc_byte_perm_S (w[25], w[26], selector); - w[36] = hc_byte_perm_S (w[24], w[25], selector); - w[35] = hc_byte_perm_S (w[23], w[24], selector); - w[34] = hc_byte_perm_S (w[22], w[23], selector); - w[33] = hc_byte_perm_S (w[21], w[22], selector); - w[32] = hc_byte_perm_S (w[20], w[21], selector); - w[31] = hc_byte_perm_S (w[19], w[20], selector); - w[30] = hc_byte_perm_S (w[18], w[19], selector); - w[29] = hc_byte_perm_S (w[17], w[18], selector); - w[28] = hc_byte_perm_S (w[16], w[17], selector); - w[27] = hc_byte_perm_S (w[15], w[16], selector); - w[26] = hc_byte_perm_S (w[14], w[15], selector); - w[25] = hc_byte_perm_S (w[13], w[14], selector); - w[24] = hc_byte_perm_S (w[12], w[13], selector); - w[23] = hc_byte_perm_S (w[11], w[12], selector); - w[22] = hc_byte_perm_S (w[10], w[11], selector); - w[21] = hc_byte_perm_S (w[ 9], w[10], selector); - w[20] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[19] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[18] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[17] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[16] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[15] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[14] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[13] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[12] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[11] = hc_byte_perm_S ( 0, w[ 0], selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm_S (w[50], w[51], selector); - w[62] = hc_byte_perm_S (w[49], w[50], selector); - w[61] = hc_byte_perm_S (w[48], w[49], selector); - w[60] = hc_byte_perm_S (w[47], w[48], selector); - w[59] = hc_byte_perm_S (w[46], w[47], selector); - w[58] = hc_byte_perm_S (w[45], w[46], selector); - w[57] = hc_byte_perm_S (w[44], w[45], selector); - w[56] = hc_byte_perm_S (w[43], w[44], selector); - w[55] = hc_byte_perm_S (w[42], w[43], selector); - w[54] = hc_byte_perm_S (w[41], w[42], selector); - w[53] = hc_byte_perm_S (w[40], w[41], selector); - w[52] = hc_byte_perm_S (w[39], w[40], selector); - w[51] = hc_byte_perm_S (w[38], w[39], selector); - w[50] = hc_byte_perm_S (w[37], w[38], selector); - w[49] = hc_byte_perm_S (w[36], w[37], selector); - w[48] = hc_byte_perm_S (w[35], w[36], selector); - w[47] = hc_byte_perm_S (w[34], w[35], selector); - w[46] = hc_byte_perm_S (w[33], w[34], selector); - w[45] = hc_byte_perm_S (w[32], w[33], selector); - w[44] = hc_byte_perm_S (w[31], w[32], selector); - w[43] = hc_byte_perm_S (w[30], w[31], selector); - w[42] = hc_byte_perm_S (w[29], w[30], selector); - w[41] = hc_byte_perm_S (w[28], w[29], selector); - w[40] = hc_byte_perm_S (w[27], w[28], selector); - w[39] = hc_byte_perm_S (w[26], w[27], selector); - w[38] = hc_byte_perm_S (w[25], w[26], selector); - w[37] = hc_byte_perm_S (w[24], w[25], selector); - w[36] = hc_byte_perm_S (w[23], w[24], selector); - w[35] = hc_byte_perm_S (w[22], w[23], selector); - w[34] = hc_byte_perm_S (w[21], w[22], selector); - w[33] = hc_byte_perm_S (w[20], w[21], selector); - w[32] = hc_byte_perm_S (w[19], w[20], selector); - w[31] = hc_byte_perm_S (w[18], w[19], selector); - w[30] = hc_byte_perm_S (w[17], w[18], selector); - w[29] = hc_byte_perm_S (w[16], w[17], selector); - w[28] = hc_byte_perm_S (w[15], w[16], selector); - w[27] = hc_byte_perm_S (w[14], w[15], selector); - w[26] = hc_byte_perm_S (w[13], w[14], selector); - w[25] = hc_byte_perm_S (w[12], w[13], selector); - w[24] = hc_byte_perm_S (w[11], w[12], selector); - w[23] = hc_byte_perm_S (w[10], w[11], selector); - w[22] = hc_byte_perm_S (w[ 9], w[10], selector); - w[21] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[20] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[19] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[18] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[17] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[16] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[15] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[14] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[13] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[12] = hc_byte_perm_S ( 0, w[ 0], selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm_S (w[49], w[50], selector); - w[62] = hc_byte_perm_S (w[48], w[49], selector); - w[61] = hc_byte_perm_S (w[47], w[48], selector); - w[60] = hc_byte_perm_S (w[46], w[47], selector); - w[59] = hc_byte_perm_S (w[45], w[46], selector); - w[58] = hc_byte_perm_S (w[44], w[45], selector); - w[57] = hc_byte_perm_S (w[43], w[44], selector); - w[56] = hc_byte_perm_S (w[42], w[43], selector); - w[55] = hc_byte_perm_S (w[41], w[42], selector); - w[54] = hc_byte_perm_S (w[40], w[41], selector); - w[53] = hc_byte_perm_S (w[39], w[40], selector); - w[52] = hc_byte_perm_S (w[38], w[39], selector); - w[51] = hc_byte_perm_S (w[37], w[38], selector); - w[50] = hc_byte_perm_S (w[36], w[37], selector); - w[49] = hc_byte_perm_S (w[35], w[36], selector); - w[48] = hc_byte_perm_S (w[34], w[35], selector); - w[47] = hc_byte_perm_S (w[33], w[34], selector); - w[46] = hc_byte_perm_S (w[32], w[33], selector); - w[45] = hc_byte_perm_S (w[31], w[32], selector); - w[44] = hc_byte_perm_S (w[30], w[31], selector); - w[43] = hc_byte_perm_S (w[29], w[30], selector); - w[42] = hc_byte_perm_S (w[28], w[29], selector); - w[41] = hc_byte_perm_S (w[27], w[28], selector); - w[40] = hc_byte_perm_S (w[26], w[27], selector); - w[39] = hc_byte_perm_S (w[25], w[26], selector); - w[38] = hc_byte_perm_S (w[24], w[25], selector); - w[37] = hc_byte_perm_S (w[23], w[24], selector); - w[36] = hc_byte_perm_S (w[22], w[23], selector); - w[35] = hc_byte_perm_S (w[21], w[22], selector); - w[34] = hc_byte_perm_S (w[20], w[21], selector); - w[33] = hc_byte_perm_S (w[19], w[20], selector); - w[32] = hc_byte_perm_S (w[18], w[19], selector); - w[31] = hc_byte_perm_S (w[17], w[18], selector); - w[30] = hc_byte_perm_S (w[16], w[17], selector); - w[29] = hc_byte_perm_S (w[15], w[16], selector); - w[28] = hc_byte_perm_S (w[14], w[15], selector); - w[27] = hc_byte_perm_S (w[13], w[14], selector); - w[26] = hc_byte_perm_S (w[12], w[13], selector); - w[25] = hc_byte_perm_S (w[11], w[12], selector); - w[24] = hc_byte_perm_S (w[10], w[11], selector); - w[23] = hc_byte_perm_S (w[ 9], w[10], selector); - w[22] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[21] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[20] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[19] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[18] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[17] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[16] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[15] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[14] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[13] = hc_byte_perm_S ( 0, w[ 0], selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm_S (w[48], w[49], selector); - w[62] = hc_byte_perm_S (w[47], w[48], selector); - w[61] = hc_byte_perm_S (w[46], w[47], selector); - w[60] = hc_byte_perm_S (w[45], w[46], selector); - w[59] = hc_byte_perm_S (w[44], w[45], selector); - w[58] = hc_byte_perm_S (w[43], w[44], selector); - w[57] = hc_byte_perm_S (w[42], w[43], selector); - w[56] = hc_byte_perm_S (w[41], w[42], selector); - w[55] = hc_byte_perm_S (w[40], w[41], selector); - w[54] = hc_byte_perm_S (w[39], w[40], selector); - w[53] = hc_byte_perm_S (w[38], w[39], selector); - w[52] = hc_byte_perm_S (w[37], w[38], selector); - w[51] = hc_byte_perm_S (w[36], w[37], selector); - w[50] = hc_byte_perm_S (w[35], w[36], selector); - w[49] = hc_byte_perm_S (w[34], w[35], selector); - w[48] = hc_byte_perm_S (w[33], w[34], selector); - w[47] = hc_byte_perm_S (w[32], w[33], selector); - w[46] = hc_byte_perm_S (w[31], w[32], selector); - w[45] = hc_byte_perm_S (w[30], w[31], selector); - w[44] = hc_byte_perm_S (w[29], w[30], selector); - w[43] = hc_byte_perm_S (w[28], w[29], selector); - w[42] = hc_byte_perm_S (w[27], w[28], selector); - w[41] = hc_byte_perm_S (w[26], w[27], selector); - w[40] = hc_byte_perm_S (w[25], w[26], selector); - w[39] = hc_byte_perm_S (w[24], w[25], selector); - w[38] = hc_byte_perm_S (w[23], w[24], selector); - w[37] = hc_byte_perm_S (w[22], w[23], selector); - w[36] = hc_byte_perm_S (w[21], w[22], selector); - w[35] = hc_byte_perm_S (w[20], w[21], selector); - w[34] = hc_byte_perm_S (w[19], w[20], selector); - w[33] = hc_byte_perm_S (w[18], w[19], selector); - w[32] = hc_byte_perm_S (w[17], w[18], selector); - w[31] = hc_byte_perm_S (w[16], w[17], selector); - w[30] = hc_byte_perm_S (w[15], w[16], selector); - w[29] = hc_byte_perm_S (w[14], w[15], selector); - w[28] = hc_byte_perm_S (w[13], w[14], selector); - w[27] = hc_byte_perm_S (w[12], w[13], selector); - w[26] = hc_byte_perm_S (w[11], w[12], selector); - w[25] = hc_byte_perm_S (w[10], w[11], selector); - w[24] = hc_byte_perm_S (w[ 9], w[10], selector); - w[23] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[22] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[21] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[20] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[19] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[18] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[17] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[16] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[15] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[14] = hc_byte_perm_S ( 0, w[ 0], selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm_S (w[47], w[48], selector); - w[62] = hc_byte_perm_S (w[46], w[47], selector); - w[61] = hc_byte_perm_S (w[45], w[46], selector); - w[60] = hc_byte_perm_S (w[44], w[45], selector); - w[59] = hc_byte_perm_S (w[43], w[44], selector); - w[58] = hc_byte_perm_S (w[42], w[43], selector); - w[57] = hc_byte_perm_S (w[41], w[42], selector); - w[56] = hc_byte_perm_S (w[40], w[41], selector); - w[55] = hc_byte_perm_S (w[39], w[40], selector); - w[54] = hc_byte_perm_S (w[38], w[39], selector); - w[53] = hc_byte_perm_S (w[37], w[38], selector); - w[52] = hc_byte_perm_S (w[36], w[37], selector); - w[51] = hc_byte_perm_S (w[35], w[36], selector); - w[50] = hc_byte_perm_S (w[34], w[35], selector); - w[49] = hc_byte_perm_S (w[33], w[34], selector); - w[48] = hc_byte_perm_S (w[32], w[33], selector); - w[47] = hc_byte_perm_S (w[31], w[32], selector); - w[46] = hc_byte_perm_S (w[30], w[31], selector); - w[45] = hc_byte_perm_S (w[29], w[30], selector); - w[44] = hc_byte_perm_S (w[28], w[29], selector); - w[43] = hc_byte_perm_S (w[27], w[28], selector); - w[42] = hc_byte_perm_S (w[26], w[27], selector); - w[41] = hc_byte_perm_S (w[25], w[26], selector); - w[40] = hc_byte_perm_S (w[24], w[25], selector); - w[39] = hc_byte_perm_S (w[23], w[24], selector); - w[38] = hc_byte_perm_S (w[22], w[23], selector); - w[37] = hc_byte_perm_S (w[21], w[22], selector); - w[36] = hc_byte_perm_S (w[20], w[21], selector); - w[35] = hc_byte_perm_S (w[19], w[20], selector); - w[34] = hc_byte_perm_S (w[18], w[19], selector); - w[33] = hc_byte_perm_S (w[17], w[18], selector); - w[32] = hc_byte_perm_S (w[16], w[17], selector); - w[31] = hc_byte_perm_S (w[15], w[16], selector); - w[30] = hc_byte_perm_S (w[14], w[15], selector); - w[29] = hc_byte_perm_S (w[13], w[14], selector); - w[28] = hc_byte_perm_S (w[12], w[13], selector); - w[27] = hc_byte_perm_S (w[11], w[12], selector); - w[26] = hc_byte_perm_S (w[10], w[11], selector); - w[25] = hc_byte_perm_S (w[ 9], w[10], selector); - w[24] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[23] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[22] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[21] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[20] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[19] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[18] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[17] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[16] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[15] = hc_byte_perm_S ( 0, w[ 0], selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm_S (w[46], w[47], selector); - w[62] = hc_byte_perm_S (w[45], w[46], selector); - w[61] = hc_byte_perm_S (w[44], w[45], selector); - w[60] = hc_byte_perm_S (w[43], w[44], selector); - w[59] = hc_byte_perm_S (w[42], w[43], selector); - w[58] = hc_byte_perm_S (w[41], w[42], selector); - w[57] = hc_byte_perm_S (w[40], w[41], selector); - w[56] = hc_byte_perm_S (w[39], w[40], selector); - w[55] = hc_byte_perm_S (w[38], w[39], selector); - w[54] = hc_byte_perm_S (w[37], w[38], selector); - w[53] = hc_byte_perm_S (w[36], w[37], selector); - w[52] = hc_byte_perm_S (w[35], w[36], selector); - w[51] = hc_byte_perm_S (w[34], w[35], selector); - w[50] = hc_byte_perm_S (w[33], w[34], selector); - w[49] = hc_byte_perm_S (w[32], w[33], selector); - w[48] = hc_byte_perm_S (w[31], w[32], selector); - w[47] = hc_byte_perm_S (w[30], w[31], selector); - w[46] = hc_byte_perm_S (w[29], w[30], selector); - w[45] = hc_byte_perm_S (w[28], w[29], selector); - w[44] = hc_byte_perm_S (w[27], w[28], selector); - w[43] = hc_byte_perm_S (w[26], w[27], selector); - w[42] = hc_byte_perm_S (w[25], w[26], selector); - w[41] = hc_byte_perm_S (w[24], w[25], selector); - w[40] = hc_byte_perm_S (w[23], w[24], selector); - w[39] = hc_byte_perm_S (w[22], w[23], selector); - w[38] = hc_byte_perm_S (w[21], w[22], selector); - w[37] = hc_byte_perm_S (w[20], w[21], selector); - w[36] = hc_byte_perm_S (w[19], w[20], selector); - w[35] = hc_byte_perm_S (w[18], w[19], selector); - w[34] = hc_byte_perm_S (w[17], w[18], selector); - w[33] = hc_byte_perm_S (w[16], w[17], selector); - w[32] = hc_byte_perm_S (w[15], w[16], selector); - w[31] = hc_byte_perm_S (w[14], w[15], selector); - w[30] = hc_byte_perm_S (w[13], w[14], selector); - w[29] = hc_byte_perm_S (w[12], w[13], selector); - w[28] = hc_byte_perm_S (w[11], w[12], selector); - w[27] = hc_byte_perm_S (w[10], w[11], selector); - w[26] = hc_byte_perm_S (w[ 9], w[10], selector); - w[25] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[24] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[23] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[22] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[21] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[20] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[19] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[18] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[17] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[16] = hc_byte_perm_S ( 0, w[ 0], selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm_S (w[45], w[46], selector); - w[62] = hc_byte_perm_S (w[44], w[45], selector); - w[61] = hc_byte_perm_S (w[43], w[44], selector); - w[60] = hc_byte_perm_S (w[42], w[43], selector); - w[59] = hc_byte_perm_S (w[41], w[42], selector); - w[58] = hc_byte_perm_S (w[40], w[41], selector); - w[57] = hc_byte_perm_S (w[39], w[40], selector); - w[56] = hc_byte_perm_S (w[38], w[39], selector); - w[55] = hc_byte_perm_S (w[37], w[38], selector); - w[54] = hc_byte_perm_S (w[36], w[37], selector); - w[53] = hc_byte_perm_S (w[35], w[36], selector); - w[52] = hc_byte_perm_S (w[34], w[35], selector); - w[51] = hc_byte_perm_S (w[33], w[34], selector); - w[50] = hc_byte_perm_S (w[32], w[33], selector); - w[49] = hc_byte_perm_S (w[31], w[32], selector); - w[48] = hc_byte_perm_S (w[30], w[31], selector); - w[47] = hc_byte_perm_S (w[29], w[30], selector); - w[46] = hc_byte_perm_S (w[28], w[29], selector); - w[45] = hc_byte_perm_S (w[27], w[28], selector); - w[44] = hc_byte_perm_S (w[26], w[27], selector); - w[43] = hc_byte_perm_S (w[25], w[26], selector); - w[42] = hc_byte_perm_S (w[24], w[25], selector); - w[41] = hc_byte_perm_S (w[23], w[24], selector); - w[40] = hc_byte_perm_S (w[22], w[23], selector); - w[39] = hc_byte_perm_S (w[21], w[22], selector); - w[38] = hc_byte_perm_S (w[20], w[21], selector); - w[37] = hc_byte_perm_S (w[19], w[20], selector); - w[36] = hc_byte_perm_S (w[18], w[19], selector); - w[35] = hc_byte_perm_S (w[17], w[18], selector); - w[34] = hc_byte_perm_S (w[16], w[17], selector); - w[33] = hc_byte_perm_S (w[15], w[16], selector); - w[32] = hc_byte_perm_S (w[14], w[15], selector); - w[31] = hc_byte_perm_S (w[13], w[14], selector); - w[30] = hc_byte_perm_S (w[12], w[13], selector); - w[29] = hc_byte_perm_S (w[11], w[12], selector); - w[28] = hc_byte_perm_S (w[10], w[11], selector); - w[27] = hc_byte_perm_S (w[ 9], w[10], selector); - w[26] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[25] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[24] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[23] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[22] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[21] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[20] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[19] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[18] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[17] = hc_byte_perm_S ( 0, w[ 0], selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm_S (w[44], w[45], selector); - w[62] = hc_byte_perm_S (w[43], w[44], selector); - w[61] = hc_byte_perm_S (w[42], w[43], selector); - w[60] = hc_byte_perm_S (w[41], w[42], selector); - w[59] = hc_byte_perm_S (w[40], w[41], selector); - w[58] = hc_byte_perm_S (w[39], w[40], selector); - w[57] = hc_byte_perm_S (w[38], w[39], selector); - w[56] = hc_byte_perm_S (w[37], w[38], selector); - w[55] = hc_byte_perm_S (w[36], w[37], selector); - w[54] = hc_byte_perm_S (w[35], w[36], selector); - w[53] = hc_byte_perm_S (w[34], w[35], selector); - w[52] = hc_byte_perm_S (w[33], w[34], selector); - w[51] = hc_byte_perm_S (w[32], w[33], selector); - w[50] = hc_byte_perm_S (w[31], w[32], selector); - w[49] = hc_byte_perm_S (w[30], w[31], selector); - w[48] = hc_byte_perm_S (w[29], w[30], selector); - w[47] = hc_byte_perm_S (w[28], w[29], selector); - w[46] = hc_byte_perm_S (w[27], w[28], selector); - w[45] = hc_byte_perm_S (w[26], w[27], selector); - w[44] = hc_byte_perm_S (w[25], w[26], selector); - w[43] = hc_byte_perm_S (w[24], w[25], selector); - w[42] = hc_byte_perm_S (w[23], w[24], selector); - w[41] = hc_byte_perm_S (w[22], w[23], selector); - w[40] = hc_byte_perm_S (w[21], w[22], selector); - w[39] = hc_byte_perm_S (w[20], w[21], selector); - w[38] = hc_byte_perm_S (w[19], w[20], selector); - w[37] = hc_byte_perm_S (w[18], w[19], selector); - w[36] = hc_byte_perm_S (w[17], w[18], selector); - w[35] = hc_byte_perm_S (w[16], w[17], selector); - w[34] = hc_byte_perm_S (w[15], w[16], selector); - w[33] = hc_byte_perm_S (w[14], w[15], selector); - w[32] = hc_byte_perm_S (w[13], w[14], selector); - w[31] = hc_byte_perm_S (w[12], w[13], selector); - w[30] = hc_byte_perm_S (w[11], w[12], selector); - w[29] = hc_byte_perm_S (w[10], w[11], selector); - w[28] = hc_byte_perm_S (w[ 9], w[10], selector); - w[27] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[26] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[25] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[24] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[23] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[22] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[21] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[20] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[19] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[18] = hc_byte_perm_S ( 0, w[ 0], selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm_S (w[43], w[44], selector); - w[62] = hc_byte_perm_S (w[42], w[43], selector); - w[61] = hc_byte_perm_S (w[41], w[42], selector); - w[60] = hc_byte_perm_S (w[40], w[41], selector); - w[59] = hc_byte_perm_S (w[39], w[40], selector); - w[58] = hc_byte_perm_S (w[38], w[39], selector); - w[57] = hc_byte_perm_S (w[37], w[38], selector); - w[56] = hc_byte_perm_S (w[36], w[37], selector); - w[55] = hc_byte_perm_S (w[35], w[36], selector); - w[54] = hc_byte_perm_S (w[34], w[35], selector); - w[53] = hc_byte_perm_S (w[33], w[34], selector); - w[52] = hc_byte_perm_S (w[32], w[33], selector); - w[51] = hc_byte_perm_S (w[31], w[32], selector); - w[50] = hc_byte_perm_S (w[30], w[31], selector); - w[49] = hc_byte_perm_S (w[29], w[30], selector); - w[48] = hc_byte_perm_S (w[28], w[29], selector); - w[47] = hc_byte_perm_S (w[27], w[28], selector); - w[46] = hc_byte_perm_S (w[26], w[27], selector); - w[45] = hc_byte_perm_S (w[25], w[26], selector); - w[44] = hc_byte_perm_S (w[24], w[25], selector); - w[43] = hc_byte_perm_S (w[23], w[24], selector); - w[42] = hc_byte_perm_S (w[22], w[23], selector); - w[41] = hc_byte_perm_S (w[21], w[22], selector); - w[40] = hc_byte_perm_S (w[20], w[21], selector); - w[39] = hc_byte_perm_S (w[19], w[20], selector); - w[38] = hc_byte_perm_S (w[18], w[19], selector); - w[37] = hc_byte_perm_S (w[17], w[18], selector); - w[36] = hc_byte_perm_S (w[16], w[17], selector); - w[35] = hc_byte_perm_S (w[15], w[16], selector); - w[34] = hc_byte_perm_S (w[14], w[15], selector); - w[33] = hc_byte_perm_S (w[13], w[14], selector); - w[32] = hc_byte_perm_S (w[12], w[13], selector); - w[31] = hc_byte_perm_S (w[11], w[12], selector); - w[30] = hc_byte_perm_S (w[10], w[11], selector); - w[29] = hc_byte_perm_S (w[ 9], w[10], selector); - w[28] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[27] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[26] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[25] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[24] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[23] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[22] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[21] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[20] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[19] = hc_byte_perm_S ( 0, w[ 0], selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm_S (w[42], w[43], selector); - w[62] = hc_byte_perm_S (w[41], w[42], selector); - w[61] = hc_byte_perm_S (w[40], w[41], selector); - w[60] = hc_byte_perm_S (w[39], w[40], selector); - w[59] = hc_byte_perm_S (w[38], w[39], selector); - w[58] = hc_byte_perm_S (w[37], w[38], selector); - w[57] = hc_byte_perm_S (w[36], w[37], selector); - w[56] = hc_byte_perm_S (w[35], w[36], selector); - w[55] = hc_byte_perm_S (w[34], w[35], selector); - w[54] = hc_byte_perm_S (w[33], w[34], selector); - w[53] = hc_byte_perm_S (w[32], w[33], selector); - w[52] = hc_byte_perm_S (w[31], w[32], selector); - w[51] = hc_byte_perm_S (w[30], w[31], selector); - w[50] = hc_byte_perm_S (w[29], w[30], selector); - w[49] = hc_byte_perm_S (w[28], w[29], selector); - w[48] = hc_byte_perm_S (w[27], w[28], selector); - w[47] = hc_byte_perm_S (w[26], w[27], selector); - w[46] = hc_byte_perm_S (w[25], w[26], selector); - w[45] = hc_byte_perm_S (w[24], w[25], selector); - w[44] = hc_byte_perm_S (w[23], w[24], selector); - w[43] = hc_byte_perm_S (w[22], w[23], selector); - w[42] = hc_byte_perm_S (w[21], w[22], selector); - w[41] = hc_byte_perm_S (w[20], w[21], selector); - w[40] = hc_byte_perm_S (w[19], w[20], selector); - w[39] = hc_byte_perm_S (w[18], w[19], selector); - w[38] = hc_byte_perm_S (w[17], w[18], selector); - w[37] = hc_byte_perm_S (w[16], w[17], selector); - w[36] = hc_byte_perm_S (w[15], w[16], selector); - w[35] = hc_byte_perm_S (w[14], w[15], selector); - w[34] = hc_byte_perm_S (w[13], w[14], selector); - w[33] = hc_byte_perm_S (w[12], w[13], selector); - w[32] = hc_byte_perm_S (w[11], w[12], selector); - w[31] = hc_byte_perm_S (w[10], w[11], selector); - w[30] = hc_byte_perm_S (w[ 9], w[10], selector); - w[29] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[28] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[27] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[26] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[25] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[24] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[23] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[22] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[21] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[20] = hc_byte_perm_S ( 0, w[ 0], selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm_S (w[41], w[42], selector); - w[62] = hc_byte_perm_S (w[40], w[41], selector); - w[61] = hc_byte_perm_S (w[39], w[40], selector); - w[60] = hc_byte_perm_S (w[38], w[39], selector); - w[59] = hc_byte_perm_S (w[37], w[38], selector); - w[58] = hc_byte_perm_S (w[36], w[37], selector); - w[57] = hc_byte_perm_S (w[35], w[36], selector); - w[56] = hc_byte_perm_S (w[34], w[35], selector); - w[55] = hc_byte_perm_S (w[33], w[34], selector); - w[54] = hc_byte_perm_S (w[32], w[33], selector); - w[53] = hc_byte_perm_S (w[31], w[32], selector); - w[52] = hc_byte_perm_S (w[30], w[31], selector); - w[51] = hc_byte_perm_S (w[29], w[30], selector); - w[50] = hc_byte_perm_S (w[28], w[29], selector); - w[49] = hc_byte_perm_S (w[27], w[28], selector); - w[48] = hc_byte_perm_S (w[26], w[27], selector); - w[47] = hc_byte_perm_S (w[25], w[26], selector); - w[46] = hc_byte_perm_S (w[24], w[25], selector); - w[45] = hc_byte_perm_S (w[23], w[24], selector); - w[44] = hc_byte_perm_S (w[22], w[23], selector); - w[43] = hc_byte_perm_S (w[21], w[22], selector); - w[42] = hc_byte_perm_S (w[20], w[21], selector); - w[41] = hc_byte_perm_S (w[19], w[20], selector); - w[40] = hc_byte_perm_S (w[18], w[19], selector); - w[39] = hc_byte_perm_S (w[17], w[18], selector); - w[38] = hc_byte_perm_S (w[16], w[17], selector); - w[37] = hc_byte_perm_S (w[15], w[16], selector); - w[36] = hc_byte_perm_S (w[14], w[15], selector); - w[35] = hc_byte_perm_S (w[13], w[14], selector); - w[34] = hc_byte_perm_S (w[12], w[13], selector); - w[33] = hc_byte_perm_S (w[11], w[12], selector); - w[32] = hc_byte_perm_S (w[10], w[11], selector); - w[31] = hc_byte_perm_S (w[ 9], w[10], selector); - w[30] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[29] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[28] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[27] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[26] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[25] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[24] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[23] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[22] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[21] = hc_byte_perm_S ( 0, w[ 0], selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm_S (w[40], w[41], selector); - w[62] = hc_byte_perm_S (w[39], w[40], selector); - w[61] = hc_byte_perm_S (w[38], w[39], selector); - w[60] = hc_byte_perm_S (w[37], w[38], selector); - w[59] = hc_byte_perm_S (w[36], w[37], selector); - w[58] = hc_byte_perm_S (w[35], w[36], selector); - w[57] = hc_byte_perm_S (w[34], w[35], selector); - w[56] = hc_byte_perm_S (w[33], w[34], selector); - w[55] = hc_byte_perm_S (w[32], w[33], selector); - w[54] = hc_byte_perm_S (w[31], w[32], selector); - w[53] = hc_byte_perm_S (w[30], w[31], selector); - w[52] = hc_byte_perm_S (w[29], w[30], selector); - w[51] = hc_byte_perm_S (w[28], w[29], selector); - w[50] = hc_byte_perm_S (w[27], w[28], selector); - w[49] = hc_byte_perm_S (w[26], w[27], selector); - w[48] = hc_byte_perm_S (w[25], w[26], selector); - w[47] = hc_byte_perm_S (w[24], w[25], selector); - w[46] = hc_byte_perm_S (w[23], w[24], selector); - w[45] = hc_byte_perm_S (w[22], w[23], selector); - w[44] = hc_byte_perm_S (w[21], w[22], selector); - w[43] = hc_byte_perm_S (w[20], w[21], selector); - w[42] = hc_byte_perm_S (w[19], w[20], selector); - w[41] = hc_byte_perm_S (w[18], w[19], selector); - w[40] = hc_byte_perm_S (w[17], w[18], selector); - w[39] = hc_byte_perm_S (w[16], w[17], selector); - w[38] = hc_byte_perm_S (w[15], w[16], selector); - w[37] = hc_byte_perm_S (w[14], w[15], selector); - w[36] = hc_byte_perm_S (w[13], w[14], selector); - w[35] = hc_byte_perm_S (w[12], w[13], selector); - w[34] = hc_byte_perm_S (w[11], w[12], selector); - w[33] = hc_byte_perm_S (w[10], w[11], selector); - w[32] = hc_byte_perm_S (w[ 9], w[10], selector); - w[31] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[30] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[29] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[28] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[27] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[26] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[25] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[24] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[23] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[22] = hc_byte_perm_S ( 0, w[ 0], selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm_S (w[39], w[40], selector); - w[62] = hc_byte_perm_S (w[38], w[39], selector); - w[61] = hc_byte_perm_S (w[37], w[38], selector); - w[60] = hc_byte_perm_S (w[36], w[37], selector); - w[59] = hc_byte_perm_S (w[35], w[36], selector); - w[58] = hc_byte_perm_S (w[34], w[35], selector); - w[57] = hc_byte_perm_S (w[33], w[34], selector); - w[56] = hc_byte_perm_S (w[32], w[33], selector); - w[55] = hc_byte_perm_S (w[31], w[32], selector); - w[54] = hc_byte_perm_S (w[30], w[31], selector); - w[53] = hc_byte_perm_S (w[29], w[30], selector); - w[52] = hc_byte_perm_S (w[28], w[29], selector); - w[51] = hc_byte_perm_S (w[27], w[28], selector); - w[50] = hc_byte_perm_S (w[26], w[27], selector); - w[49] = hc_byte_perm_S (w[25], w[26], selector); - w[48] = hc_byte_perm_S (w[24], w[25], selector); - w[47] = hc_byte_perm_S (w[23], w[24], selector); - w[46] = hc_byte_perm_S (w[22], w[23], selector); - w[45] = hc_byte_perm_S (w[21], w[22], selector); - w[44] = hc_byte_perm_S (w[20], w[21], selector); - w[43] = hc_byte_perm_S (w[19], w[20], selector); - w[42] = hc_byte_perm_S (w[18], w[19], selector); - w[41] = hc_byte_perm_S (w[17], w[18], selector); - w[40] = hc_byte_perm_S (w[16], w[17], selector); - w[39] = hc_byte_perm_S (w[15], w[16], selector); - w[38] = hc_byte_perm_S (w[14], w[15], selector); - w[37] = hc_byte_perm_S (w[13], w[14], selector); - w[36] = hc_byte_perm_S (w[12], w[13], selector); - w[35] = hc_byte_perm_S (w[11], w[12], selector); - w[34] = hc_byte_perm_S (w[10], w[11], selector); - w[33] = hc_byte_perm_S (w[ 9], w[10], selector); - w[32] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[31] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[30] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[29] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[28] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[27] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[26] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[25] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[24] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[23] = hc_byte_perm_S ( 0, w[ 0], selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm_S (w[38], w[39], selector); - w[62] = hc_byte_perm_S (w[37], w[38], selector); - w[61] = hc_byte_perm_S (w[36], w[37], selector); - w[60] = hc_byte_perm_S (w[35], w[36], selector); - w[59] = hc_byte_perm_S (w[34], w[35], selector); - w[58] = hc_byte_perm_S (w[33], w[34], selector); - w[57] = hc_byte_perm_S (w[32], w[33], selector); - w[56] = hc_byte_perm_S (w[31], w[32], selector); - w[55] = hc_byte_perm_S (w[30], w[31], selector); - w[54] = hc_byte_perm_S (w[29], w[30], selector); - w[53] = hc_byte_perm_S (w[28], w[29], selector); - w[52] = hc_byte_perm_S (w[27], w[28], selector); - w[51] = hc_byte_perm_S (w[26], w[27], selector); - w[50] = hc_byte_perm_S (w[25], w[26], selector); - w[49] = hc_byte_perm_S (w[24], w[25], selector); - w[48] = hc_byte_perm_S (w[23], w[24], selector); - w[47] = hc_byte_perm_S (w[22], w[23], selector); - w[46] = hc_byte_perm_S (w[21], w[22], selector); - w[45] = hc_byte_perm_S (w[20], w[21], selector); - w[44] = hc_byte_perm_S (w[19], w[20], selector); - w[43] = hc_byte_perm_S (w[18], w[19], selector); - w[42] = hc_byte_perm_S (w[17], w[18], selector); - w[41] = hc_byte_perm_S (w[16], w[17], selector); - w[40] = hc_byte_perm_S (w[15], w[16], selector); - w[39] = hc_byte_perm_S (w[14], w[15], selector); - w[38] = hc_byte_perm_S (w[13], w[14], selector); - w[37] = hc_byte_perm_S (w[12], w[13], selector); - w[36] = hc_byte_perm_S (w[11], w[12], selector); - w[35] = hc_byte_perm_S (w[10], w[11], selector); - w[34] = hc_byte_perm_S (w[ 9], w[10], selector); - w[33] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[32] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[31] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[30] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[29] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[28] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[27] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[26] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[25] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[24] = hc_byte_perm_S ( 0, w[ 0], selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm_S (w[37], w[38], selector); - w[62] = hc_byte_perm_S (w[36], w[37], selector); - w[61] = hc_byte_perm_S (w[35], w[36], selector); - w[60] = hc_byte_perm_S (w[34], w[35], selector); - w[59] = hc_byte_perm_S (w[33], w[34], selector); - w[58] = hc_byte_perm_S (w[32], w[33], selector); - w[57] = hc_byte_perm_S (w[31], w[32], selector); - w[56] = hc_byte_perm_S (w[30], w[31], selector); - w[55] = hc_byte_perm_S (w[29], w[30], selector); - w[54] = hc_byte_perm_S (w[28], w[29], selector); - w[53] = hc_byte_perm_S (w[27], w[28], selector); - w[52] = hc_byte_perm_S (w[26], w[27], selector); - w[51] = hc_byte_perm_S (w[25], w[26], selector); - w[50] = hc_byte_perm_S (w[24], w[25], selector); - w[49] = hc_byte_perm_S (w[23], w[24], selector); - w[48] = hc_byte_perm_S (w[22], w[23], selector); - w[47] = hc_byte_perm_S (w[21], w[22], selector); - w[46] = hc_byte_perm_S (w[20], w[21], selector); - w[45] = hc_byte_perm_S (w[19], w[20], selector); - w[44] = hc_byte_perm_S (w[18], w[19], selector); - w[43] = hc_byte_perm_S (w[17], w[18], selector); - w[42] = hc_byte_perm_S (w[16], w[17], selector); - w[41] = hc_byte_perm_S (w[15], w[16], selector); - w[40] = hc_byte_perm_S (w[14], w[15], selector); - w[39] = hc_byte_perm_S (w[13], w[14], selector); - w[38] = hc_byte_perm_S (w[12], w[13], selector); - w[37] = hc_byte_perm_S (w[11], w[12], selector); - w[36] = hc_byte_perm_S (w[10], w[11], selector); - w[35] = hc_byte_perm_S (w[ 9], w[10], selector); - w[34] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[33] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[32] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[31] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[30] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[29] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[28] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[27] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[26] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[25] = hc_byte_perm_S ( 0, w[ 0], selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm_S (w[36], w[37], selector); - w[62] = hc_byte_perm_S (w[35], w[36], selector); - w[61] = hc_byte_perm_S (w[34], w[35], selector); - w[60] = hc_byte_perm_S (w[33], w[34], selector); - w[59] = hc_byte_perm_S (w[32], w[33], selector); - w[58] = hc_byte_perm_S (w[31], w[32], selector); - w[57] = hc_byte_perm_S (w[30], w[31], selector); - w[56] = hc_byte_perm_S (w[29], w[30], selector); - w[55] = hc_byte_perm_S (w[28], w[29], selector); - w[54] = hc_byte_perm_S (w[27], w[28], selector); - w[53] = hc_byte_perm_S (w[26], w[27], selector); - w[52] = hc_byte_perm_S (w[25], w[26], selector); - w[51] = hc_byte_perm_S (w[24], w[25], selector); - w[50] = hc_byte_perm_S (w[23], w[24], selector); - w[49] = hc_byte_perm_S (w[22], w[23], selector); - w[48] = hc_byte_perm_S (w[21], w[22], selector); - w[47] = hc_byte_perm_S (w[20], w[21], selector); - w[46] = hc_byte_perm_S (w[19], w[20], selector); - w[45] = hc_byte_perm_S (w[18], w[19], selector); - w[44] = hc_byte_perm_S (w[17], w[18], selector); - w[43] = hc_byte_perm_S (w[16], w[17], selector); - w[42] = hc_byte_perm_S (w[15], w[16], selector); - w[41] = hc_byte_perm_S (w[14], w[15], selector); - w[40] = hc_byte_perm_S (w[13], w[14], selector); - w[39] = hc_byte_perm_S (w[12], w[13], selector); - w[38] = hc_byte_perm_S (w[11], w[12], selector); - w[37] = hc_byte_perm_S (w[10], w[11], selector); - w[36] = hc_byte_perm_S (w[ 9], w[10], selector); - w[35] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[34] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[33] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[32] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[31] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[30] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[29] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[28] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[27] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[26] = hc_byte_perm_S ( 0, w[ 0], selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm_S (w[35], w[36], selector); - w[62] = hc_byte_perm_S (w[34], w[35], selector); - w[61] = hc_byte_perm_S (w[33], w[34], selector); - w[60] = hc_byte_perm_S (w[32], w[33], selector); - w[59] = hc_byte_perm_S (w[31], w[32], selector); - w[58] = hc_byte_perm_S (w[30], w[31], selector); - w[57] = hc_byte_perm_S (w[29], w[30], selector); - w[56] = hc_byte_perm_S (w[28], w[29], selector); - w[55] = hc_byte_perm_S (w[27], w[28], selector); - w[54] = hc_byte_perm_S (w[26], w[27], selector); - w[53] = hc_byte_perm_S (w[25], w[26], selector); - w[52] = hc_byte_perm_S (w[24], w[25], selector); - w[51] = hc_byte_perm_S (w[23], w[24], selector); - w[50] = hc_byte_perm_S (w[22], w[23], selector); - w[49] = hc_byte_perm_S (w[21], w[22], selector); - w[48] = hc_byte_perm_S (w[20], w[21], selector); - w[47] = hc_byte_perm_S (w[19], w[20], selector); - w[46] = hc_byte_perm_S (w[18], w[19], selector); - w[45] = hc_byte_perm_S (w[17], w[18], selector); - w[44] = hc_byte_perm_S (w[16], w[17], selector); - w[43] = hc_byte_perm_S (w[15], w[16], selector); - w[42] = hc_byte_perm_S (w[14], w[15], selector); - w[41] = hc_byte_perm_S (w[13], w[14], selector); - w[40] = hc_byte_perm_S (w[12], w[13], selector); - w[39] = hc_byte_perm_S (w[11], w[12], selector); - w[38] = hc_byte_perm_S (w[10], w[11], selector); - w[37] = hc_byte_perm_S (w[ 9], w[10], selector); - w[36] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[35] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[34] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[33] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[32] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[31] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[30] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[29] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[28] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[27] = hc_byte_perm_S ( 0, w[ 0], selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm_S (w[34], w[35], selector); - w[62] = hc_byte_perm_S (w[33], w[34], selector); - w[61] = hc_byte_perm_S (w[32], w[33], selector); - w[60] = hc_byte_perm_S (w[31], w[32], selector); - w[59] = hc_byte_perm_S (w[30], w[31], selector); - w[58] = hc_byte_perm_S (w[29], w[30], selector); - w[57] = hc_byte_perm_S (w[28], w[29], selector); - w[56] = hc_byte_perm_S (w[27], w[28], selector); - w[55] = hc_byte_perm_S (w[26], w[27], selector); - w[54] = hc_byte_perm_S (w[25], w[26], selector); - w[53] = hc_byte_perm_S (w[24], w[25], selector); - w[52] = hc_byte_perm_S (w[23], w[24], selector); - w[51] = hc_byte_perm_S (w[22], w[23], selector); - w[50] = hc_byte_perm_S (w[21], w[22], selector); - w[49] = hc_byte_perm_S (w[20], w[21], selector); - w[48] = hc_byte_perm_S (w[19], w[20], selector); - w[47] = hc_byte_perm_S (w[18], w[19], selector); - w[46] = hc_byte_perm_S (w[17], w[18], selector); - w[45] = hc_byte_perm_S (w[16], w[17], selector); - w[44] = hc_byte_perm_S (w[15], w[16], selector); - w[43] = hc_byte_perm_S (w[14], w[15], selector); - w[42] = hc_byte_perm_S (w[13], w[14], selector); - w[41] = hc_byte_perm_S (w[12], w[13], selector); - w[40] = hc_byte_perm_S (w[11], w[12], selector); - w[39] = hc_byte_perm_S (w[10], w[11], selector); - w[38] = hc_byte_perm_S (w[ 9], w[10], selector); - w[37] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[36] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[35] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[34] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[33] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[32] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[31] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[30] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[29] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[28] = hc_byte_perm_S ( 0, w[ 0], selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm_S (w[33], w[34], selector); - w[62] = hc_byte_perm_S (w[32], w[33], selector); - w[61] = hc_byte_perm_S (w[31], w[32], selector); - w[60] = hc_byte_perm_S (w[30], w[31], selector); - w[59] = hc_byte_perm_S (w[29], w[30], selector); - w[58] = hc_byte_perm_S (w[28], w[29], selector); - w[57] = hc_byte_perm_S (w[27], w[28], selector); - w[56] = hc_byte_perm_S (w[26], w[27], selector); - w[55] = hc_byte_perm_S (w[25], w[26], selector); - w[54] = hc_byte_perm_S (w[24], w[25], selector); - w[53] = hc_byte_perm_S (w[23], w[24], selector); - w[52] = hc_byte_perm_S (w[22], w[23], selector); - w[51] = hc_byte_perm_S (w[21], w[22], selector); - w[50] = hc_byte_perm_S (w[20], w[21], selector); - w[49] = hc_byte_perm_S (w[19], w[20], selector); - w[48] = hc_byte_perm_S (w[18], w[19], selector); - w[47] = hc_byte_perm_S (w[17], w[18], selector); - w[46] = hc_byte_perm_S (w[16], w[17], selector); - w[45] = hc_byte_perm_S (w[15], w[16], selector); - w[44] = hc_byte_perm_S (w[14], w[15], selector); - w[43] = hc_byte_perm_S (w[13], w[14], selector); - w[42] = hc_byte_perm_S (w[12], w[13], selector); - w[41] = hc_byte_perm_S (w[11], w[12], selector); - w[40] = hc_byte_perm_S (w[10], w[11], selector); - w[39] = hc_byte_perm_S (w[ 9], w[10], selector); - w[38] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[37] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[36] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[35] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[34] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[33] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[32] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[31] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[30] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[29] = hc_byte_perm_S ( 0, w[ 0], selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm_S (w[32], w[33], selector); - w[62] = hc_byte_perm_S (w[31], w[32], selector); - w[61] = hc_byte_perm_S (w[30], w[31], selector); - w[60] = hc_byte_perm_S (w[29], w[30], selector); - w[59] = hc_byte_perm_S (w[28], w[29], selector); - w[58] = hc_byte_perm_S (w[27], w[28], selector); - w[57] = hc_byte_perm_S (w[26], w[27], selector); - w[56] = hc_byte_perm_S (w[25], w[26], selector); - w[55] = hc_byte_perm_S (w[24], w[25], selector); - w[54] = hc_byte_perm_S (w[23], w[24], selector); - w[53] = hc_byte_perm_S (w[22], w[23], selector); - w[52] = hc_byte_perm_S (w[21], w[22], selector); - w[51] = hc_byte_perm_S (w[20], w[21], selector); - w[50] = hc_byte_perm_S (w[19], w[20], selector); - w[49] = hc_byte_perm_S (w[18], w[19], selector); - w[48] = hc_byte_perm_S (w[17], w[18], selector); - w[47] = hc_byte_perm_S (w[16], w[17], selector); - w[46] = hc_byte_perm_S (w[15], w[16], selector); - w[45] = hc_byte_perm_S (w[14], w[15], selector); - w[44] = hc_byte_perm_S (w[13], w[14], selector); - w[43] = hc_byte_perm_S (w[12], w[13], selector); - w[42] = hc_byte_perm_S (w[11], w[12], selector); - w[41] = hc_byte_perm_S (w[10], w[11], selector); - w[40] = hc_byte_perm_S (w[ 9], w[10], selector); - w[39] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[38] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[37] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[36] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[35] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[34] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[33] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[32] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[31] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[30] = hc_byte_perm_S ( 0, w[ 0], selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm_S (w[31], w[32], selector); - w[62] = hc_byte_perm_S (w[30], w[31], selector); - w[61] = hc_byte_perm_S (w[29], w[30], selector); - w[60] = hc_byte_perm_S (w[28], w[29], selector); - w[59] = hc_byte_perm_S (w[27], w[28], selector); - w[58] = hc_byte_perm_S (w[26], w[27], selector); - w[57] = hc_byte_perm_S (w[25], w[26], selector); - w[56] = hc_byte_perm_S (w[24], w[25], selector); - w[55] = hc_byte_perm_S (w[23], w[24], selector); - w[54] = hc_byte_perm_S (w[22], w[23], selector); - w[53] = hc_byte_perm_S (w[21], w[22], selector); - w[52] = hc_byte_perm_S (w[20], w[21], selector); - w[51] = hc_byte_perm_S (w[19], w[20], selector); - w[50] = hc_byte_perm_S (w[18], w[19], selector); - w[49] = hc_byte_perm_S (w[17], w[18], selector); - w[48] = hc_byte_perm_S (w[16], w[17], selector); - w[47] = hc_byte_perm_S (w[15], w[16], selector); - w[46] = hc_byte_perm_S (w[14], w[15], selector); - w[45] = hc_byte_perm_S (w[13], w[14], selector); - w[44] = hc_byte_perm_S (w[12], w[13], selector); - w[43] = hc_byte_perm_S (w[11], w[12], selector); - w[42] = hc_byte_perm_S (w[10], w[11], selector); - w[41] = hc_byte_perm_S (w[ 9], w[10], selector); - w[40] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[39] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[38] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[37] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[36] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[35] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[34] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[33] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[32] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[31] = hc_byte_perm_S ( 0, w[ 0], selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm_S (w[30], w[31], selector); - w[62] = hc_byte_perm_S (w[29], w[30], selector); - w[61] = hc_byte_perm_S (w[28], w[29], selector); - w[60] = hc_byte_perm_S (w[27], w[28], selector); - w[59] = hc_byte_perm_S (w[26], w[27], selector); - w[58] = hc_byte_perm_S (w[25], w[26], selector); - w[57] = hc_byte_perm_S (w[24], w[25], selector); - w[56] = hc_byte_perm_S (w[23], w[24], selector); - w[55] = hc_byte_perm_S (w[22], w[23], selector); - w[54] = hc_byte_perm_S (w[21], w[22], selector); - w[53] = hc_byte_perm_S (w[20], w[21], selector); - w[52] = hc_byte_perm_S (w[19], w[20], selector); - w[51] = hc_byte_perm_S (w[18], w[19], selector); - w[50] = hc_byte_perm_S (w[17], w[18], selector); - w[49] = hc_byte_perm_S (w[16], w[17], selector); - w[48] = hc_byte_perm_S (w[15], w[16], selector); - w[47] = hc_byte_perm_S (w[14], w[15], selector); - w[46] = hc_byte_perm_S (w[13], w[14], selector); - w[45] = hc_byte_perm_S (w[12], w[13], selector); - w[44] = hc_byte_perm_S (w[11], w[12], selector); - w[43] = hc_byte_perm_S (w[10], w[11], selector); - w[42] = hc_byte_perm_S (w[ 9], w[10], selector); - w[41] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[40] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[39] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[38] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[37] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[36] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[35] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[34] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[33] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[32] = hc_byte_perm_S ( 0, w[ 0], selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm_S (w[29], w[30], selector); - w[62] = hc_byte_perm_S (w[28], w[29], selector); - w[61] = hc_byte_perm_S (w[27], w[28], selector); - w[60] = hc_byte_perm_S (w[26], w[27], selector); - w[59] = hc_byte_perm_S (w[25], w[26], selector); - w[58] = hc_byte_perm_S (w[24], w[25], selector); - w[57] = hc_byte_perm_S (w[23], w[24], selector); - w[56] = hc_byte_perm_S (w[22], w[23], selector); - w[55] = hc_byte_perm_S (w[21], w[22], selector); - w[54] = hc_byte_perm_S (w[20], w[21], selector); - w[53] = hc_byte_perm_S (w[19], w[20], selector); - w[52] = hc_byte_perm_S (w[18], w[19], selector); - w[51] = hc_byte_perm_S (w[17], w[18], selector); - w[50] = hc_byte_perm_S (w[16], w[17], selector); - w[49] = hc_byte_perm_S (w[15], w[16], selector); - w[48] = hc_byte_perm_S (w[14], w[15], selector); - w[47] = hc_byte_perm_S (w[13], w[14], selector); - w[46] = hc_byte_perm_S (w[12], w[13], selector); - w[45] = hc_byte_perm_S (w[11], w[12], selector); - w[44] = hc_byte_perm_S (w[10], w[11], selector); - w[43] = hc_byte_perm_S (w[ 9], w[10], selector); - w[42] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[41] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[40] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[39] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[38] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[37] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[36] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[35] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[34] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[33] = hc_byte_perm_S ( 0, w[ 0], selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm_S (w[28], w[29], selector); - w[62] = hc_byte_perm_S (w[27], w[28], selector); - w[61] = hc_byte_perm_S (w[26], w[27], selector); - w[60] = hc_byte_perm_S (w[25], w[26], selector); - w[59] = hc_byte_perm_S (w[24], w[25], selector); - w[58] = hc_byte_perm_S (w[23], w[24], selector); - w[57] = hc_byte_perm_S (w[22], w[23], selector); - w[56] = hc_byte_perm_S (w[21], w[22], selector); - w[55] = hc_byte_perm_S (w[20], w[21], selector); - w[54] = hc_byte_perm_S (w[19], w[20], selector); - w[53] = hc_byte_perm_S (w[18], w[19], selector); - w[52] = hc_byte_perm_S (w[17], w[18], selector); - w[51] = hc_byte_perm_S (w[16], w[17], selector); - w[50] = hc_byte_perm_S (w[15], w[16], selector); - w[49] = hc_byte_perm_S (w[14], w[15], selector); - w[48] = hc_byte_perm_S (w[13], w[14], selector); - w[47] = hc_byte_perm_S (w[12], w[13], selector); - w[46] = hc_byte_perm_S (w[11], w[12], selector); - w[45] = hc_byte_perm_S (w[10], w[11], selector); - w[44] = hc_byte_perm_S (w[ 9], w[10], selector); - w[43] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[42] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[41] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[40] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[39] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[38] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[37] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[36] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[35] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[34] = hc_byte_perm_S ( 0, w[ 0], selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm_S (w[27], w[28], selector); - w[62] = hc_byte_perm_S (w[26], w[27], selector); - w[61] = hc_byte_perm_S (w[25], w[26], selector); - w[60] = hc_byte_perm_S (w[24], w[25], selector); - w[59] = hc_byte_perm_S (w[23], w[24], selector); - w[58] = hc_byte_perm_S (w[22], w[23], selector); - w[57] = hc_byte_perm_S (w[21], w[22], selector); - w[56] = hc_byte_perm_S (w[20], w[21], selector); - w[55] = hc_byte_perm_S (w[19], w[20], selector); - w[54] = hc_byte_perm_S (w[18], w[19], selector); - w[53] = hc_byte_perm_S (w[17], w[18], selector); - w[52] = hc_byte_perm_S (w[16], w[17], selector); - w[51] = hc_byte_perm_S (w[15], w[16], selector); - w[50] = hc_byte_perm_S (w[14], w[15], selector); - w[49] = hc_byte_perm_S (w[13], w[14], selector); - w[48] = hc_byte_perm_S (w[12], w[13], selector); - w[47] = hc_byte_perm_S (w[11], w[12], selector); - w[46] = hc_byte_perm_S (w[10], w[11], selector); - w[45] = hc_byte_perm_S (w[ 9], w[10], selector); - w[44] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[43] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[42] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[41] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[40] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[39] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[38] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[37] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[36] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[35] = hc_byte_perm_S ( 0, w[ 0], selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm_S (w[26], w[27], selector); - w[62] = hc_byte_perm_S (w[25], w[26], selector); - w[61] = hc_byte_perm_S (w[24], w[25], selector); - w[60] = hc_byte_perm_S (w[23], w[24], selector); - w[59] = hc_byte_perm_S (w[22], w[23], selector); - w[58] = hc_byte_perm_S (w[21], w[22], selector); - w[57] = hc_byte_perm_S (w[20], w[21], selector); - w[56] = hc_byte_perm_S (w[19], w[20], selector); - w[55] = hc_byte_perm_S (w[18], w[19], selector); - w[54] = hc_byte_perm_S (w[17], w[18], selector); - w[53] = hc_byte_perm_S (w[16], w[17], selector); - w[52] = hc_byte_perm_S (w[15], w[16], selector); - w[51] = hc_byte_perm_S (w[14], w[15], selector); - w[50] = hc_byte_perm_S (w[13], w[14], selector); - w[49] = hc_byte_perm_S (w[12], w[13], selector); - w[48] = hc_byte_perm_S (w[11], w[12], selector); - w[47] = hc_byte_perm_S (w[10], w[11], selector); - w[46] = hc_byte_perm_S (w[ 9], w[10], selector); - w[45] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[44] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[43] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[42] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[41] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[40] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[39] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[38] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[37] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[36] = hc_byte_perm_S ( 0, w[ 0], selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm_S (w[25], w[26], selector); - w[62] = hc_byte_perm_S (w[24], w[25], selector); - w[61] = hc_byte_perm_S (w[23], w[24], selector); - w[60] = hc_byte_perm_S (w[22], w[23], selector); - w[59] = hc_byte_perm_S (w[21], w[22], selector); - w[58] = hc_byte_perm_S (w[20], w[21], selector); - w[57] = hc_byte_perm_S (w[19], w[20], selector); - w[56] = hc_byte_perm_S (w[18], w[19], selector); - w[55] = hc_byte_perm_S (w[17], w[18], selector); - w[54] = hc_byte_perm_S (w[16], w[17], selector); - w[53] = hc_byte_perm_S (w[15], w[16], selector); - w[52] = hc_byte_perm_S (w[14], w[15], selector); - w[51] = hc_byte_perm_S (w[13], w[14], selector); - w[50] = hc_byte_perm_S (w[12], w[13], selector); - w[49] = hc_byte_perm_S (w[11], w[12], selector); - w[48] = hc_byte_perm_S (w[10], w[11], selector); - w[47] = hc_byte_perm_S (w[ 9], w[10], selector); - w[46] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[45] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[44] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[43] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[42] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[41] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[40] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[39] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[38] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[37] = hc_byte_perm_S ( 0, w[ 0], selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm_S (w[24], w[25], selector); - w[62] = hc_byte_perm_S (w[23], w[24], selector); - w[61] = hc_byte_perm_S (w[22], w[23], selector); - w[60] = hc_byte_perm_S (w[21], w[22], selector); - w[59] = hc_byte_perm_S (w[20], w[21], selector); - w[58] = hc_byte_perm_S (w[19], w[20], selector); - w[57] = hc_byte_perm_S (w[18], w[19], selector); - w[56] = hc_byte_perm_S (w[17], w[18], selector); - w[55] = hc_byte_perm_S (w[16], w[17], selector); - w[54] = hc_byte_perm_S (w[15], w[16], selector); - w[53] = hc_byte_perm_S (w[14], w[15], selector); - w[52] = hc_byte_perm_S (w[13], w[14], selector); - w[51] = hc_byte_perm_S (w[12], w[13], selector); - w[50] = hc_byte_perm_S (w[11], w[12], selector); - w[49] = hc_byte_perm_S (w[10], w[11], selector); - w[48] = hc_byte_perm_S (w[ 9], w[10], selector); - w[47] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[46] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[45] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[44] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[43] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[42] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[41] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[40] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[39] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[38] = hc_byte_perm_S ( 0, w[ 0], selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm_S (w[23], w[24], selector); - w[62] = hc_byte_perm_S (w[22], w[23], selector); - w[61] = hc_byte_perm_S (w[21], w[22], selector); - w[60] = hc_byte_perm_S (w[20], w[21], selector); - w[59] = hc_byte_perm_S (w[19], w[20], selector); - w[58] = hc_byte_perm_S (w[18], w[19], selector); - w[57] = hc_byte_perm_S (w[17], w[18], selector); - w[56] = hc_byte_perm_S (w[16], w[17], selector); - w[55] = hc_byte_perm_S (w[15], w[16], selector); - w[54] = hc_byte_perm_S (w[14], w[15], selector); - w[53] = hc_byte_perm_S (w[13], w[14], selector); - w[52] = hc_byte_perm_S (w[12], w[13], selector); - w[51] = hc_byte_perm_S (w[11], w[12], selector); - w[50] = hc_byte_perm_S (w[10], w[11], selector); - w[49] = hc_byte_perm_S (w[ 9], w[10], selector); - w[48] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[47] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[46] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[45] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[44] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[43] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[42] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[41] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[40] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[39] = hc_byte_perm_S ( 0, w[ 0], selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm_S (w[22], w[23], selector); - w[62] = hc_byte_perm_S (w[21], w[22], selector); - w[61] = hc_byte_perm_S (w[20], w[21], selector); - w[60] = hc_byte_perm_S (w[19], w[20], selector); - w[59] = hc_byte_perm_S (w[18], w[19], selector); - w[58] = hc_byte_perm_S (w[17], w[18], selector); - w[57] = hc_byte_perm_S (w[16], w[17], selector); - w[56] = hc_byte_perm_S (w[15], w[16], selector); - w[55] = hc_byte_perm_S (w[14], w[15], selector); - w[54] = hc_byte_perm_S (w[13], w[14], selector); - w[53] = hc_byte_perm_S (w[12], w[13], selector); - w[52] = hc_byte_perm_S (w[11], w[12], selector); - w[51] = hc_byte_perm_S (w[10], w[11], selector); - w[50] = hc_byte_perm_S (w[ 9], w[10], selector); - w[49] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[48] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[47] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[46] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[45] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[44] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[43] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[42] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[41] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[40] = hc_byte_perm_S ( 0, w[ 0], selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm_S (w[21], w[22], selector); - w[62] = hc_byte_perm_S (w[20], w[21], selector); - w[61] = hc_byte_perm_S (w[19], w[20], selector); - w[60] = hc_byte_perm_S (w[18], w[19], selector); - w[59] = hc_byte_perm_S (w[17], w[18], selector); - w[58] = hc_byte_perm_S (w[16], w[17], selector); - w[57] = hc_byte_perm_S (w[15], w[16], selector); - w[56] = hc_byte_perm_S (w[14], w[15], selector); - w[55] = hc_byte_perm_S (w[13], w[14], selector); - w[54] = hc_byte_perm_S (w[12], w[13], selector); - w[53] = hc_byte_perm_S (w[11], w[12], selector); - w[52] = hc_byte_perm_S (w[10], w[11], selector); - w[51] = hc_byte_perm_S (w[ 9], w[10], selector); - w[50] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[49] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[48] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[47] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[46] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[45] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[44] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[43] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[42] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[41] = hc_byte_perm_S ( 0, w[ 0], selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm_S (w[20], w[21], selector); - w[62] = hc_byte_perm_S (w[19], w[20], selector); - w[61] = hc_byte_perm_S (w[18], w[19], selector); - w[60] = hc_byte_perm_S (w[17], w[18], selector); - w[59] = hc_byte_perm_S (w[16], w[17], selector); - w[58] = hc_byte_perm_S (w[15], w[16], selector); - w[57] = hc_byte_perm_S (w[14], w[15], selector); - w[56] = hc_byte_perm_S (w[13], w[14], selector); - w[55] = hc_byte_perm_S (w[12], w[13], selector); - w[54] = hc_byte_perm_S (w[11], w[12], selector); - w[53] = hc_byte_perm_S (w[10], w[11], selector); - w[52] = hc_byte_perm_S (w[ 9], w[10], selector); - w[51] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[50] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[49] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[48] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[47] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[46] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[45] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[44] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[43] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[42] = hc_byte_perm_S ( 0, w[ 0], selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm_S (w[19], w[20], selector); - w[62] = hc_byte_perm_S (w[18], w[19], selector); - w[61] = hc_byte_perm_S (w[17], w[18], selector); - w[60] = hc_byte_perm_S (w[16], w[17], selector); - w[59] = hc_byte_perm_S (w[15], w[16], selector); - w[58] = hc_byte_perm_S (w[14], w[15], selector); - w[57] = hc_byte_perm_S (w[13], w[14], selector); - w[56] = hc_byte_perm_S (w[12], w[13], selector); - w[55] = hc_byte_perm_S (w[11], w[12], selector); - w[54] = hc_byte_perm_S (w[10], w[11], selector); - w[53] = hc_byte_perm_S (w[ 9], w[10], selector); - w[52] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[51] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[50] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[49] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[48] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[47] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[46] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[45] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[44] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[43] = hc_byte_perm_S ( 0, w[ 0], selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm_S (w[18], w[19], selector); - w[62] = hc_byte_perm_S (w[17], w[18], selector); - w[61] = hc_byte_perm_S (w[16], w[17], selector); - w[60] = hc_byte_perm_S (w[15], w[16], selector); - w[59] = hc_byte_perm_S (w[14], w[15], selector); - w[58] = hc_byte_perm_S (w[13], w[14], selector); - w[57] = hc_byte_perm_S (w[12], w[13], selector); - w[56] = hc_byte_perm_S (w[11], w[12], selector); - w[55] = hc_byte_perm_S (w[10], w[11], selector); - w[54] = hc_byte_perm_S (w[ 9], w[10], selector); - w[53] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[52] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[51] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[50] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[49] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[48] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[47] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[46] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[45] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[44] = hc_byte_perm_S ( 0, w[ 0], selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm_S (w[17], w[18], selector); - w[62] = hc_byte_perm_S (w[16], w[17], selector); - w[61] = hc_byte_perm_S (w[15], w[16], selector); - w[60] = hc_byte_perm_S (w[14], w[15], selector); - w[59] = hc_byte_perm_S (w[13], w[14], selector); - w[58] = hc_byte_perm_S (w[12], w[13], selector); - w[57] = hc_byte_perm_S (w[11], w[12], selector); - w[56] = hc_byte_perm_S (w[10], w[11], selector); - w[55] = hc_byte_perm_S (w[ 9], w[10], selector); - w[54] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[53] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[52] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[51] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[50] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[49] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[48] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[47] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[46] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[45] = hc_byte_perm_S ( 0, w[ 0], selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm_S (w[16], w[17], selector); - w[62] = hc_byte_perm_S (w[15], w[16], selector); - w[61] = hc_byte_perm_S (w[14], w[15], selector); - w[60] = hc_byte_perm_S (w[13], w[14], selector); - w[59] = hc_byte_perm_S (w[12], w[13], selector); - w[58] = hc_byte_perm_S (w[11], w[12], selector); - w[57] = hc_byte_perm_S (w[10], w[11], selector); - w[56] = hc_byte_perm_S (w[ 9], w[10], selector); - w[55] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[54] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[53] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[52] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[51] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[50] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[49] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[48] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[47] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[46] = hc_byte_perm_S ( 0, w[ 0], selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm_S (w[15], w[16], selector); - w[62] = hc_byte_perm_S (w[14], w[15], selector); - w[61] = hc_byte_perm_S (w[13], w[14], selector); - w[60] = hc_byte_perm_S (w[12], w[13], selector); - w[59] = hc_byte_perm_S (w[11], w[12], selector); - w[58] = hc_byte_perm_S (w[10], w[11], selector); - w[57] = hc_byte_perm_S (w[ 9], w[10], selector); - w[56] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[55] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[54] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[53] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[52] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[51] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[50] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[49] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[48] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[47] = hc_byte_perm_S ( 0, w[ 0], selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm_S (w[14], w[15], selector); - w[62] = hc_byte_perm_S (w[13], w[14], selector); - w[61] = hc_byte_perm_S (w[12], w[13], selector); - w[60] = hc_byte_perm_S (w[11], w[12], selector); - w[59] = hc_byte_perm_S (w[10], w[11], selector); - w[58] = hc_byte_perm_S (w[ 9], w[10], selector); - w[57] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[56] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[55] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[54] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[53] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[52] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[51] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[50] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[49] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[48] = hc_byte_perm_S ( 0, w[ 0], selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm_S (w[13], w[14], selector); - w[62] = hc_byte_perm_S (w[12], w[13], selector); - w[61] = hc_byte_perm_S (w[11], w[12], selector); - w[60] = hc_byte_perm_S (w[10], w[11], selector); - w[59] = hc_byte_perm_S (w[ 9], w[10], selector); - w[58] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[57] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[56] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[55] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[54] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[53] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[52] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[51] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[50] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[49] = hc_byte_perm_S ( 0, w[ 0], selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm_S (w[12], w[13], selector); - w[62] = hc_byte_perm_S (w[11], w[12], selector); - w[61] = hc_byte_perm_S (w[10], w[11], selector); - w[60] = hc_byte_perm_S (w[ 9], w[10], selector); - w[59] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[58] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[57] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[56] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[55] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[54] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[53] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[52] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[51] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[50] = hc_byte_perm_S ( 0, w[ 0], selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm_S (w[11], w[12], selector); - w[62] = hc_byte_perm_S (w[10], w[11], selector); - w[61] = hc_byte_perm_S (w[ 9], w[10], selector); - w[60] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[59] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[58] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[57] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[56] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[55] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[54] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[53] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[52] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[51] = hc_byte_perm_S ( 0, w[ 0], selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm_S (w[10], w[11], selector); - w[62] = hc_byte_perm_S (w[ 9], w[10], selector); - w[61] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[60] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[59] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[58] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[57] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[56] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[55] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[54] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[53] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[52] = hc_byte_perm_S ( 0, w[ 0], selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm_S (w[ 9], w[10], selector); - w[62] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[61] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[60] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[59] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[58] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[57] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[56] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[55] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[54] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[53] = hc_byte_perm_S ( 0, w[ 0], selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[62] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[61] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[60] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[59] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[58] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[57] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[56] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[55] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[54] = hc_byte_perm_S ( 0, w[ 0], selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[62] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[61] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[60] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[59] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[58] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[57] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[56] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[55] = hc_byte_perm_S ( 0, w[ 0], selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[62] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[61] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[60] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[59] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[58] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[57] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[56] = hc_byte_perm_S ( 0, w[ 0], selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[62] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[61] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[60] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[59] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[58] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[57] = hc_byte_perm_S ( 0, w[ 0], selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[62] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[61] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[60] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[59] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[58] = hc_byte_perm_S ( 0, w[ 0], selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[62] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[61] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[60] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[59] = hc_byte_perm_S ( 0, w[ 0], selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[62] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[61] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[60] = hc_byte_perm_S ( 0, w[ 0], selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[62] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[61] = hc_byte_perm_S ( 0, w[ 0], selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[62] = hc_byte_perm_S ( 0, w[ 0], selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm_S ( 0, w[ 0], selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_be_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -64762,4373 +37679,6 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (PRIVATE_AS u32 *w, const u32 of break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm_S (w[63], w[62], selector); - w[62] = hc_byte_perm_S (w[62], w[61], selector); - w[61] = hc_byte_perm_S (w[61], w[60], selector); - w[60] = hc_byte_perm_S (w[60], w[59], selector); - w[59] = hc_byte_perm_S (w[59], w[58], selector); - w[58] = hc_byte_perm_S (w[58], w[57], selector); - w[57] = hc_byte_perm_S (w[57], w[56], selector); - w[56] = hc_byte_perm_S (w[56], w[55], selector); - w[55] = hc_byte_perm_S (w[55], w[54], selector); - w[54] = hc_byte_perm_S (w[54], w[53], selector); - w[53] = hc_byte_perm_S (w[53], w[52], selector); - w[52] = hc_byte_perm_S (w[52], w[51], selector); - w[51] = hc_byte_perm_S (w[51], w[50], selector); - w[50] = hc_byte_perm_S (w[50], w[49], selector); - w[49] = hc_byte_perm_S (w[49], w[48], selector); - w[48] = hc_byte_perm_S (w[48], w[47], selector); - w[47] = hc_byte_perm_S (w[47], w[46], selector); - w[46] = hc_byte_perm_S (w[46], w[45], selector); - w[45] = hc_byte_perm_S (w[45], w[44], selector); - w[44] = hc_byte_perm_S (w[44], w[43], selector); - w[43] = hc_byte_perm_S (w[43], w[42], selector); - w[42] = hc_byte_perm_S (w[42], w[41], selector); - w[41] = hc_byte_perm_S (w[41], w[40], selector); - w[40] = hc_byte_perm_S (w[40], w[39], selector); - w[39] = hc_byte_perm_S (w[39], w[38], selector); - w[38] = hc_byte_perm_S (w[38], w[37], selector); - w[37] = hc_byte_perm_S (w[37], w[36], selector); - w[36] = hc_byte_perm_S (w[36], w[35], selector); - w[35] = hc_byte_perm_S (w[35], w[34], selector); - w[34] = hc_byte_perm_S (w[34], w[33], selector); - w[33] = hc_byte_perm_S (w[33], w[32], selector); - w[32] = hc_byte_perm_S (w[32], w[31], selector); - w[31] = hc_byte_perm_S (w[31], w[30], selector); - w[30] = hc_byte_perm_S (w[30], w[29], selector); - w[29] = hc_byte_perm_S (w[29], w[28], selector); - w[28] = hc_byte_perm_S (w[28], w[27], selector); - w[27] = hc_byte_perm_S (w[27], w[26], selector); - w[26] = hc_byte_perm_S (w[26], w[25], selector); - w[25] = hc_byte_perm_S (w[25], w[24], selector); - w[24] = hc_byte_perm_S (w[24], w[23], selector); - w[23] = hc_byte_perm_S (w[23], w[22], selector); - w[22] = hc_byte_perm_S (w[22], w[21], selector); - w[21] = hc_byte_perm_S (w[21], w[20], selector); - w[20] = hc_byte_perm_S (w[20], w[19], selector); - w[19] = hc_byte_perm_S (w[19], w[18], selector); - w[18] = hc_byte_perm_S (w[18], w[17], selector); - w[17] = hc_byte_perm_S (w[17], w[16], selector); - w[16] = hc_byte_perm_S (w[16], w[15], selector); - w[15] = hc_byte_perm_S (w[15], w[14], selector); - w[14] = hc_byte_perm_S (w[14], w[13], selector); - w[13] = hc_byte_perm_S (w[13], w[12], selector); - w[12] = hc_byte_perm_S (w[12], w[11], selector); - w[11] = hc_byte_perm_S (w[11], w[10], selector); - w[10] = hc_byte_perm_S (w[10], w[ 9], selector); - w[ 9] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[ 8] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 7] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 6] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 5] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 4] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 3] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 2] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 1] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 0] = hc_byte_perm_S (w[ 0], 0, selector); - - break; - - case 1: - w[63] = hc_byte_perm_S (w[62], w[61], selector); - w[62] = hc_byte_perm_S (w[61], w[60], selector); - w[61] = hc_byte_perm_S (w[60], w[59], selector); - w[60] = hc_byte_perm_S (w[59], w[58], selector); - w[59] = hc_byte_perm_S (w[58], w[57], selector); - w[58] = hc_byte_perm_S (w[57], w[56], selector); - w[57] = hc_byte_perm_S (w[56], w[55], selector); - w[56] = hc_byte_perm_S (w[55], w[54], selector); - w[55] = hc_byte_perm_S (w[54], w[53], selector); - w[54] = hc_byte_perm_S (w[53], w[52], selector); - w[53] = hc_byte_perm_S (w[52], w[51], selector); - w[52] = hc_byte_perm_S (w[51], w[50], selector); - w[51] = hc_byte_perm_S (w[50], w[49], selector); - w[50] = hc_byte_perm_S (w[49], w[48], selector); - w[49] = hc_byte_perm_S (w[48], w[47], selector); - w[48] = hc_byte_perm_S (w[47], w[46], selector); - w[47] = hc_byte_perm_S (w[46], w[45], selector); - w[46] = hc_byte_perm_S (w[45], w[44], selector); - w[45] = hc_byte_perm_S (w[44], w[43], selector); - w[44] = hc_byte_perm_S (w[43], w[42], selector); - w[43] = hc_byte_perm_S (w[42], w[41], selector); - w[42] = hc_byte_perm_S (w[41], w[40], selector); - w[41] = hc_byte_perm_S (w[40], w[39], selector); - w[40] = hc_byte_perm_S (w[39], w[38], selector); - w[39] = hc_byte_perm_S (w[38], w[37], selector); - w[38] = hc_byte_perm_S (w[37], w[36], selector); - w[37] = hc_byte_perm_S (w[36], w[35], selector); - w[36] = hc_byte_perm_S (w[35], w[34], selector); - w[35] = hc_byte_perm_S (w[34], w[33], selector); - w[34] = hc_byte_perm_S (w[33], w[32], selector); - w[33] = hc_byte_perm_S (w[32], w[31], selector); - w[32] = hc_byte_perm_S (w[31], w[30], selector); - w[31] = hc_byte_perm_S (w[30], w[29], selector); - w[30] = hc_byte_perm_S (w[29], w[28], selector); - w[29] = hc_byte_perm_S (w[28], w[27], selector); - w[28] = hc_byte_perm_S (w[27], w[26], selector); - w[27] = hc_byte_perm_S (w[26], w[25], selector); - w[26] = hc_byte_perm_S (w[25], w[24], selector); - w[25] = hc_byte_perm_S (w[24], w[23], selector); - w[24] = hc_byte_perm_S (w[23], w[22], selector); - w[23] = hc_byte_perm_S (w[22], w[21], selector); - w[22] = hc_byte_perm_S (w[21], w[20], selector); - w[21] = hc_byte_perm_S (w[20], w[19], selector); - w[20] = hc_byte_perm_S (w[19], w[18], selector); - w[19] = hc_byte_perm_S (w[18], w[17], selector); - w[18] = hc_byte_perm_S (w[17], w[16], selector); - w[17] = hc_byte_perm_S (w[16], w[15], selector); - w[16] = hc_byte_perm_S (w[15], w[14], selector); - w[15] = hc_byte_perm_S (w[14], w[13], selector); - w[14] = hc_byte_perm_S (w[13], w[12], selector); - w[13] = hc_byte_perm_S (w[12], w[11], selector); - w[12] = hc_byte_perm_S (w[11], w[10], selector); - w[11] = hc_byte_perm_S (w[10], w[ 9], selector); - w[10] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[ 9] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 8] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 7] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 6] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 5] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 4] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 3] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 2] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 1] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm_S (w[61], w[60], selector); - w[62] = hc_byte_perm_S (w[60], w[59], selector); - w[61] = hc_byte_perm_S (w[59], w[58], selector); - w[60] = hc_byte_perm_S (w[58], w[57], selector); - w[59] = hc_byte_perm_S (w[57], w[56], selector); - w[58] = hc_byte_perm_S (w[56], w[55], selector); - w[57] = hc_byte_perm_S (w[55], w[54], selector); - w[56] = hc_byte_perm_S (w[54], w[53], selector); - w[55] = hc_byte_perm_S (w[53], w[52], selector); - w[54] = hc_byte_perm_S (w[52], w[51], selector); - w[53] = hc_byte_perm_S (w[51], w[50], selector); - w[52] = hc_byte_perm_S (w[50], w[49], selector); - w[51] = hc_byte_perm_S (w[49], w[48], selector); - w[50] = hc_byte_perm_S (w[48], w[47], selector); - w[49] = hc_byte_perm_S (w[47], w[46], selector); - w[48] = hc_byte_perm_S (w[46], w[45], selector); - w[47] = hc_byte_perm_S (w[45], w[44], selector); - w[46] = hc_byte_perm_S (w[44], w[43], selector); - w[45] = hc_byte_perm_S (w[43], w[42], selector); - w[44] = hc_byte_perm_S (w[42], w[41], selector); - w[43] = hc_byte_perm_S (w[41], w[40], selector); - w[42] = hc_byte_perm_S (w[40], w[39], selector); - w[41] = hc_byte_perm_S (w[39], w[38], selector); - w[40] = hc_byte_perm_S (w[38], w[37], selector); - w[39] = hc_byte_perm_S (w[37], w[36], selector); - w[38] = hc_byte_perm_S (w[36], w[35], selector); - w[37] = hc_byte_perm_S (w[35], w[34], selector); - w[36] = hc_byte_perm_S (w[34], w[33], selector); - w[35] = hc_byte_perm_S (w[33], w[32], selector); - w[34] = hc_byte_perm_S (w[32], w[31], selector); - w[33] = hc_byte_perm_S (w[31], w[30], selector); - w[32] = hc_byte_perm_S (w[30], w[29], selector); - w[31] = hc_byte_perm_S (w[29], w[28], selector); - w[30] = hc_byte_perm_S (w[28], w[27], selector); - w[29] = hc_byte_perm_S (w[27], w[26], selector); - w[28] = hc_byte_perm_S (w[26], w[25], selector); - w[27] = hc_byte_perm_S (w[25], w[24], selector); - w[26] = hc_byte_perm_S (w[24], w[23], selector); - w[25] = hc_byte_perm_S (w[23], w[22], selector); - w[24] = hc_byte_perm_S (w[22], w[21], selector); - w[23] = hc_byte_perm_S (w[21], w[20], selector); - w[22] = hc_byte_perm_S (w[20], w[19], selector); - w[21] = hc_byte_perm_S (w[19], w[18], selector); - w[20] = hc_byte_perm_S (w[18], w[17], selector); - w[19] = hc_byte_perm_S (w[17], w[16], selector); - w[18] = hc_byte_perm_S (w[16], w[15], selector); - w[17] = hc_byte_perm_S (w[15], w[14], selector); - w[16] = hc_byte_perm_S (w[14], w[13], selector); - w[15] = hc_byte_perm_S (w[13], w[12], selector); - w[14] = hc_byte_perm_S (w[12], w[11], selector); - w[13] = hc_byte_perm_S (w[11], w[10], selector); - w[12] = hc_byte_perm_S (w[10], w[ 9], selector); - w[11] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[10] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 9] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 8] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 7] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 6] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 5] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 4] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 3] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 2] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm_S (w[60], w[59], selector); - w[62] = hc_byte_perm_S (w[59], w[58], selector); - w[61] = hc_byte_perm_S (w[58], w[57], selector); - w[60] = hc_byte_perm_S (w[57], w[56], selector); - w[59] = hc_byte_perm_S (w[56], w[55], selector); - w[58] = hc_byte_perm_S (w[55], w[54], selector); - w[57] = hc_byte_perm_S (w[54], w[53], selector); - w[56] = hc_byte_perm_S (w[53], w[52], selector); - w[55] = hc_byte_perm_S (w[52], w[51], selector); - w[54] = hc_byte_perm_S (w[51], w[50], selector); - w[53] = hc_byte_perm_S (w[50], w[49], selector); - w[52] = hc_byte_perm_S (w[49], w[48], selector); - w[51] = hc_byte_perm_S (w[48], w[47], selector); - w[50] = hc_byte_perm_S (w[47], w[46], selector); - w[49] = hc_byte_perm_S (w[46], w[45], selector); - w[48] = hc_byte_perm_S (w[45], w[44], selector); - w[47] = hc_byte_perm_S (w[44], w[43], selector); - w[46] = hc_byte_perm_S (w[43], w[42], selector); - w[45] = hc_byte_perm_S (w[42], w[41], selector); - w[44] = hc_byte_perm_S (w[41], w[40], selector); - w[43] = hc_byte_perm_S (w[40], w[39], selector); - w[42] = hc_byte_perm_S (w[39], w[38], selector); - w[41] = hc_byte_perm_S (w[38], w[37], selector); - w[40] = hc_byte_perm_S (w[37], w[36], selector); - w[39] = hc_byte_perm_S (w[36], w[35], selector); - w[38] = hc_byte_perm_S (w[35], w[34], selector); - w[37] = hc_byte_perm_S (w[34], w[33], selector); - w[36] = hc_byte_perm_S (w[33], w[32], selector); - w[35] = hc_byte_perm_S (w[32], w[31], selector); - w[34] = hc_byte_perm_S (w[31], w[30], selector); - w[33] = hc_byte_perm_S (w[30], w[29], selector); - w[32] = hc_byte_perm_S (w[29], w[28], selector); - w[31] = hc_byte_perm_S (w[28], w[27], selector); - w[30] = hc_byte_perm_S (w[27], w[26], selector); - w[29] = hc_byte_perm_S (w[26], w[25], selector); - w[28] = hc_byte_perm_S (w[25], w[24], selector); - w[27] = hc_byte_perm_S (w[24], w[23], selector); - w[26] = hc_byte_perm_S (w[23], w[22], selector); - w[25] = hc_byte_perm_S (w[22], w[21], selector); - w[24] = hc_byte_perm_S (w[21], w[20], selector); - w[23] = hc_byte_perm_S (w[20], w[19], selector); - w[22] = hc_byte_perm_S (w[19], w[18], selector); - w[21] = hc_byte_perm_S (w[18], w[17], selector); - w[20] = hc_byte_perm_S (w[17], w[16], selector); - w[19] = hc_byte_perm_S (w[16], w[15], selector); - w[18] = hc_byte_perm_S (w[15], w[14], selector); - w[17] = hc_byte_perm_S (w[14], w[13], selector); - w[16] = hc_byte_perm_S (w[13], w[12], selector); - w[15] = hc_byte_perm_S (w[12], w[11], selector); - w[14] = hc_byte_perm_S (w[11], w[10], selector); - w[13] = hc_byte_perm_S (w[10], w[ 9], selector); - w[12] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[11] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[10] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 9] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 8] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 7] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 6] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 5] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 4] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 3] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm_S (w[59], w[58], selector); - w[62] = hc_byte_perm_S (w[58], w[57], selector); - w[61] = hc_byte_perm_S (w[57], w[56], selector); - w[60] = hc_byte_perm_S (w[56], w[55], selector); - w[59] = hc_byte_perm_S (w[55], w[54], selector); - w[58] = hc_byte_perm_S (w[54], w[53], selector); - w[57] = hc_byte_perm_S (w[53], w[52], selector); - w[56] = hc_byte_perm_S (w[52], w[51], selector); - w[55] = hc_byte_perm_S (w[51], w[50], selector); - w[54] = hc_byte_perm_S (w[50], w[49], selector); - w[53] = hc_byte_perm_S (w[49], w[48], selector); - w[52] = hc_byte_perm_S (w[48], w[47], selector); - w[51] = hc_byte_perm_S (w[47], w[46], selector); - w[50] = hc_byte_perm_S (w[46], w[45], selector); - w[49] = hc_byte_perm_S (w[45], w[44], selector); - w[48] = hc_byte_perm_S (w[44], w[43], selector); - w[47] = hc_byte_perm_S (w[43], w[42], selector); - w[46] = hc_byte_perm_S (w[42], w[41], selector); - w[45] = hc_byte_perm_S (w[41], w[40], selector); - w[44] = hc_byte_perm_S (w[40], w[39], selector); - w[43] = hc_byte_perm_S (w[39], w[38], selector); - w[42] = hc_byte_perm_S (w[38], w[37], selector); - w[41] = hc_byte_perm_S (w[37], w[36], selector); - w[40] = hc_byte_perm_S (w[36], w[35], selector); - w[39] = hc_byte_perm_S (w[35], w[34], selector); - w[38] = hc_byte_perm_S (w[34], w[33], selector); - w[37] = hc_byte_perm_S (w[33], w[32], selector); - w[36] = hc_byte_perm_S (w[32], w[31], selector); - w[35] = hc_byte_perm_S (w[31], w[30], selector); - w[34] = hc_byte_perm_S (w[30], w[29], selector); - w[33] = hc_byte_perm_S (w[29], w[28], selector); - w[32] = hc_byte_perm_S (w[28], w[27], selector); - w[31] = hc_byte_perm_S (w[27], w[26], selector); - w[30] = hc_byte_perm_S (w[26], w[25], selector); - w[29] = hc_byte_perm_S (w[25], w[24], selector); - w[28] = hc_byte_perm_S (w[24], w[23], selector); - w[27] = hc_byte_perm_S (w[23], w[22], selector); - w[26] = hc_byte_perm_S (w[22], w[21], selector); - w[25] = hc_byte_perm_S (w[21], w[20], selector); - w[24] = hc_byte_perm_S (w[20], w[19], selector); - w[23] = hc_byte_perm_S (w[19], w[18], selector); - w[22] = hc_byte_perm_S (w[18], w[17], selector); - w[21] = hc_byte_perm_S (w[17], w[16], selector); - w[20] = hc_byte_perm_S (w[16], w[15], selector); - w[19] = hc_byte_perm_S (w[15], w[14], selector); - w[18] = hc_byte_perm_S (w[14], w[13], selector); - w[17] = hc_byte_perm_S (w[13], w[12], selector); - w[16] = hc_byte_perm_S (w[12], w[11], selector); - w[15] = hc_byte_perm_S (w[11], w[10], selector); - w[14] = hc_byte_perm_S (w[10], w[ 9], selector); - w[13] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[12] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[11] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[10] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 9] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 8] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 7] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 6] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 5] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 4] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm_S (w[58], w[57], selector); - w[62] = hc_byte_perm_S (w[57], w[56], selector); - w[61] = hc_byte_perm_S (w[56], w[55], selector); - w[60] = hc_byte_perm_S (w[55], w[54], selector); - w[59] = hc_byte_perm_S (w[54], w[53], selector); - w[58] = hc_byte_perm_S (w[53], w[52], selector); - w[57] = hc_byte_perm_S (w[52], w[51], selector); - w[56] = hc_byte_perm_S (w[51], w[50], selector); - w[55] = hc_byte_perm_S (w[50], w[49], selector); - w[54] = hc_byte_perm_S (w[49], w[48], selector); - w[53] = hc_byte_perm_S (w[48], w[47], selector); - w[52] = hc_byte_perm_S (w[47], w[46], selector); - w[51] = hc_byte_perm_S (w[46], w[45], selector); - w[50] = hc_byte_perm_S (w[45], w[44], selector); - w[49] = hc_byte_perm_S (w[44], w[43], selector); - w[48] = hc_byte_perm_S (w[43], w[42], selector); - w[47] = hc_byte_perm_S (w[42], w[41], selector); - w[46] = hc_byte_perm_S (w[41], w[40], selector); - w[45] = hc_byte_perm_S (w[40], w[39], selector); - w[44] = hc_byte_perm_S (w[39], w[38], selector); - w[43] = hc_byte_perm_S (w[38], w[37], selector); - w[42] = hc_byte_perm_S (w[37], w[36], selector); - w[41] = hc_byte_perm_S (w[36], w[35], selector); - w[40] = hc_byte_perm_S (w[35], w[34], selector); - w[39] = hc_byte_perm_S (w[34], w[33], selector); - w[38] = hc_byte_perm_S (w[33], w[32], selector); - w[37] = hc_byte_perm_S (w[32], w[31], selector); - w[36] = hc_byte_perm_S (w[31], w[30], selector); - w[35] = hc_byte_perm_S (w[30], w[29], selector); - w[34] = hc_byte_perm_S (w[29], w[28], selector); - w[33] = hc_byte_perm_S (w[28], w[27], selector); - w[32] = hc_byte_perm_S (w[27], w[26], selector); - w[31] = hc_byte_perm_S (w[26], w[25], selector); - w[30] = hc_byte_perm_S (w[25], w[24], selector); - w[29] = hc_byte_perm_S (w[24], w[23], selector); - w[28] = hc_byte_perm_S (w[23], w[22], selector); - w[27] = hc_byte_perm_S (w[22], w[21], selector); - w[26] = hc_byte_perm_S (w[21], w[20], selector); - w[25] = hc_byte_perm_S (w[20], w[19], selector); - w[24] = hc_byte_perm_S (w[19], w[18], selector); - w[23] = hc_byte_perm_S (w[18], w[17], selector); - w[22] = hc_byte_perm_S (w[17], w[16], selector); - w[21] = hc_byte_perm_S (w[16], w[15], selector); - w[20] = hc_byte_perm_S (w[15], w[14], selector); - w[19] = hc_byte_perm_S (w[14], w[13], selector); - w[18] = hc_byte_perm_S (w[13], w[12], selector); - w[17] = hc_byte_perm_S (w[12], w[11], selector); - w[16] = hc_byte_perm_S (w[11], w[10], selector); - w[15] = hc_byte_perm_S (w[10], w[ 9], selector); - w[14] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[13] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[12] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[11] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[10] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 9] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 8] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 7] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 6] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 5] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm_S (w[57], w[56], selector); - w[62] = hc_byte_perm_S (w[56], w[55], selector); - w[61] = hc_byte_perm_S (w[55], w[54], selector); - w[60] = hc_byte_perm_S (w[54], w[53], selector); - w[59] = hc_byte_perm_S (w[53], w[52], selector); - w[58] = hc_byte_perm_S (w[52], w[51], selector); - w[57] = hc_byte_perm_S (w[51], w[50], selector); - w[56] = hc_byte_perm_S (w[50], w[49], selector); - w[55] = hc_byte_perm_S (w[49], w[48], selector); - w[54] = hc_byte_perm_S (w[48], w[47], selector); - w[53] = hc_byte_perm_S (w[47], w[46], selector); - w[52] = hc_byte_perm_S (w[46], w[45], selector); - w[51] = hc_byte_perm_S (w[45], w[44], selector); - w[50] = hc_byte_perm_S (w[44], w[43], selector); - w[49] = hc_byte_perm_S (w[43], w[42], selector); - w[48] = hc_byte_perm_S (w[42], w[41], selector); - w[47] = hc_byte_perm_S (w[41], w[40], selector); - w[46] = hc_byte_perm_S (w[40], w[39], selector); - w[45] = hc_byte_perm_S (w[39], w[38], selector); - w[44] = hc_byte_perm_S (w[38], w[37], selector); - w[43] = hc_byte_perm_S (w[37], w[36], selector); - w[42] = hc_byte_perm_S (w[36], w[35], selector); - w[41] = hc_byte_perm_S (w[35], w[34], selector); - w[40] = hc_byte_perm_S (w[34], w[33], selector); - w[39] = hc_byte_perm_S (w[33], w[32], selector); - w[38] = hc_byte_perm_S (w[32], w[31], selector); - w[37] = hc_byte_perm_S (w[31], w[30], selector); - w[36] = hc_byte_perm_S (w[30], w[29], selector); - w[35] = hc_byte_perm_S (w[29], w[28], selector); - w[34] = hc_byte_perm_S (w[28], w[27], selector); - w[33] = hc_byte_perm_S (w[27], w[26], selector); - w[32] = hc_byte_perm_S (w[26], w[25], selector); - w[31] = hc_byte_perm_S (w[25], w[24], selector); - w[30] = hc_byte_perm_S (w[24], w[23], selector); - w[29] = hc_byte_perm_S (w[23], w[22], selector); - w[28] = hc_byte_perm_S (w[22], w[21], selector); - w[27] = hc_byte_perm_S (w[21], w[20], selector); - w[26] = hc_byte_perm_S (w[20], w[19], selector); - w[25] = hc_byte_perm_S (w[19], w[18], selector); - w[24] = hc_byte_perm_S (w[18], w[17], selector); - w[23] = hc_byte_perm_S (w[17], w[16], selector); - w[22] = hc_byte_perm_S (w[16], w[15], selector); - w[21] = hc_byte_perm_S (w[15], w[14], selector); - w[20] = hc_byte_perm_S (w[14], w[13], selector); - w[19] = hc_byte_perm_S (w[13], w[12], selector); - w[18] = hc_byte_perm_S (w[12], w[11], selector); - w[17] = hc_byte_perm_S (w[11], w[10], selector); - w[16] = hc_byte_perm_S (w[10], w[ 9], selector); - w[15] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[14] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[13] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[12] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[11] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[10] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 9] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 8] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 7] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 6] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm_S (w[56], w[55], selector); - w[62] = hc_byte_perm_S (w[55], w[54], selector); - w[61] = hc_byte_perm_S (w[54], w[53], selector); - w[60] = hc_byte_perm_S (w[53], w[52], selector); - w[59] = hc_byte_perm_S (w[52], w[51], selector); - w[58] = hc_byte_perm_S (w[51], w[50], selector); - w[57] = hc_byte_perm_S (w[50], w[49], selector); - w[56] = hc_byte_perm_S (w[49], w[48], selector); - w[55] = hc_byte_perm_S (w[48], w[47], selector); - w[54] = hc_byte_perm_S (w[47], w[46], selector); - w[53] = hc_byte_perm_S (w[46], w[45], selector); - w[52] = hc_byte_perm_S (w[45], w[44], selector); - w[51] = hc_byte_perm_S (w[44], w[43], selector); - w[50] = hc_byte_perm_S (w[43], w[42], selector); - w[49] = hc_byte_perm_S (w[42], w[41], selector); - w[48] = hc_byte_perm_S (w[41], w[40], selector); - w[47] = hc_byte_perm_S (w[40], w[39], selector); - w[46] = hc_byte_perm_S (w[39], w[38], selector); - w[45] = hc_byte_perm_S (w[38], w[37], selector); - w[44] = hc_byte_perm_S (w[37], w[36], selector); - w[43] = hc_byte_perm_S (w[36], w[35], selector); - w[42] = hc_byte_perm_S (w[35], w[34], selector); - w[41] = hc_byte_perm_S (w[34], w[33], selector); - w[40] = hc_byte_perm_S (w[33], w[32], selector); - w[39] = hc_byte_perm_S (w[32], w[31], selector); - w[38] = hc_byte_perm_S (w[31], w[30], selector); - w[37] = hc_byte_perm_S (w[30], w[29], selector); - w[36] = hc_byte_perm_S (w[29], w[28], selector); - w[35] = hc_byte_perm_S (w[28], w[27], selector); - w[34] = hc_byte_perm_S (w[27], w[26], selector); - w[33] = hc_byte_perm_S (w[26], w[25], selector); - w[32] = hc_byte_perm_S (w[25], w[24], selector); - w[31] = hc_byte_perm_S (w[24], w[23], selector); - w[30] = hc_byte_perm_S (w[23], w[22], selector); - w[29] = hc_byte_perm_S (w[22], w[21], selector); - w[28] = hc_byte_perm_S (w[21], w[20], selector); - w[27] = hc_byte_perm_S (w[20], w[19], selector); - w[26] = hc_byte_perm_S (w[19], w[18], selector); - w[25] = hc_byte_perm_S (w[18], w[17], selector); - w[24] = hc_byte_perm_S (w[17], w[16], selector); - w[23] = hc_byte_perm_S (w[16], w[15], selector); - w[22] = hc_byte_perm_S (w[15], w[14], selector); - w[21] = hc_byte_perm_S (w[14], w[13], selector); - w[20] = hc_byte_perm_S (w[13], w[12], selector); - w[19] = hc_byte_perm_S (w[12], w[11], selector); - w[18] = hc_byte_perm_S (w[11], w[10], selector); - w[17] = hc_byte_perm_S (w[10], w[ 9], selector); - w[16] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[15] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[14] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[13] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[12] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[11] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[10] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 9] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 8] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 7] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm_S (w[55], w[54], selector); - w[62] = hc_byte_perm_S (w[54], w[53], selector); - w[61] = hc_byte_perm_S (w[53], w[52], selector); - w[60] = hc_byte_perm_S (w[52], w[51], selector); - w[59] = hc_byte_perm_S (w[51], w[50], selector); - w[58] = hc_byte_perm_S (w[50], w[49], selector); - w[57] = hc_byte_perm_S (w[49], w[48], selector); - w[56] = hc_byte_perm_S (w[48], w[47], selector); - w[55] = hc_byte_perm_S (w[47], w[46], selector); - w[54] = hc_byte_perm_S (w[46], w[45], selector); - w[53] = hc_byte_perm_S (w[45], w[44], selector); - w[52] = hc_byte_perm_S (w[44], w[43], selector); - w[51] = hc_byte_perm_S (w[43], w[42], selector); - w[50] = hc_byte_perm_S (w[42], w[41], selector); - w[49] = hc_byte_perm_S (w[41], w[40], selector); - w[48] = hc_byte_perm_S (w[40], w[39], selector); - w[47] = hc_byte_perm_S (w[39], w[38], selector); - w[46] = hc_byte_perm_S (w[38], w[37], selector); - w[45] = hc_byte_perm_S (w[37], w[36], selector); - w[44] = hc_byte_perm_S (w[36], w[35], selector); - w[43] = hc_byte_perm_S (w[35], w[34], selector); - w[42] = hc_byte_perm_S (w[34], w[33], selector); - w[41] = hc_byte_perm_S (w[33], w[32], selector); - w[40] = hc_byte_perm_S (w[32], w[31], selector); - w[39] = hc_byte_perm_S (w[31], w[30], selector); - w[38] = hc_byte_perm_S (w[30], w[29], selector); - w[37] = hc_byte_perm_S (w[29], w[28], selector); - w[36] = hc_byte_perm_S (w[28], w[27], selector); - w[35] = hc_byte_perm_S (w[27], w[26], selector); - w[34] = hc_byte_perm_S (w[26], w[25], selector); - w[33] = hc_byte_perm_S (w[25], w[24], selector); - w[32] = hc_byte_perm_S (w[24], w[23], selector); - w[31] = hc_byte_perm_S (w[23], w[22], selector); - w[30] = hc_byte_perm_S (w[22], w[21], selector); - w[29] = hc_byte_perm_S (w[21], w[20], selector); - w[28] = hc_byte_perm_S (w[20], w[19], selector); - w[27] = hc_byte_perm_S (w[19], w[18], selector); - w[26] = hc_byte_perm_S (w[18], w[17], selector); - w[25] = hc_byte_perm_S (w[17], w[16], selector); - w[24] = hc_byte_perm_S (w[16], w[15], selector); - w[23] = hc_byte_perm_S (w[15], w[14], selector); - w[22] = hc_byte_perm_S (w[14], w[13], selector); - w[21] = hc_byte_perm_S (w[13], w[12], selector); - w[20] = hc_byte_perm_S (w[12], w[11], selector); - w[19] = hc_byte_perm_S (w[11], w[10], selector); - w[18] = hc_byte_perm_S (w[10], w[ 9], selector); - w[17] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[16] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[15] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[14] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[13] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[12] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[11] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[10] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 9] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 8] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm_S (w[54], w[53], selector); - w[62] = hc_byte_perm_S (w[53], w[52], selector); - w[61] = hc_byte_perm_S (w[52], w[51], selector); - w[60] = hc_byte_perm_S (w[51], w[50], selector); - w[59] = hc_byte_perm_S (w[50], w[49], selector); - w[58] = hc_byte_perm_S (w[49], w[48], selector); - w[57] = hc_byte_perm_S (w[48], w[47], selector); - w[56] = hc_byte_perm_S (w[47], w[46], selector); - w[55] = hc_byte_perm_S (w[46], w[45], selector); - w[54] = hc_byte_perm_S (w[45], w[44], selector); - w[53] = hc_byte_perm_S (w[44], w[43], selector); - w[52] = hc_byte_perm_S (w[43], w[42], selector); - w[51] = hc_byte_perm_S (w[42], w[41], selector); - w[50] = hc_byte_perm_S (w[41], w[40], selector); - w[49] = hc_byte_perm_S (w[40], w[39], selector); - w[48] = hc_byte_perm_S (w[39], w[38], selector); - w[47] = hc_byte_perm_S (w[38], w[37], selector); - w[46] = hc_byte_perm_S (w[37], w[36], selector); - w[45] = hc_byte_perm_S (w[36], w[35], selector); - w[44] = hc_byte_perm_S (w[35], w[34], selector); - w[43] = hc_byte_perm_S (w[34], w[33], selector); - w[42] = hc_byte_perm_S (w[33], w[32], selector); - w[41] = hc_byte_perm_S (w[32], w[31], selector); - w[40] = hc_byte_perm_S (w[31], w[30], selector); - w[39] = hc_byte_perm_S (w[30], w[29], selector); - w[38] = hc_byte_perm_S (w[29], w[28], selector); - w[37] = hc_byte_perm_S (w[28], w[27], selector); - w[36] = hc_byte_perm_S (w[27], w[26], selector); - w[35] = hc_byte_perm_S (w[26], w[25], selector); - w[34] = hc_byte_perm_S (w[25], w[24], selector); - w[33] = hc_byte_perm_S (w[24], w[23], selector); - w[32] = hc_byte_perm_S (w[23], w[22], selector); - w[31] = hc_byte_perm_S (w[22], w[21], selector); - w[30] = hc_byte_perm_S (w[21], w[20], selector); - w[29] = hc_byte_perm_S (w[20], w[19], selector); - w[28] = hc_byte_perm_S (w[19], w[18], selector); - w[27] = hc_byte_perm_S (w[18], w[17], selector); - w[26] = hc_byte_perm_S (w[17], w[16], selector); - w[25] = hc_byte_perm_S (w[16], w[15], selector); - w[24] = hc_byte_perm_S (w[15], w[14], selector); - w[23] = hc_byte_perm_S (w[14], w[13], selector); - w[22] = hc_byte_perm_S (w[13], w[12], selector); - w[21] = hc_byte_perm_S (w[12], w[11], selector); - w[20] = hc_byte_perm_S (w[11], w[10], selector); - w[19] = hc_byte_perm_S (w[10], w[ 9], selector); - w[18] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[17] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[16] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[15] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[14] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[13] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[12] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[11] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[10] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 9] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm_S (w[53], w[52], selector); - w[62] = hc_byte_perm_S (w[52], w[51], selector); - w[61] = hc_byte_perm_S (w[51], w[50], selector); - w[60] = hc_byte_perm_S (w[50], w[49], selector); - w[59] = hc_byte_perm_S (w[49], w[48], selector); - w[58] = hc_byte_perm_S (w[48], w[47], selector); - w[57] = hc_byte_perm_S (w[47], w[46], selector); - w[56] = hc_byte_perm_S (w[46], w[45], selector); - w[55] = hc_byte_perm_S (w[45], w[44], selector); - w[54] = hc_byte_perm_S (w[44], w[43], selector); - w[53] = hc_byte_perm_S (w[43], w[42], selector); - w[52] = hc_byte_perm_S (w[42], w[41], selector); - w[51] = hc_byte_perm_S (w[41], w[40], selector); - w[50] = hc_byte_perm_S (w[40], w[39], selector); - w[49] = hc_byte_perm_S (w[39], w[38], selector); - w[48] = hc_byte_perm_S (w[38], w[37], selector); - w[47] = hc_byte_perm_S (w[37], w[36], selector); - w[46] = hc_byte_perm_S (w[36], w[35], selector); - w[45] = hc_byte_perm_S (w[35], w[34], selector); - w[44] = hc_byte_perm_S (w[34], w[33], selector); - w[43] = hc_byte_perm_S (w[33], w[32], selector); - w[42] = hc_byte_perm_S (w[32], w[31], selector); - w[41] = hc_byte_perm_S (w[31], w[30], selector); - w[40] = hc_byte_perm_S (w[30], w[29], selector); - w[39] = hc_byte_perm_S (w[29], w[28], selector); - w[38] = hc_byte_perm_S (w[28], w[27], selector); - w[37] = hc_byte_perm_S (w[27], w[26], selector); - w[36] = hc_byte_perm_S (w[26], w[25], selector); - w[35] = hc_byte_perm_S (w[25], w[24], selector); - w[34] = hc_byte_perm_S (w[24], w[23], selector); - w[33] = hc_byte_perm_S (w[23], w[22], selector); - w[32] = hc_byte_perm_S (w[22], w[21], selector); - w[31] = hc_byte_perm_S (w[21], w[20], selector); - w[30] = hc_byte_perm_S (w[20], w[19], selector); - w[29] = hc_byte_perm_S (w[19], w[18], selector); - w[28] = hc_byte_perm_S (w[18], w[17], selector); - w[27] = hc_byte_perm_S (w[17], w[16], selector); - w[26] = hc_byte_perm_S (w[16], w[15], selector); - w[25] = hc_byte_perm_S (w[15], w[14], selector); - w[24] = hc_byte_perm_S (w[14], w[13], selector); - w[23] = hc_byte_perm_S (w[13], w[12], selector); - w[22] = hc_byte_perm_S (w[12], w[11], selector); - w[21] = hc_byte_perm_S (w[11], w[10], selector); - w[20] = hc_byte_perm_S (w[10], w[ 9], selector); - w[19] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[18] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[17] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[16] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[15] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[14] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[13] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[12] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[11] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[10] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm_S (w[52], w[51], selector); - w[62] = hc_byte_perm_S (w[51], w[50], selector); - w[61] = hc_byte_perm_S (w[50], w[49], selector); - w[60] = hc_byte_perm_S (w[49], w[48], selector); - w[59] = hc_byte_perm_S (w[48], w[47], selector); - w[58] = hc_byte_perm_S (w[47], w[46], selector); - w[57] = hc_byte_perm_S (w[46], w[45], selector); - w[56] = hc_byte_perm_S (w[45], w[44], selector); - w[55] = hc_byte_perm_S (w[44], w[43], selector); - w[54] = hc_byte_perm_S (w[43], w[42], selector); - w[53] = hc_byte_perm_S (w[42], w[41], selector); - w[52] = hc_byte_perm_S (w[41], w[40], selector); - w[51] = hc_byte_perm_S (w[40], w[39], selector); - w[50] = hc_byte_perm_S (w[39], w[38], selector); - w[49] = hc_byte_perm_S (w[38], w[37], selector); - w[48] = hc_byte_perm_S (w[37], w[36], selector); - w[47] = hc_byte_perm_S (w[36], w[35], selector); - w[46] = hc_byte_perm_S (w[35], w[34], selector); - w[45] = hc_byte_perm_S (w[34], w[33], selector); - w[44] = hc_byte_perm_S (w[33], w[32], selector); - w[43] = hc_byte_perm_S (w[32], w[31], selector); - w[42] = hc_byte_perm_S (w[31], w[30], selector); - w[41] = hc_byte_perm_S (w[30], w[29], selector); - w[40] = hc_byte_perm_S (w[29], w[28], selector); - w[39] = hc_byte_perm_S (w[28], w[27], selector); - w[38] = hc_byte_perm_S (w[27], w[26], selector); - w[37] = hc_byte_perm_S (w[26], w[25], selector); - w[36] = hc_byte_perm_S (w[25], w[24], selector); - w[35] = hc_byte_perm_S (w[24], w[23], selector); - w[34] = hc_byte_perm_S (w[23], w[22], selector); - w[33] = hc_byte_perm_S (w[22], w[21], selector); - w[32] = hc_byte_perm_S (w[21], w[20], selector); - w[31] = hc_byte_perm_S (w[20], w[19], selector); - w[30] = hc_byte_perm_S (w[19], w[18], selector); - w[29] = hc_byte_perm_S (w[18], w[17], selector); - w[28] = hc_byte_perm_S (w[17], w[16], selector); - w[27] = hc_byte_perm_S (w[16], w[15], selector); - w[26] = hc_byte_perm_S (w[15], w[14], selector); - w[25] = hc_byte_perm_S (w[14], w[13], selector); - w[24] = hc_byte_perm_S (w[13], w[12], selector); - w[23] = hc_byte_perm_S (w[12], w[11], selector); - w[22] = hc_byte_perm_S (w[11], w[10], selector); - w[21] = hc_byte_perm_S (w[10], w[ 9], selector); - w[20] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[19] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[18] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[17] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[16] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[15] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[14] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[13] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[12] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[11] = hc_byte_perm_S (w[ 0], 0, selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm_S (w[51], w[50], selector); - w[62] = hc_byte_perm_S (w[50], w[49], selector); - w[61] = hc_byte_perm_S (w[49], w[48], selector); - w[60] = hc_byte_perm_S (w[48], w[47], selector); - w[59] = hc_byte_perm_S (w[47], w[46], selector); - w[58] = hc_byte_perm_S (w[46], w[45], selector); - w[57] = hc_byte_perm_S (w[45], w[44], selector); - w[56] = hc_byte_perm_S (w[44], w[43], selector); - w[55] = hc_byte_perm_S (w[43], w[42], selector); - w[54] = hc_byte_perm_S (w[42], w[41], selector); - w[53] = hc_byte_perm_S (w[41], w[40], selector); - w[52] = hc_byte_perm_S (w[40], w[39], selector); - w[51] = hc_byte_perm_S (w[39], w[38], selector); - w[50] = hc_byte_perm_S (w[38], w[37], selector); - w[49] = hc_byte_perm_S (w[37], w[36], selector); - w[48] = hc_byte_perm_S (w[36], w[35], selector); - w[47] = hc_byte_perm_S (w[35], w[34], selector); - w[46] = hc_byte_perm_S (w[34], w[33], selector); - w[45] = hc_byte_perm_S (w[33], w[32], selector); - w[44] = hc_byte_perm_S (w[32], w[31], selector); - w[43] = hc_byte_perm_S (w[31], w[30], selector); - w[42] = hc_byte_perm_S (w[30], w[29], selector); - w[41] = hc_byte_perm_S (w[29], w[28], selector); - w[40] = hc_byte_perm_S (w[28], w[27], selector); - w[39] = hc_byte_perm_S (w[27], w[26], selector); - w[38] = hc_byte_perm_S (w[26], w[25], selector); - w[37] = hc_byte_perm_S (w[25], w[24], selector); - w[36] = hc_byte_perm_S (w[24], w[23], selector); - w[35] = hc_byte_perm_S (w[23], w[22], selector); - w[34] = hc_byte_perm_S (w[22], w[21], selector); - w[33] = hc_byte_perm_S (w[21], w[20], selector); - w[32] = hc_byte_perm_S (w[20], w[19], selector); - w[31] = hc_byte_perm_S (w[19], w[18], selector); - w[30] = hc_byte_perm_S (w[18], w[17], selector); - w[29] = hc_byte_perm_S (w[17], w[16], selector); - w[28] = hc_byte_perm_S (w[16], w[15], selector); - w[27] = hc_byte_perm_S (w[15], w[14], selector); - w[26] = hc_byte_perm_S (w[14], w[13], selector); - w[25] = hc_byte_perm_S (w[13], w[12], selector); - w[24] = hc_byte_perm_S (w[12], w[11], selector); - w[23] = hc_byte_perm_S (w[11], w[10], selector); - w[22] = hc_byte_perm_S (w[10], w[ 9], selector); - w[21] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[20] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[19] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[18] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[17] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[16] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[15] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[14] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[13] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[12] = hc_byte_perm_S (w[ 0], 0, selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm_S (w[50], w[49], selector); - w[62] = hc_byte_perm_S (w[49], w[48], selector); - w[61] = hc_byte_perm_S (w[48], w[47], selector); - w[60] = hc_byte_perm_S (w[47], w[46], selector); - w[59] = hc_byte_perm_S (w[46], w[45], selector); - w[58] = hc_byte_perm_S (w[45], w[44], selector); - w[57] = hc_byte_perm_S (w[44], w[43], selector); - w[56] = hc_byte_perm_S (w[43], w[42], selector); - w[55] = hc_byte_perm_S (w[42], w[41], selector); - w[54] = hc_byte_perm_S (w[41], w[40], selector); - w[53] = hc_byte_perm_S (w[40], w[39], selector); - w[52] = hc_byte_perm_S (w[39], w[38], selector); - w[51] = hc_byte_perm_S (w[38], w[37], selector); - w[50] = hc_byte_perm_S (w[37], w[36], selector); - w[49] = hc_byte_perm_S (w[36], w[35], selector); - w[48] = hc_byte_perm_S (w[35], w[34], selector); - w[47] = hc_byte_perm_S (w[34], w[33], selector); - w[46] = hc_byte_perm_S (w[33], w[32], selector); - w[45] = hc_byte_perm_S (w[32], w[31], selector); - w[44] = hc_byte_perm_S (w[31], w[30], selector); - w[43] = hc_byte_perm_S (w[30], w[29], selector); - w[42] = hc_byte_perm_S (w[29], w[28], selector); - w[41] = hc_byte_perm_S (w[28], w[27], selector); - w[40] = hc_byte_perm_S (w[27], w[26], selector); - w[39] = hc_byte_perm_S (w[26], w[25], selector); - w[38] = hc_byte_perm_S (w[25], w[24], selector); - w[37] = hc_byte_perm_S (w[24], w[23], selector); - w[36] = hc_byte_perm_S (w[23], w[22], selector); - w[35] = hc_byte_perm_S (w[22], w[21], selector); - w[34] = hc_byte_perm_S (w[21], w[20], selector); - w[33] = hc_byte_perm_S (w[20], w[19], selector); - w[32] = hc_byte_perm_S (w[19], w[18], selector); - w[31] = hc_byte_perm_S (w[18], w[17], selector); - w[30] = hc_byte_perm_S (w[17], w[16], selector); - w[29] = hc_byte_perm_S (w[16], w[15], selector); - w[28] = hc_byte_perm_S (w[15], w[14], selector); - w[27] = hc_byte_perm_S (w[14], w[13], selector); - w[26] = hc_byte_perm_S (w[13], w[12], selector); - w[25] = hc_byte_perm_S (w[12], w[11], selector); - w[24] = hc_byte_perm_S (w[11], w[10], selector); - w[23] = hc_byte_perm_S (w[10], w[ 9], selector); - w[22] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[21] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[20] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[19] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[18] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[17] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[16] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[15] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[14] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[13] = hc_byte_perm_S (w[ 0], 0, selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm_S (w[49], w[48], selector); - w[62] = hc_byte_perm_S (w[48], w[47], selector); - w[61] = hc_byte_perm_S (w[47], w[46], selector); - w[60] = hc_byte_perm_S (w[46], w[45], selector); - w[59] = hc_byte_perm_S (w[45], w[44], selector); - w[58] = hc_byte_perm_S (w[44], w[43], selector); - w[57] = hc_byte_perm_S (w[43], w[42], selector); - w[56] = hc_byte_perm_S (w[42], w[41], selector); - w[55] = hc_byte_perm_S (w[41], w[40], selector); - w[54] = hc_byte_perm_S (w[40], w[39], selector); - w[53] = hc_byte_perm_S (w[39], w[38], selector); - w[52] = hc_byte_perm_S (w[38], w[37], selector); - w[51] = hc_byte_perm_S (w[37], w[36], selector); - w[50] = hc_byte_perm_S (w[36], w[35], selector); - w[49] = hc_byte_perm_S (w[35], w[34], selector); - w[48] = hc_byte_perm_S (w[34], w[33], selector); - w[47] = hc_byte_perm_S (w[33], w[32], selector); - w[46] = hc_byte_perm_S (w[32], w[31], selector); - w[45] = hc_byte_perm_S (w[31], w[30], selector); - w[44] = hc_byte_perm_S (w[30], w[29], selector); - w[43] = hc_byte_perm_S (w[29], w[28], selector); - w[42] = hc_byte_perm_S (w[28], w[27], selector); - w[41] = hc_byte_perm_S (w[27], w[26], selector); - w[40] = hc_byte_perm_S (w[26], w[25], selector); - w[39] = hc_byte_perm_S (w[25], w[24], selector); - w[38] = hc_byte_perm_S (w[24], w[23], selector); - w[37] = hc_byte_perm_S (w[23], w[22], selector); - w[36] = hc_byte_perm_S (w[22], w[21], selector); - w[35] = hc_byte_perm_S (w[21], w[20], selector); - w[34] = hc_byte_perm_S (w[20], w[19], selector); - w[33] = hc_byte_perm_S (w[19], w[18], selector); - w[32] = hc_byte_perm_S (w[18], w[17], selector); - w[31] = hc_byte_perm_S (w[17], w[16], selector); - w[30] = hc_byte_perm_S (w[16], w[15], selector); - w[29] = hc_byte_perm_S (w[15], w[14], selector); - w[28] = hc_byte_perm_S (w[14], w[13], selector); - w[27] = hc_byte_perm_S (w[13], w[12], selector); - w[26] = hc_byte_perm_S (w[12], w[11], selector); - w[25] = hc_byte_perm_S (w[11], w[10], selector); - w[24] = hc_byte_perm_S (w[10], w[ 9], selector); - w[23] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[22] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[21] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[20] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[19] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[18] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[17] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[16] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[15] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[14] = hc_byte_perm_S (w[ 0], 0, selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm_S (w[48], w[47], selector); - w[62] = hc_byte_perm_S (w[47], w[46], selector); - w[61] = hc_byte_perm_S (w[46], w[45], selector); - w[60] = hc_byte_perm_S (w[45], w[44], selector); - w[59] = hc_byte_perm_S (w[44], w[43], selector); - w[58] = hc_byte_perm_S (w[43], w[42], selector); - w[57] = hc_byte_perm_S (w[42], w[41], selector); - w[56] = hc_byte_perm_S (w[41], w[40], selector); - w[55] = hc_byte_perm_S (w[40], w[39], selector); - w[54] = hc_byte_perm_S (w[39], w[38], selector); - w[53] = hc_byte_perm_S (w[38], w[37], selector); - w[52] = hc_byte_perm_S (w[37], w[36], selector); - w[51] = hc_byte_perm_S (w[36], w[35], selector); - w[50] = hc_byte_perm_S (w[35], w[34], selector); - w[49] = hc_byte_perm_S (w[34], w[33], selector); - w[48] = hc_byte_perm_S (w[33], w[32], selector); - w[47] = hc_byte_perm_S (w[32], w[31], selector); - w[46] = hc_byte_perm_S (w[31], w[30], selector); - w[45] = hc_byte_perm_S (w[30], w[29], selector); - w[44] = hc_byte_perm_S (w[29], w[28], selector); - w[43] = hc_byte_perm_S (w[28], w[27], selector); - w[42] = hc_byte_perm_S (w[27], w[26], selector); - w[41] = hc_byte_perm_S (w[26], w[25], selector); - w[40] = hc_byte_perm_S (w[25], w[24], selector); - w[39] = hc_byte_perm_S (w[24], w[23], selector); - w[38] = hc_byte_perm_S (w[23], w[22], selector); - w[37] = hc_byte_perm_S (w[22], w[21], selector); - w[36] = hc_byte_perm_S (w[21], w[20], selector); - w[35] = hc_byte_perm_S (w[20], w[19], selector); - w[34] = hc_byte_perm_S (w[19], w[18], selector); - w[33] = hc_byte_perm_S (w[18], w[17], selector); - w[32] = hc_byte_perm_S (w[17], w[16], selector); - w[31] = hc_byte_perm_S (w[16], w[15], selector); - w[30] = hc_byte_perm_S (w[15], w[14], selector); - w[29] = hc_byte_perm_S (w[14], w[13], selector); - w[28] = hc_byte_perm_S (w[13], w[12], selector); - w[27] = hc_byte_perm_S (w[12], w[11], selector); - w[26] = hc_byte_perm_S (w[11], w[10], selector); - w[25] = hc_byte_perm_S (w[10], w[ 9], selector); - w[24] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[23] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[22] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[21] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[20] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[19] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[18] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[17] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[16] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[15] = hc_byte_perm_S (w[ 0], 0, selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm_S (w[47], w[46], selector); - w[62] = hc_byte_perm_S (w[46], w[45], selector); - w[61] = hc_byte_perm_S (w[45], w[44], selector); - w[60] = hc_byte_perm_S (w[44], w[43], selector); - w[59] = hc_byte_perm_S (w[43], w[42], selector); - w[58] = hc_byte_perm_S (w[42], w[41], selector); - w[57] = hc_byte_perm_S (w[41], w[40], selector); - w[56] = hc_byte_perm_S (w[40], w[39], selector); - w[55] = hc_byte_perm_S (w[39], w[38], selector); - w[54] = hc_byte_perm_S (w[38], w[37], selector); - w[53] = hc_byte_perm_S (w[37], w[36], selector); - w[52] = hc_byte_perm_S (w[36], w[35], selector); - w[51] = hc_byte_perm_S (w[35], w[34], selector); - w[50] = hc_byte_perm_S (w[34], w[33], selector); - w[49] = hc_byte_perm_S (w[33], w[32], selector); - w[48] = hc_byte_perm_S (w[32], w[31], selector); - w[47] = hc_byte_perm_S (w[31], w[30], selector); - w[46] = hc_byte_perm_S (w[30], w[29], selector); - w[45] = hc_byte_perm_S (w[29], w[28], selector); - w[44] = hc_byte_perm_S (w[28], w[27], selector); - w[43] = hc_byte_perm_S (w[27], w[26], selector); - w[42] = hc_byte_perm_S (w[26], w[25], selector); - w[41] = hc_byte_perm_S (w[25], w[24], selector); - w[40] = hc_byte_perm_S (w[24], w[23], selector); - w[39] = hc_byte_perm_S (w[23], w[22], selector); - w[38] = hc_byte_perm_S (w[22], w[21], selector); - w[37] = hc_byte_perm_S (w[21], w[20], selector); - w[36] = hc_byte_perm_S (w[20], w[19], selector); - w[35] = hc_byte_perm_S (w[19], w[18], selector); - w[34] = hc_byte_perm_S (w[18], w[17], selector); - w[33] = hc_byte_perm_S (w[17], w[16], selector); - w[32] = hc_byte_perm_S (w[16], w[15], selector); - w[31] = hc_byte_perm_S (w[15], w[14], selector); - w[30] = hc_byte_perm_S (w[14], w[13], selector); - w[29] = hc_byte_perm_S (w[13], w[12], selector); - w[28] = hc_byte_perm_S (w[12], w[11], selector); - w[27] = hc_byte_perm_S (w[11], w[10], selector); - w[26] = hc_byte_perm_S (w[10], w[ 9], selector); - w[25] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[24] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[23] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[22] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[21] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[20] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[19] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[18] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[17] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[16] = hc_byte_perm_S (w[ 0], 0, selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm_S (w[46], w[45], selector); - w[62] = hc_byte_perm_S (w[45], w[44], selector); - w[61] = hc_byte_perm_S (w[44], w[43], selector); - w[60] = hc_byte_perm_S (w[43], w[42], selector); - w[59] = hc_byte_perm_S (w[42], w[41], selector); - w[58] = hc_byte_perm_S (w[41], w[40], selector); - w[57] = hc_byte_perm_S (w[40], w[39], selector); - w[56] = hc_byte_perm_S (w[39], w[38], selector); - w[55] = hc_byte_perm_S (w[38], w[37], selector); - w[54] = hc_byte_perm_S (w[37], w[36], selector); - w[53] = hc_byte_perm_S (w[36], w[35], selector); - w[52] = hc_byte_perm_S (w[35], w[34], selector); - w[51] = hc_byte_perm_S (w[34], w[33], selector); - w[50] = hc_byte_perm_S (w[33], w[32], selector); - w[49] = hc_byte_perm_S (w[32], w[31], selector); - w[48] = hc_byte_perm_S (w[31], w[30], selector); - w[47] = hc_byte_perm_S (w[30], w[29], selector); - w[46] = hc_byte_perm_S (w[29], w[28], selector); - w[45] = hc_byte_perm_S (w[28], w[27], selector); - w[44] = hc_byte_perm_S (w[27], w[26], selector); - w[43] = hc_byte_perm_S (w[26], w[25], selector); - w[42] = hc_byte_perm_S (w[25], w[24], selector); - w[41] = hc_byte_perm_S (w[24], w[23], selector); - w[40] = hc_byte_perm_S (w[23], w[22], selector); - w[39] = hc_byte_perm_S (w[22], w[21], selector); - w[38] = hc_byte_perm_S (w[21], w[20], selector); - w[37] = hc_byte_perm_S (w[20], w[19], selector); - w[36] = hc_byte_perm_S (w[19], w[18], selector); - w[35] = hc_byte_perm_S (w[18], w[17], selector); - w[34] = hc_byte_perm_S (w[17], w[16], selector); - w[33] = hc_byte_perm_S (w[16], w[15], selector); - w[32] = hc_byte_perm_S (w[15], w[14], selector); - w[31] = hc_byte_perm_S (w[14], w[13], selector); - w[30] = hc_byte_perm_S (w[13], w[12], selector); - w[29] = hc_byte_perm_S (w[12], w[11], selector); - w[28] = hc_byte_perm_S (w[11], w[10], selector); - w[27] = hc_byte_perm_S (w[10], w[ 9], selector); - w[26] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[25] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[24] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[23] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[22] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[21] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[20] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[19] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[18] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[17] = hc_byte_perm_S (w[ 0], 0, selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm_S (w[45], w[44], selector); - w[62] = hc_byte_perm_S (w[44], w[43], selector); - w[61] = hc_byte_perm_S (w[43], w[42], selector); - w[60] = hc_byte_perm_S (w[42], w[41], selector); - w[59] = hc_byte_perm_S (w[41], w[40], selector); - w[58] = hc_byte_perm_S (w[40], w[39], selector); - w[57] = hc_byte_perm_S (w[39], w[38], selector); - w[56] = hc_byte_perm_S (w[38], w[37], selector); - w[55] = hc_byte_perm_S (w[37], w[36], selector); - w[54] = hc_byte_perm_S (w[36], w[35], selector); - w[53] = hc_byte_perm_S (w[35], w[34], selector); - w[52] = hc_byte_perm_S (w[34], w[33], selector); - w[51] = hc_byte_perm_S (w[33], w[32], selector); - w[50] = hc_byte_perm_S (w[32], w[31], selector); - w[49] = hc_byte_perm_S (w[31], w[30], selector); - w[48] = hc_byte_perm_S (w[30], w[29], selector); - w[47] = hc_byte_perm_S (w[29], w[28], selector); - w[46] = hc_byte_perm_S (w[28], w[27], selector); - w[45] = hc_byte_perm_S (w[27], w[26], selector); - w[44] = hc_byte_perm_S (w[26], w[25], selector); - w[43] = hc_byte_perm_S (w[25], w[24], selector); - w[42] = hc_byte_perm_S (w[24], w[23], selector); - w[41] = hc_byte_perm_S (w[23], w[22], selector); - w[40] = hc_byte_perm_S (w[22], w[21], selector); - w[39] = hc_byte_perm_S (w[21], w[20], selector); - w[38] = hc_byte_perm_S (w[20], w[19], selector); - w[37] = hc_byte_perm_S (w[19], w[18], selector); - w[36] = hc_byte_perm_S (w[18], w[17], selector); - w[35] = hc_byte_perm_S (w[17], w[16], selector); - w[34] = hc_byte_perm_S (w[16], w[15], selector); - w[33] = hc_byte_perm_S (w[15], w[14], selector); - w[32] = hc_byte_perm_S (w[14], w[13], selector); - w[31] = hc_byte_perm_S (w[13], w[12], selector); - w[30] = hc_byte_perm_S (w[12], w[11], selector); - w[29] = hc_byte_perm_S (w[11], w[10], selector); - w[28] = hc_byte_perm_S (w[10], w[ 9], selector); - w[27] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[26] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[25] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[24] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[23] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[22] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[21] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[20] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[19] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[18] = hc_byte_perm_S (w[ 0], 0, selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm_S (w[44], w[43], selector); - w[62] = hc_byte_perm_S (w[43], w[42], selector); - w[61] = hc_byte_perm_S (w[42], w[41], selector); - w[60] = hc_byte_perm_S (w[41], w[40], selector); - w[59] = hc_byte_perm_S (w[40], w[39], selector); - w[58] = hc_byte_perm_S (w[39], w[38], selector); - w[57] = hc_byte_perm_S (w[38], w[37], selector); - w[56] = hc_byte_perm_S (w[37], w[36], selector); - w[55] = hc_byte_perm_S (w[36], w[35], selector); - w[54] = hc_byte_perm_S (w[35], w[34], selector); - w[53] = hc_byte_perm_S (w[34], w[33], selector); - w[52] = hc_byte_perm_S (w[33], w[32], selector); - w[51] = hc_byte_perm_S (w[32], w[31], selector); - w[50] = hc_byte_perm_S (w[31], w[30], selector); - w[49] = hc_byte_perm_S (w[30], w[29], selector); - w[48] = hc_byte_perm_S (w[29], w[28], selector); - w[47] = hc_byte_perm_S (w[28], w[27], selector); - w[46] = hc_byte_perm_S (w[27], w[26], selector); - w[45] = hc_byte_perm_S (w[26], w[25], selector); - w[44] = hc_byte_perm_S (w[25], w[24], selector); - w[43] = hc_byte_perm_S (w[24], w[23], selector); - w[42] = hc_byte_perm_S (w[23], w[22], selector); - w[41] = hc_byte_perm_S (w[22], w[21], selector); - w[40] = hc_byte_perm_S (w[21], w[20], selector); - w[39] = hc_byte_perm_S (w[20], w[19], selector); - w[38] = hc_byte_perm_S (w[19], w[18], selector); - w[37] = hc_byte_perm_S (w[18], w[17], selector); - w[36] = hc_byte_perm_S (w[17], w[16], selector); - w[35] = hc_byte_perm_S (w[16], w[15], selector); - w[34] = hc_byte_perm_S (w[15], w[14], selector); - w[33] = hc_byte_perm_S (w[14], w[13], selector); - w[32] = hc_byte_perm_S (w[13], w[12], selector); - w[31] = hc_byte_perm_S (w[12], w[11], selector); - w[30] = hc_byte_perm_S (w[11], w[10], selector); - w[29] = hc_byte_perm_S (w[10], w[ 9], selector); - w[28] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[27] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[26] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[25] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[24] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[23] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[22] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[21] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[20] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[19] = hc_byte_perm_S (w[ 0], 0, selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm_S (w[43], w[42], selector); - w[62] = hc_byte_perm_S (w[42], w[41], selector); - w[61] = hc_byte_perm_S (w[41], w[40], selector); - w[60] = hc_byte_perm_S (w[40], w[39], selector); - w[59] = hc_byte_perm_S (w[39], w[38], selector); - w[58] = hc_byte_perm_S (w[38], w[37], selector); - w[57] = hc_byte_perm_S (w[37], w[36], selector); - w[56] = hc_byte_perm_S (w[36], w[35], selector); - w[55] = hc_byte_perm_S (w[35], w[34], selector); - w[54] = hc_byte_perm_S (w[34], w[33], selector); - w[53] = hc_byte_perm_S (w[33], w[32], selector); - w[52] = hc_byte_perm_S (w[32], w[31], selector); - w[51] = hc_byte_perm_S (w[31], w[30], selector); - w[50] = hc_byte_perm_S (w[30], w[29], selector); - w[49] = hc_byte_perm_S (w[29], w[28], selector); - w[48] = hc_byte_perm_S (w[28], w[27], selector); - w[47] = hc_byte_perm_S (w[27], w[26], selector); - w[46] = hc_byte_perm_S (w[26], w[25], selector); - w[45] = hc_byte_perm_S (w[25], w[24], selector); - w[44] = hc_byte_perm_S (w[24], w[23], selector); - w[43] = hc_byte_perm_S (w[23], w[22], selector); - w[42] = hc_byte_perm_S (w[22], w[21], selector); - w[41] = hc_byte_perm_S (w[21], w[20], selector); - w[40] = hc_byte_perm_S (w[20], w[19], selector); - w[39] = hc_byte_perm_S (w[19], w[18], selector); - w[38] = hc_byte_perm_S (w[18], w[17], selector); - w[37] = hc_byte_perm_S (w[17], w[16], selector); - w[36] = hc_byte_perm_S (w[16], w[15], selector); - w[35] = hc_byte_perm_S (w[15], w[14], selector); - w[34] = hc_byte_perm_S (w[14], w[13], selector); - w[33] = hc_byte_perm_S (w[13], w[12], selector); - w[32] = hc_byte_perm_S (w[12], w[11], selector); - w[31] = hc_byte_perm_S (w[11], w[10], selector); - w[30] = hc_byte_perm_S (w[10], w[ 9], selector); - w[29] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[28] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[27] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[26] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[25] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[24] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[23] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[22] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[21] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[20] = hc_byte_perm_S (w[ 0], 0, selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm_S (w[42], w[41], selector); - w[62] = hc_byte_perm_S (w[41], w[40], selector); - w[61] = hc_byte_perm_S (w[40], w[39], selector); - w[60] = hc_byte_perm_S (w[39], w[38], selector); - w[59] = hc_byte_perm_S (w[38], w[37], selector); - w[58] = hc_byte_perm_S (w[37], w[36], selector); - w[57] = hc_byte_perm_S (w[36], w[35], selector); - w[56] = hc_byte_perm_S (w[35], w[34], selector); - w[55] = hc_byte_perm_S (w[34], w[33], selector); - w[54] = hc_byte_perm_S (w[33], w[32], selector); - w[53] = hc_byte_perm_S (w[32], w[31], selector); - w[52] = hc_byte_perm_S (w[31], w[30], selector); - w[51] = hc_byte_perm_S (w[30], w[29], selector); - w[50] = hc_byte_perm_S (w[29], w[28], selector); - w[49] = hc_byte_perm_S (w[28], w[27], selector); - w[48] = hc_byte_perm_S (w[27], w[26], selector); - w[47] = hc_byte_perm_S (w[26], w[25], selector); - w[46] = hc_byte_perm_S (w[25], w[24], selector); - w[45] = hc_byte_perm_S (w[24], w[23], selector); - w[44] = hc_byte_perm_S (w[23], w[22], selector); - w[43] = hc_byte_perm_S (w[22], w[21], selector); - w[42] = hc_byte_perm_S (w[21], w[20], selector); - w[41] = hc_byte_perm_S (w[20], w[19], selector); - w[40] = hc_byte_perm_S (w[19], w[18], selector); - w[39] = hc_byte_perm_S (w[18], w[17], selector); - w[38] = hc_byte_perm_S (w[17], w[16], selector); - w[37] = hc_byte_perm_S (w[16], w[15], selector); - w[36] = hc_byte_perm_S (w[15], w[14], selector); - w[35] = hc_byte_perm_S (w[14], w[13], selector); - w[34] = hc_byte_perm_S (w[13], w[12], selector); - w[33] = hc_byte_perm_S (w[12], w[11], selector); - w[32] = hc_byte_perm_S (w[11], w[10], selector); - w[31] = hc_byte_perm_S (w[10], w[ 9], selector); - w[30] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[29] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[28] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[27] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[26] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[25] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[24] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[23] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[22] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[21] = hc_byte_perm_S (w[ 0], 0, selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm_S (w[41], w[40], selector); - w[62] = hc_byte_perm_S (w[40], w[39], selector); - w[61] = hc_byte_perm_S (w[39], w[38], selector); - w[60] = hc_byte_perm_S (w[38], w[37], selector); - w[59] = hc_byte_perm_S (w[37], w[36], selector); - w[58] = hc_byte_perm_S (w[36], w[35], selector); - w[57] = hc_byte_perm_S (w[35], w[34], selector); - w[56] = hc_byte_perm_S (w[34], w[33], selector); - w[55] = hc_byte_perm_S (w[33], w[32], selector); - w[54] = hc_byte_perm_S (w[32], w[31], selector); - w[53] = hc_byte_perm_S (w[31], w[30], selector); - w[52] = hc_byte_perm_S (w[30], w[29], selector); - w[51] = hc_byte_perm_S (w[29], w[28], selector); - w[50] = hc_byte_perm_S (w[28], w[27], selector); - w[49] = hc_byte_perm_S (w[27], w[26], selector); - w[48] = hc_byte_perm_S (w[26], w[25], selector); - w[47] = hc_byte_perm_S (w[25], w[24], selector); - w[46] = hc_byte_perm_S (w[24], w[23], selector); - w[45] = hc_byte_perm_S (w[23], w[22], selector); - w[44] = hc_byte_perm_S (w[22], w[21], selector); - w[43] = hc_byte_perm_S (w[21], w[20], selector); - w[42] = hc_byte_perm_S (w[20], w[19], selector); - w[41] = hc_byte_perm_S (w[19], w[18], selector); - w[40] = hc_byte_perm_S (w[18], w[17], selector); - w[39] = hc_byte_perm_S (w[17], w[16], selector); - w[38] = hc_byte_perm_S (w[16], w[15], selector); - w[37] = hc_byte_perm_S (w[15], w[14], selector); - w[36] = hc_byte_perm_S (w[14], w[13], selector); - w[35] = hc_byte_perm_S (w[13], w[12], selector); - w[34] = hc_byte_perm_S (w[12], w[11], selector); - w[33] = hc_byte_perm_S (w[11], w[10], selector); - w[32] = hc_byte_perm_S (w[10], w[ 9], selector); - w[31] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[30] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[29] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[28] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[27] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[26] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[25] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[24] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[23] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[22] = hc_byte_perm_S (w[ 0], 0, selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm_S (w[40], w[39], selector); - w[62] = hc_byte_perm_S (w[39], w[38], selector); - w[61] = hc_byte_perm_S (w[38], w[37], selector); - w[60] = hc_byte_perm_S (w[37], w[36], selector); - w[59] = hc_byte_perm_S (w[36], w[35], selector); - w[58] = hc_byte_perm_S (w[35], w[34], selector); - w[57] = hc_byte_perm_S (w[34], w[33], selector); - w[56] = hc_byte_perm_S (w[33], w[32], selector); - w[55] = hc_byte_perm_S (w[32], w[31], selector); - w[54] = hc_byte_perm_S (w[31], w[30], selector); - w[53] = hc_byte_perm_S (w[30], w[29], selector); - w[52] = hc_byte_perm_S (w[29], w[28], selector); - w[51] = hc_byte_perm_S (w[28], w[27], selector); - w[50] = hc_byte_perm_S (w[27], w[26], selector); - w[49] = hc_byte_perm_S (w[26], w[25], selector); - w[48] = hc_byte_perm_S (w[25], w[24], selector); - w[47] = hc_byte_perm_S (w[24], w[23], selector); - w[46] = hc_byte_perm_S (w[23], w[22], selector); - w[45] = hc_byte_perm_S (w[22], w[21], selector); - w[44] = hc_byte_perm_S (w[21], w[20], selector); - w[43] = hc_byte_perm_S (w[20], w[19], selector); - w[42] = hc_byte_perm_S (w[19], w[18], selector); - w[41] = hc_byte_perm_S (w[18], w[17], selector); - w[40] = hc_byte_perm_S (w[17], w[16], selector); - w[39] = hc_byte_perm_S (w[16], w[15], selector); - w[38] = hc_byte_perm_S (w[15], w[14], selector); - w[37] = hc_byte_perm_S (w[14], w[13], selector); - w[36] = hc_byte_perm_S (w[13], w[12], selector); - w[35] = hc_byte_perm_S (w[12], w[11], selector); - w[34] = hc_byte_perm_S (w[11], w[10], selector); - w[33] = hc_byte_perm_S (w[10], w[ 9], selector); - w[32] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[31] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[30] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[29] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[28] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[27] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[26] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[25] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[24] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[23] = hc_byte_perm_S (w[ 0], 0, selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm_S (w[39], w[38], selector); - w[62] = hc_byte_perm_S (w[38], w[37], selector); - w[61] = hc_byte_perm_S (w[37], w[36], selector); - w[60] = hc_byte_perm_S (w[36], w[35], selector); - w[59] = hc_byte_perm_S (w[35], w[34], selector); - w[58] = hc_byte_perm_S (w[34], w[33], selector); - w[57] = hc_byte_perm_S (w[33], w[32], selector); - w[56] = hc_byte_perm_S (w[32], w[31], selector); - w[55] = hc_byte_perm_S (w[31], w[30], selector); - w[54] = hc_byte_perm_S (w[30], w[29], selector); - w[53] = hc_byte_perm_S (w[29], w[28], selector); - w[52] = hc_byte_perm_S (w[28], w[27], selector); - w[51] = hc_byte_perm_S (w[27], w[26], selector); - w[50] = hc_byte_perm_S (w[26], w[25], selector); - w[49] = hc_byte_perm_S (w[25], w[24], selector); - w[48] = hc_byte_perm_S (w[24], w[23], selector); - w[47] = hc_byte_perm_S (w[23], w[22], selector); - w[46] = hc_byte_perm_S (w[22], w[21], selector); - w[45] = hc_byte_perm_S (w[21], w[20], selector); - w[44] = hc_byte_perm_S (w[20], w[19], selector); - w[43] = hc_byte_perm_S (w[19], w[18], selector); - w[42] = hc_byte_perm_S (w[18], w[17], selector); - w[41] = hc_byte_perm_S (w[17], w[16], selector); - w[40] = hc_byte_perm_S (w[16], w[15], selector); - w[39] = hc_byte_perm_S (w[15], w[14], selector); - w[38] = hc_byte_perm_S (w[14], w[13], selector); - w[37] = hc_byte_perm_S (w[13], w[12], selector); - w[36] = hc_byte_perm_S (w[12], w[11], selector); - w[35] = hc_byte_perm_S (w[11], w[10], selector); - w[34] = hc_byte_perm_S (w[10], w[ 9], selector); - w[33] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[32] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[31] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[30] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[29] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[28] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[27] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[26] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[25] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[24] = hc_byte_perm_S (w[ 0], 0, selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm_S (w[38], w[37], selector); - w[62] = hc_byte_perm_S (w[37], w[36], selector); - w[61] = hc_byte_perm_S (w[36], w[35], selector); - w[60] = hc_byte_perm_S (w[35], w[34], selector); - w[59] = hc_byte_perm_S (w[34], w[33], selector); - w[58] = hc_byte_perm_S (w[33], w[32], selector); - w[57] = hc_byte_perm_S (w[32], w[31], selector); - w[56] = hc_byte_perm_S (w[31], w[30], selector); - w[55] = hc_byte_perm_S (w[30], w[29], selector); - w[54] = hc_byte_perm_S (w[29], w[28], selector); - w[53] = hc_byte_perm_S (w[28], w[27], selector); - w[52] = hc_byte_perm_S (w[27], w[26], selector); - w[51] = hc_byte_perm_S (w[26], w[25], selector); - w[50] = hc_byte_perm_S (w[25], w[24], selector); - w[49] = hc_byte_perm_S (w[24], w[23], selector); - w[48] = hc_byte_perm_S (w[23], w[22], selector); - w[47] = hc_byte_perm_S (w[22], w[21], selector); - w[46] = hc_byte_perm_S (w[21], w[20], selector); - w[45] = hc_byte_perm_S (w[20], w[19], selector); - w[44] = hc_byte_perm_S (w[19], w[18], selector); - w[43] = hc_byte_perm_S (w[18], w[17], selector); - w[42] = hc_byte_perm_S (w[17], w[16], selector); - w[41] = hc_byte_perm_S (w[16], w[15], selector); - w[40] = hc_byte_perm_S (w[15], w[14], selector); - w[39] = hc_byte_perm_S (w[14], w[13], selector); - w[38] = hc_byte_perm_S (w[13], w[12], selector); - w[37] = hc_byte_perm_S (w[12], w[11], selector); - w[36] = hc_byte_perm_S (w[11], w[10], selector); - w[35] = hc_byte_perm_S (w[10], w[ 9], selector); - w[34] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[33] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[32] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[31] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[30] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[29] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[28] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[27] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[26] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[25] = hc_byte_perm_S (w[ 0], 0, selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm_S (w[37], w[36], selector); - w[62] = hc_byte_perm_S (w[36], w[35], selector); - w[61] = hc_byte_perm_S (w[35], w[34], selector); - w[60] = hc_byte_perm_S (w[34], w[33], selector); - w[59] = hc_byte_perm_S (w[33], w[32], selector); - w[58] = hc_byte_perm_S (w[32], w[31], selector); - w[57] = hc_byte_perm_S (w[31], w[30], selector); - w[56] = hc_byte_perm_S (w[30], w[29], selector); - w[55] = hc_byte_perm_S (w[29], w[28], selector); - w[54] = hc_byte_perm_S (w[28], w[27], selector); - w[53] = hc_byte_perm_S (w[27], w[26], selector); - w[52] = hc_byte_perm_S (w[26], w[25], selector); - w[51] = hc_byte_perm_S (w[25], w[24], selector); - w[50] = hc_byte_perm_S (w[24], w[23], selector); - w[49] = hc_byte_perm_S (w[23], w[22], selector); - w[48] = hc_byte_perm_S (w[22], w[21], selector); - w[47] = hc_byte_perm_S (w[21], w[20], selector); - w[46] = hc_byte_perm_S (w[20], w[19], selector); - w[45] = hc_byte_perm_S (w[19], w[18], selector); - w[44] = hc_byte_perm_S (w[18], w[17], selector); - w[43] = hc_byte_perm_S (w[17], w[16], selector); - w[42] = hc_byte_perm_S (w[16], w[15], selector); - w[41] = hc_byte_perm_S (w[15], w[14], selector); - w[40] = hc_byte_perm_S (w[14], w[13], selector); - w[39] = hc_byte_perm_S (w[13], w[12], selector); - w[38] = hc_byte_perm_S (w[12], w[11], selector); - w[37] = hc_byte_perm_S (w[11], w[10], selector); - w[36] = hc_byte_perm_S (w[10], w[ 9], selector); - w[35] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[34] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[33] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[32] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[31] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[30] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[29] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[28] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[27] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[26] = hc_byte_perm_S (w[ 0], 0, selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm_S (w[36], w[35], selector); - w[62] = hc_byte_perm_S (w[35], w[34], selector); - w[61] = hc_byte_perm_S (w[34], w[33], selector); - w[60] = hc_byte_perm_S (w[33], w[32], selector); - w[59] = hc_byte_perm_S (w[32], w[31], selector); - w[58] = hc_byte_perm_S (w[31], w[30], selector); - w[57] = hc_byte_perm_S (w[30], w[29], selector); - w[56] = hc_byte_perm_S (w[29], w[28], selector); - w[55] = hc_byte_perm_S (w[28], w[27], selector); - w[54] = hc_byte_perm_S (w[27], w[26], selector); - w[53] = hc_byte_perm_S (w[26], w[25], selector); - w[52] = hc_byte_perm_S (w[25], w[24], selector); - w[51] = hc_byte_perm_S (w[24], w[23], selector); - w[50] = hc_byte_perm_S (w[23], w[22], selector); - w[49] = hc_byte_perm_S (w[22], w[21], selector); - w[48] = hc_byte_perm_S (w[21], w[20], selector); - w[47] = hc_byte_perm_S (w[20], w[19], selector); - w[46] = hc_byte_perm_S (w[19], w[18], selector); - w[45] = hc_byte_perm_S (w[18], w[17], selector); - w[44] = hc_byte_perm_S (w[17], w[16], selector); - w[43] = hc_byte_perm_S (w[16], w[15], selector); - w[42] = hc_byte_perm_S (w[15], w[14], selector); - w[41] = hc_byte_perm_S (w[14], w[13], selector); - w[40] = hc_byte_perm_S (w[13], w[12], selector); - w[39] = hc_byte_perm_S (w[12], w[11], selector); - w[38] = hc_byte_perm_S (w[11], w[10], selector); - w[37] = hc_byte_perm_S (w[10], w[ 9], selector); - w[36] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[35] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[34] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[33] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[32] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[31] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[30] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[29] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[28] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[27] = hc_byte_perm_S (w[ 0], 0, selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm_S (w[35], w[34], selector); - w[62] = hc_byte_perm_S (w[34], w[33], selector); - w[61] = hc_byte_perm_S (w[33], w[32], selector); - w[60] = hc_byte_perm_S (w[32], w[31], selector); - w[59] = hc_byte_perm_S (w[31], w[30], selector); - w[58] = hc_byte_perm_S (w[30], w[29], selector); - w[57] = hc_byte_perm_S (w[29], w[28], selector); - w[56] = hc_byte_perm_S (w[28], w[27], selector); - w[55] = hc_byte_perm_S (w[27], w[26], selector); - w[54] = hc_byte_perm_S (w[26], w[25], selector); - w[53] = hc_byte_perm_S (w[25], w[24], selector); - w[52] = hc_byte_perm_S (w[24], w[23], selector); - w[51] = hc_byte_perm_S (w[23], w[22], selector); - w[50] = hc_byte_perm_S (w[22], w[21], selector); - w[49] = hc_byte_perm_S (w[21], w[20], selector); - w[48] = hc_byte_perm_S (w[20], w[19], selector); - w[47] = hc_byte_perm_S (w[19], w[18], selector); - w[46] = hc_byte_perm_S (w[18], w[17], selector); - w[45] = hc_byte_perm_S (w[17], w[16], selector); - w[44] = hc_byte_perm_S (w[16], w[15], selector); - w[43] = hc_byte_perm_S (w[15], w[14], selector); - w[42] = hc_byte_perm_S (w[14], w[13], selector); - w[41] = hc_byte_perm_S (w[13], w[12], selector); - w[40] = hc_byte_perm_S (w[12], w[11], selector); - w[39] = hc_byte_perm_S (w[11], w[10], selector); - w[38] = hc_byte_perm_S (w[10], w[ 9], selector); - w[37] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[36] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[35] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[34] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[33] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[32] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[31] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[30] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[29] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[28] = hc_byte_perm_S (w[ 0], 0, selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm_S (w[34], w[33], selector); - w[62] = hc_byte_perm_S (w[33], w[32], selector); - w[61] = hc_byte_perm_S (w[32], w[31], selector); - w[60] = hc_byte_perm_S (w[31], w[30], selector); - w[59] = hc_byte_perm_S (w[30], w[29], selector); - w[58] = hc_byte_perm_S (w[29], w[28], selector); - w[57] = hc_byte_perm_S (w[28], w[27], selector); - w[56] = hc_byte_perm_S (w[27], w[26], selector); - w[55] = hc_byte_perm_S (w[26], w[25], selector); - w[54] = hc_byte_perm_S (w[25], w[24], selector); - w[53] = hc_byte_perm_S (w[24], w[23], selector); - w[52] = hc_byte_perm_S (w[23], w[22], selector); - w[51] = hc_byte_perm_S (w[22], w[21], selector); - w[50] = hc_byte_perm_S (w[21], w[20], selector); - w[49] = hc_byte_perm_S (w[20], w[19], selector); - w[48] = hc_byte_perm_S (w[19], w[18], selector); - w[47] = hc_byte_perm_S (w[18], w[17], selector); - w[46] = hc_byte_perm_S (w[17], w[16], selector); - w[45] = hc_byte_perm_S (w[16], w[15], selector); - w[44] = hc_byte_perm_S (w[15], w[14], selector); - w[43] = hc_byte_perm_S (w[14], w[13], selector); - w[42] = hc_byte_perm_S (w[13], w[12], selector); - w[41] = hc_byte_perm_S (w[12], w[11], selector); - w[40] = hc_byte_perm_S (w[11], w[10], selector); - w[39] = hc_byte_perm_S (w[10], w[ 9], selector); - w[38] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[37] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[36] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[35] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[34] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[33] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[32] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[31] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[30] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[29] = hc_byte_perm_S (w[ 0], 0, selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm_S (w[33], w[32], selector); - w[62] = hc_byte_perm_S (w[32], w[31], selector); - w[61] = hc_byte_perm_S (w[31], w[30], selector); - w[60] = hc_byte_perm_S (w[30], w[29], selector); - w[59] = hc_byte_perm_S (w[29], w[28], selector); - w[58] = hc_byte_perm_S (w[28], w[27], selector); - w[57] = hc_byte_perm_S (w[27], w[26], selector); - w[56] = hc_byte_perm_S (w[26], w[25], selector); - w[55] = hc_byte_perm_S (w[25], w[24], selector); - w[54] = hc_byte_perm_S (w[24], w[23], selector); - w[53] = hc_byte_perm_S (w[23], w[22], selector); - w[52] = hc_byte_perm_S (w[22], w[21], selector); - w[51] = hc_byte_perm_S (w[21], w[20], selector); - w[50] = hc_byte_perm_S (w[20], w[19], selector); - w[49] = hc_byte_perm_S (w[19], w[18], selector); - w[48] = hc_byte_perm_S (w[18], w[17], selector); - w[47] = hc_byte_perm_S (w[17], w[16], selector); - w[46] = hc_byte_perm_S (w[16], w[15], selector); - w[45] = hc_byte_perm_S (w[15], w[14], selector); - w[44] = hc_byte_perm_S (w[14], w[13], selector); - w[43] = hc_byte_perm_S (w[13], w[12], selector); - w[42] = hc_byte_perm_S (w[12], w[11], selector); - w[41] = hc_byte_perm_S (w[11], w[10], selector); - w[40] = hc_byte_perm_S (w[10], w[ 9], selector); - w[39] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[38] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[37] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[36] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[35] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[34] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[33] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[32] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[31] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[30] = hc_byte_perm_S (w[ 0], 0, selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm_S (w[32], w[31], selector); - w[62] = hc_byte_perm_S (w[31], w[30], selector); - w[61] = hc_byte_perm_S (w[30], w[29], selector); - w[60] = hc_byte_perm_S (w[29], w[28], selector); - w[59] = hc_byte_perm_S (w[28], w[27], selector); - w[58] = hc_byte_perm_S (w[27], w[26], selector); - w[57] = hc_byte_perm_S (w[26], w[25], selector); - w[56] = hc_byte_perm_S (w[25], w[24], selector); - w[55] = hc_byte_perm_S (w[24], w[23], selector); - w[54] = hc_byte_perm_S (w[23], w[22], selector); - w[53] = hc_byte_perm_S (w[22], w[21], selector); - w[52] = hc_byte_perm_S (w[21], w[20], selector); - w[51] = hc_byte_perm_S (w[20], w[19], selector); - w[50] = hc_byte_perm_S (w[19], w[18], selector); - w[49] = hc_byte_perm_S (w[18], w[17], selector); - w[48] = hc_byte_perm_S (w[17], w[16], selector); - w[47] = hc_byte_perm_S (w[16], w[15], selector); - w[46] = hc_byte_perm_S (w[15], w[14], selector); - w[45] = hc_byte_perm_S (w[14], w[13], selector); - w[44] = hc_byte_perm_S (w[13], w[12], selector); - w[43] = hc_byte_perm_S (w[12], w[11], selector); - w[42] = hc_byte_perm_S (w[11], w[10], selector); - w[41] = hc_byte_perm_S (w[10], w[ 9], selector); - w[40] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[39] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[38] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[37] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[36] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[35] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[34] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[33] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[32] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[31] = hc_byte_perm_S (w[ 0], 0, selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm_S (w[31], w[30], selector); - w[62] = hc_byte_perm_S (w[30], w[29], selector); - w[61] = hc_byte_perm_S (w[29], w[28], selector); - w[60] = hc_byte_perm_S (w[28], w[27], selector); - w[59] = hc_byte_perm_S (w[27], w[26], selector); - w[58] = hc_byte_perm_S (w[26], w[25], selector); - w[57] = hc_byte_perm_S (w[25], w[24], selector); - w[56] = hc_byte_perm_S (w[24], w[23], selector); - w[55] = hc_byte_perm_S (w[23], w[22], selector); - w[54] = hc_byte_perm_S (w[22], w[21], selector); - w[53] = hc_byte_perm_S (w[21], w[20], selector); - w[52] = hc_byte_perm_S (w[20], w[19], selector); - w[51] = hc_byte_perm_S (w[19], w[18], selector); - w[50] = hc_byte_perm_S (w[18], w[17], selector); - w[49] = hc_byte_perm_S (w[17], w[16], selector); - w[48] = hc_byte_perm_S (w[16], w[15], selector); - w[47] = hc_byte_perm_S (w[15], w[14], selector); - w[46] = hc_byte_perm_S (w[14], w[13], selector); - w[45] = hc_byte_perm_S (w[13], w[12], selector); - w[44] = hc_byte_perm_S (w[12], w[11], selector); - w[43] = hc_byte_perm_S (w[11], w[10], selector); - w[42] = hc_byte_perm_S (w[10], w[ 9], selector); - w[41] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[40] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[39] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[38] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[37] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[36] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[35] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[34] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[33] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[32] = hc_byte_perm_S (w[ 0], 0, selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm_S (w[30], w[29], selector); - w[62] = hc_byte_perm_S (w[29], w[28], selector); - w[61] = hc_byte_perm_S (w[28], w[27], selector); - w[60] = hc_byte_perm_S (w[27], w[26], selector); - w[59] = hc_byte_perm_S (w[26], w[25], selector); - w[58] = hc_byte_perm_S (w[25], w[24], selector); - w[57] = hc_byte_perm_S (w[24], w[23], selector); - w[56] = hc_byte_perm_S (w[23], w[22], selector); - w[55] = hc_byte_perm_S (w[22], w[21], selector); - w[54] = hc_byte_perm_S (w[21], w[20], selector); - w[53] = hc_byte_perm_S (w[20], w[19], selector); - w[52] = hc_byte_perm_S (w[19], w[18], selector); - w[51] = hc_byte_perm_S (w[18], w[17], selector); - w[50] = hc_byte_perm_S (w[17], w[16], selector); - w[49] = hc_byte_perm_S (w[16], w[15], selector); - w[48] = hc_byte_perm_S (w[15], w[14], selector); - w[47] = hc_byte_perm_S (w[14], w[13], selector); - w[46] = hc_byte_perm_S (w[13], w[12], selector); - w[45] = hc_byte_perm_S (w[12], w[11], selector); - w[44] = hc_byte_perm_S (w[11], w[10], selector); - w[43] = hc_byte_perm_S (w[10], w[ 9], selector); - w[42] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[41] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[40] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[39] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[38] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[37] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[36] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[35] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[34] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[33] = hc_byte_perm_S (w[ 0], 0, selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm_S (w[29], w[28], selector); - w[62] = hc_byte_perm_S (w[28], w[27], selector); - w[61] = hc_byte_perm_S (w[27], w[26], selector); - w[60] = hc_byte_perm_S (w[26], w[25], selector); - w[59] = hc_byte_perm_S (w[25], w[24], selector); - w[58] = hc_byte_perm_S (w[24], w[23], selector); - w[57] = hc_byte_perm_S (w[23], w[22], selector); - w[56] = hc_byte_perm_S (w[22], w[21], selector); - w[55] = hc_byte_perm_S (w[21], w[20], selector); - w[54] = hc_byte_perm_S (w[20], w[19], selector); - w[53] = hc_byte_perm_S (w[19], w[18], selector); - w[52] = hc_byte_perm_S (w[18], w[17], selector); - w[51] = hc_byte_perm_S (w[17], w[16], selector); - w[50] = hc_byte_perm_S (w[16], w[15], selector); - w[49] = hc_byte_perm_S (w[15], w[14], selector); - w[48] = hc_byte_perm_S (w[14], w[13], selector); - w[47] = hc_byte_perm_S (w[13], w[12], selector); - w[46] = hc_byte_perm_S (w[12], w[11], selector); - w[45] = hc_byte_perm_S (w[11], w[10], selector); - w[44] = hc_byte_perm_S (w[10], w[ 9], selector); - w[43] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[42] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[41] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[40] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[39] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[38] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[37] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[36] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[35] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[34] = hc_byte_perm_S (w[ 0], 0, selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm_S (w[28], w[27], selector); - w[62] = hc_byte_perm_S (w[27], w[26], selector); - w[61] = hc_byte_perm_S (w[26], w[25], selector); - w[60] = hc_byte_perm_S (w[25], w[24], selector); - w[59] = hc_byte_perm_S (w[24], w[23], selector); - w[58] = hc_byte_perm_S (w[23], w[22], selector); - w[57] = hc_byte_perm_S (w[22], w[21], selector); - w[56] = hc_byte_perm_S (w[21], w[20], selector); - w[55] = hc_byte_perm_S (w[20], w[19], selector); - w[54] = hc_byte_perm_S (w[19], w[18], selector); - w[53] = hc_byte_perm_S (w[18], w[17], selector); - w[52] = hc_byte_perm_S (w[17], w[16], selector); - w[51] = hc_byte_perm_S (w[16], w[15], selector); - w[50] = hc_byte_perm_S (w[15], w[14], selector); - w[49] = hc_byte_perm_S (w[14], w[13], selector); - w[48] = hc_byte_perm_S (w[13], w[12], selector); - w[47] = hc_byte_perm_S (w[12], w[11], selector); - w[46] = hc_byte_perm_S (w[11], w[10], selector); - w[45] = hc_byte_perm_S (w[10], w[ 9], selector); - w[44] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[43] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[42] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[41] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[40] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[39] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[38] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[37] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[36] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[35] = hc_byte_perm_S (w[ 0], 0, selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm_S (w[27], w[26], selector); - w[62] = hc_byte_perm_S (w[26], w[25], selector); - w[61] = hc_byte_perm_S (w[25], w[24], selector); - w[60] = hc_byte_perm_S (w[24], w[23], selector); - w[59] = hc_byte_perm_S (w[23], w[22], selector); - w[58] = hc_byte_perm_S (w[22], w[21], selector); - w[57] = hc_byte_perm_S (w[21], w[20], selector); - w[56] = hc_byte_perm_S (w[20], w[19], selector); - w[55] = hc_byte_perm_S (w[19], w[18], selector); - w[54] = hc_byte_perm_S (w[18], w[17], selector); - w[53] = hc_byte_perm_S (w[17], w[16], selector); - w[52] = hc_byte_perm_S (w[16], w[15], selector); - w[51] = hc_byte_perm_S (w[15], w[14], selector); - w[50] = hc_byte_perm_S (w[14], w[13], selector); - w[49] = hc_byte_perm_S (w[13], w[12], selector); - w[48] = hc_byte_perm_S (w[12], w[11], selector); - w[47] = hc_byte_perm_S (w[11], w[10], selector); - w[46] = hc_byte_perm_S (w[10], w[ 9], selector); - w[45] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[44] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[43] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[42] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[41] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[40] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[39] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[38] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[37] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[36] = hc_byte_perm_S (w[ 0], 0, selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm_S (w[26], w[25], selector); - w[62] = hc_byte_perm_S (w[25], w[24], selector); - w[61] = hc_byte_perm_S (w[24], w[23], selector); - w[60] = hc_byte_perm_S (w[23], w[22], selector); - w[59] = hc_byte_perm_S (w[22], w[21], selector); - w[58] = hc_byte_perm_S (w[21], w[20], selector); - w[57] = hc_byte_perm_S (w[20], w[19], selector); - w[56] = hc_byte_perm_S (w[19], w[18], selector); - w[55] = hc_byte_perm_S (w[18], w[17], selector); - w[54] = hc_byte_perm_S (w[17], w[16], selector); - w[53] = hc_byte_perm_S (w[16], w[15], selector); - w[52] = hc_byte_perm_S (w[15], w[14], selector); - w[51] = hc_byte_perm_S (w[14], w[13], selector); - w[50] = hc_byte_perm_S (w[13], w[12], selector); - w[49] = hc_byte_perm_S (w[12], w[11], selector); - w[48] = hc_byte_perm_S (w[11], w[10], selector); - w[47] = hc_byte_perm_S (w[10], w[ 9], selector); - w[46] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[45] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[44] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[43] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[42] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[41] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[40] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[39] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[38] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[37] = hc_byte_perm_S (w[ 0], 0, selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm_S (w[25], w[24], selector); - w[62] = hc_byte_perm_S (w[24], w[23], selector); - w[61] = hc_byte_perm_S (w[23], w[22], selector); - w[60] = hc_byte_perm_S (w[22], w[21], selector); - w[59] = hc_byte_perm_S (w[21], w[20], selector); - w[58] = hc_byte_perm_S (w[20], w[19], selector); - w[57] = hc_byte_perm_S (w[19], w[18], selector); - w[56] = hc_byte_perm_S (w[18], w[17], selector); - w[55] = hc_byte_perm_S (w[17], w[16], selector); - w[54] = hc_byte_perm_S (w[16], w[15], selector); - w[53] = hc_byte_perm_S (w[15], w[14], selector); - w[52] = hc_byte_perm_S (w[14], w[13], selector); - w[51] = hc_byte_perm_S (w[13], w[12], selector); - w[50] = hc_byte_perm_S (w[12], w[11], selector); - w[49] = hc_byte_perm_S (w[11], w[10], selector); - w[48] = hc_byte_perm_S (w[10], w[ 9], selector); - w[47] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[46] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[45] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[44] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[43] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[42] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[41] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[40] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[39] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[38] = hc_byte_perm_S (w[ 0], 0, selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm_S (w[24], w[23], selector); - w[62] = hc_byte_perm_S (w[23], w[22], selector); - w[61] = hc_byte_perm_S (w[22], w[21], selector); - w[60] = hc_byte_perm_S (w[21], w[20], selector); - w[59] = hc_byte_perm_S (w[20], w[19], selector); - w[58] = hc_byte_perm_S (w[19], w[18], selector); - w[57] = hc_byte_perm_S (w[18], w[17], selector); - w[56] = hc_byte_perm_S (w[17], w[16], selector); - w[55] = hc_byte_perm_S (w[16], w[15], selector); - w[54] = hc_byte_perm_S (w[15], w[14], selector); - w[53] = hc_byte_perm_S (w[14], w[13], selector); - w[52] = hc_byte_perm_S (w[13], w[12], selector); - w[51] = hc_byte_perm_S (w[12], w[11], selector); - w[50] = hc_byte_perm_S (w[11], w[10], selector); - w[49] = hc_byte_perm_S (w[10], w[ 9], selector); - w[48] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[47] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[46] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[45] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[44] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[43] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[42] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[41] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[40] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[39] = hc_byte_perm_S (w[ 0], 0, selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm_S (w[23], w[22], selector); - w[62] = hc_byte_perm_S (w[22], w[21], selector); - w[61] = hc_byte_perm_S (w[21], w[20], selector); - w[60] = hc_byte_perm_S (w[20], w[19], selector); - w[59] = hc_byte_perm_S (w[19], w[18], selector); - w[58] = hc_byte_perm_S (w[18], w[17], selector); - w[57] = hc_byte_perm_S (w[17], w[16], selector); - w[56] = hc_byte_perm_S (w[16], w[15], selector); - w[55] = hc_byte_perm_S (w[15], w[14], selector); - w[54] = hc_byte_perm_S (w[14], w[13], selector); - w[53] = hc_byte_perm_S (w[13], w[12], selector); - w[52] = hc_byte_perm_S (w[12], w[11], selector); - w[51] = hc_byte_perm_S (w[11], w[10], selector); - w[50] = hc_byte_perm_S (w[10], w[ 9], selector); - w[49] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[48] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[47] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[46] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[45] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[44] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[43] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[42] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[41] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[40] = hc_byte_perm_S (w[ 0], 0, selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm_S (w[22], w[21], selector); - w[62] = hc_byte_perm_S (w[21], w[20], selector); - w[61] = hc_byte_perm_S (w[20], w[19], selector); - w[60] = hc_byte_perm_S (w[19], w[18], selector); - w[59] = hc_byte_perm_S (w[18], w[17], selector); - w[58] = hc_byte_perm_S (w[17], w[16], selector); - w[57] = hc_byte_perm_S (w[16], w[15], selector); - w[56] = hc_byte_perm_S (w[15], w[14], selector); - w[55] = hc_byte_perm_S (w[14], w[13], selector); - w[54] = hc_byte_perm_S (w[13], w[12], selector); - w[53] = hc_byte_perm_S (w[12], w[11], selector); - w[52] = hc_byte_perm_S (w[11], w[10], selector); - w[51] = hc_byte_perm_S (w[10], w[ 9], selector); - w[50] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[49] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[48] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[47] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[46] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[45] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[44] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[43] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[42] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[41] = hc_byte_perm_S (w[ 0], 0, selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm_S (w[21], w[20], selector); - w[62] = hc_byte_perm_S (w[20], w[19], selector); - w[61] = hc_byte_perm_S (w[19], w[18], selector); - w[60] = hc_byte_perm_S (w[18], w[17], selector); - w[59] = hc_byte_perm_S (w[17], w[16], selector); - w[58] = hc_byte_perm_S (w[16], w[15], selector); - w[57] = hc_byte_perm_S (w[15], w[14], selector); - w[56] = hc_byte_perm_S (w[14], w[13], selector); - w[55] = hc_byte_perm_S (w[13], w[12], selector); - w[54] = hc_byte_perm_S (w[12], w[11], selector); - w[53] = hc_byte_perm_S (w[11], w[10], selector); - w[52] = hc_byte_perm_S (w[10], w[ 9], selector); - w[51] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[50] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[49] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[48] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[47] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[46] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[45] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[44] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[43] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[42] = hc_byte_perm_S (w[ 0], 0, selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm_S (w[20], w[19], selector); - w[62] = hc_byte_perm_S (w[19], w[18], selector); - w[61] = hc_byte_perm_S (w[18], w[17], selector); - w[60] = hc_byte_perm_S (w[17], w[16], selector); - w[59] = hc_byte_perm_S (w[16], w[15], selector); - w[58] = hc_byte_perm_S (w[15], w[14], selector); - w[57] = hc_byte_perm_S (w[14], w[13], selector); - w[56] = hc_byte_perm_S (w[13], w[12], selector); - w[55] = hc_byte_perm_S (w[12], w[11], selector); - w[54] = hc_byte_perm_S (w[11], w[10], selector); - w[53] = hc_byte_perm_S (w[10], w[ 9], selector); - w[52] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[51] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[50] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[49] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[48] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[47] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[46] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[45] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[44] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[43] = hc_byte_perm_S (w[ 0], 0, selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm_S (w[19], w[18], selector); - w[62] = hc_byte_perm_S (w[18], w[17], selector); - w[61] = hc_byte_perm_S (w[17], w[16], selector); - w[60] = hc_byte_perm_S (w[16], w[15], selector); - w[59] = hc_byte_perm_S (w[15], w[14], selector); - w[58] = hc_byte_perm_S (w[14], w[13], selector); - w[57] = hc_byte_perm_S (w[13], w[12], selector); - w[56] = hc_byte_perm_S (w[12], w[11], selector); - w[55] = hc_byte_perm_S (w[11], w[10], selector); - w[54] = hc_byte_perm_S (w[10], w[ 9], selector); - w[53] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[52] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[51] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[50] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[49] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[48] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[47] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[46] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[45] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[44] = hc_byte_perm_S (w[ 0], 0, selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm_S (w[18], w[17], selector); - w[62] = hc_byte_perm_S (w[17], w[16], selector); - w[61] = hc_byte_perm_S (w[16], w[15], selector); - w[60] = hc_byte_perm_S (w[15], w[14], selector); - w[59] = hc_byte_perm_S (w[14], w[13], selector); - w[58] = hc_byte_perm_S (w[13], w[12], selector); - w[57] = hc_byte_perm_S (w[12], w[11], selector); - w[56] = hc_byte_perm_S (w[11], w[10], selector); - w[55] = hc_byte_perm_S (w[10], w[ 9], selector); - w[54] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[53] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[52] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[51] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[50] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[49] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[48] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[47] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[46] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[45] = hc_byte_perm_S (w[ 0], 0, selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm_S (w[17], w[16], selector); - w[62] = hc_byte_perm_S (w[16], w[15], selector); - w[61] = hc_byte_perm_S (w[15], w[14], selector); - w[60] = hc_byte_perm_S (w[14], w[13], selector); - w[59] = hc_byte_perm_S (w[13], w[12], selector); - w[58] = hc_byte_perm_S (w[12], w[11], selector); - w[57] = hc_byte_perm_S (w[11], w[10], selector); - w[56] = hc_byte_perm_S (w[10], w[ 9], selector); - w[55] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[54] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[53] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[52] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[51] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[50] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[49] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[48] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[47] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[46] = hc_byte_perm_S (w[ 0], 0, selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm_S (w[16], w[15], selector); - w[62] = hc_byte_perm_S (w[15], w[14], selector); - w[61] = hc_byte_perm_S (w[14], w[13], selector); - w[60] = hc_byte_perm_S (w[13], w[12], selector); - w[59] = hc_byte_perm_S (w[12], w[11], selector); - w[58] = hc_byte_perm_S (w[11], w[10], selector); - w[57] = hc_byte_perm_S (w[10], w[ 9], selector); - w[56] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[55] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[54] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[53] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[52] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[51] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[50] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[49] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[48] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[47] = hc_byte_perm_S (w[ 0], 0, selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm_S (w[15], w[14], selector); - w[62] = hc_byte_perm_S (w[14], w[13], selector); - w[61] = hc_byte_perm_S (w[13], w[12], selector); - w[60] = hc_byte_perm_S (w[12], w[11], selector); - w[59] = hc_byte_perm_S (w[11], w[10], selector); - w[58] = hc_byte_perm_S (w[10], w[ 9], selector); - w[57] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[56] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[55] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[54] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[53] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[52] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[51] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[50] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[49] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[48] = hc_byte_perm_S (w[ 0], 0, selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm_S (w[14], w[13], selector); - w[62] = hc_byte_perm_S (w[13], w[12], selector); - w[61] = hc_byte_perm_S (w[12], w[11], selector); - w[60] = hc_byte_perm_S (w[11], w[10], selector); - w[59] = hc_byte_perm_S (w[10], w[ 9], selector); - w[58] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[57] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[56] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[55] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[54] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[53] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[52] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[51] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[50] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[49] = hc_byte_perm_S (w[ 0], 0, selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm_S (w[13], w[12], selector); - w[62] = hc_byte_perm_S (w[12], w[11], selector); - w[61] = hc_byte_perm_S (w[11], w[10], selector); - w[60] = hc_byte_perm_S (w[10], w[ 9], selector); - w[59] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[58] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[57] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[56] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[55] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[54] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[53] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[52] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[51] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[50] = hc_byte_perm_S (w[ 0], 0, selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm_S (w[12], w[11], selector); - w[62] = hc_byte_perm_S (w[11], w[10], selector); - w[61] = hc_byte_perm_S (w[10], w[ 9], selector); - w[60] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[59] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[58] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[57] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[56] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[55] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[54] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[53] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[52] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[51] = hc_byte_perm_S (w[ 0], 0, selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm_S (w[11], w[10], selector); - w[62] = hc_byte_perm_S (w[10], w[ 9], selector); - w[61] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[60] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[59] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[58] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[57] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[56] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[55] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[54] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[53] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[52] = hc_byte_perm_S (w[ 0], 0, selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm_S (w[10], w[ 9], selector); - w[62] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[61] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[60] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[59] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[58] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[57] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[56] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[55] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[54] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[53] = hc_byte_perm_S (w[ 0], 0, selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[62] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[61] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[60] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[59] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[58] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[57] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[56] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[55] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[54] = hc_byte_perm_S (w[ 0], 0, selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[62] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[61] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[60] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[59] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[58] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[57] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[56] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[55] = hc_byte_perm_S (w[ 0], 0, selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[62] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[61] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[60] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[59] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[58] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[57] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[56] = hc_byte_perm_S (w[ 0], 0, selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[62] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[61] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[60] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[59] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[58] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[57] = hc_byte_perm_S (w[ 0], 0, selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[62] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[61] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[60] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[59] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[58] = hc_byte_perm_S (w[ 0], 0, selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[62] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[61] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[60] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[59] = hc_byte_perm_S (w[ 0], 0, selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[62] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[61] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[60] = hc_byte_perm_S (w[ 0], 0, selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[62] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[61] = hc_byte_perm_S (w[ 0], 0, selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[62] = hc_byte_perm_S (w[ 0], 0, selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm_S (w[ 0], 0, selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } /** diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl index b205b18a7..9ec941b10 100644 --- a/OpenCL/inc_hash_blake2b.cl +++ b/OpenCL/inc_hash_blake2b.cl @@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a) return out.v64; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv64_t in; @@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a) return out.v64; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv64_t in; diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl index 99d4389fb..d1c2e3716 100644 --- a/OpenCL/inc_hash_blake2s.cl +++ b/OpenCL/inc_hash_blake2s.cl @@ -77,7 +77,7 @@ DECLSPEC u32 blake2s_rot08_S (const u32 a) return out.v32; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv32_t in; diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index 5a8d04f4c..7e4308229 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -781,7 +781,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 src_r00 = src_r0[0]; const u32 src_r01 = src_r0[1]; const u32 src_r02 = src_r0[2]; @@ -882,123 +881,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P s0 = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - const u32 src_r00 = src_r0[0]; - const u32 src_r01 = src_r0[1]; - const u32 src_r02 = src_r0[2]; - const u32 src_r03 = src_r0[3]; - const u32 src_r10 = src_r1[0]; - const u32 src_r11 = src_r1[1]; - const u32 src_r12 = src_r1[2]; - const u32 src_r13 = src_r1[3]; - - switch (offset_switch) - { - case 0: - s7 = hc_byte_perm_S (src_r12, src_r13, selector); - s6 = hc_byte_perm_S (src_r11, src_r12, selector); - s5 = hc_byte_perm_S (src_r10, src_r11, selector); - s4 = hc_byte_perm_S (src_r03, src_r10, selector); - s3 = hc_byte_perm_S (src_r02, src_r03, selector); - s2 = hc_byte_perm_S (src_r01, src_r02, selector); - s1 = hc_byte_perm_S (src_r00, src_r01, selector); - s0 = hc_byte_perm_S ( 0, src_r00, selector); - break; - - case 1: - s7 = hc_byte_perm_S (src_r11, src_r12, selector); - s6 = hc_byte_perm_S (src_r10, src_r11, selector); - s5 = hc_byte_perm_S (src_r03, src_r10, selector); - s4 = hc_byte_perm_S (src_r02, src_r03, selector); - s3 = hc_byte_perm_S (src_r01, src_r02, selector); - s2 = hc_byte_perm_S (src_r00, src_r01, selector); - s1 = hc_byte_perm_S ( 0, src_r00, selector); - s0 = 0; - break; - - case 2: - s7 = hc_byte_perm_S (src_r10, src_r11, selector); - s6 = hc_byte_perm_S (src_r03, src_r10, selector); - s5 = hc_byte_perm_S (src_r02, src_r03, selector); - s4 = hc_byte_perm_S (src_r01, src_r02, selector); - s3 = hc_byte_perm_S (src_r00, src_r01, selector); - s2 = hc_byte_perm_S ( 0, src_r00, selector); - s1 = 0; - s0 = 0; - break; - - case 3: - s7 = hc_byte_perm_S (src_r03, src_r10, selector); - s6 = hc_byte_perm_S (src_r02, src_r03, selector); - s5 = hc_byte_perm_S (src_r01, src_r02, selector); - s4 = hc_byte_perm_S (src_r00, src_r01, selector); - s3 = hc_byte_perm_S ( 0, src_r00, selector); - s2 = 0; - s1 = 0; - s0 = 0; - - break; - - case 4: - s7 = hc_byte_perm_S (src_r02, src_r03, selector); - s6 = hc_byte_perm_S (src_r01, src_r02, selector); - s5 = hc_byte_perm_S (src_r00, src_r01, selector); - s4 = hc_byte_perm_S ( 0, src_r00, selector); - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 5: - s7 = hc_byte_perm_S (src_r01, src_r02, selector); - s6 = hc_byte_perm_S (src_r00, src_r01, selector); - s5 = hc_byte_perm_S ( 0, src_r00, selector); - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 6: - s7 = hc_byte_perm_S (src_r00, src_r01, selector); - s6 = hc_byte_perm_S ( 0, src_r00, selector); - s5 = 0; - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 7: - s7 = hc_byte_perm_S ( 0, src_r00, selector); - s6 = 0; - s5 = 0; - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - } - #endif buf0[0] = src_l0[0] | s0; buf0[1] = src_l0[1] | s1; diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 40414cbb2..3f4da2fa8 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -121,10 +121,6 @@ using namespace metal; #define IS_GENERIC #endif -#if defined IS_AMD && HAS_VPERM == 1 -#define IS_ROCM -#endif - #define LOCAL_MEM_TYPE_LOCAL 1 #define LOCAL_MEM_TYPE_GLOBAL 2 @@ -159,7 +155,7 @@ using namespace metal; #elif defined IS_CUDA #define DECLSPEC __device__ #elif defined IS_HIP -#define DECLSPEC __device__ +#define DECLSPEC __device__ HC_INLINE #else #define DECLSPEC #endif @@ -190,11 +186,6 @@ using namespace metal; #define USE_ROTATE #endif -#ifdef IS_ROCM -#define USE_BITSELECT -#define USE_ROTATE -#endif - #ifdef IS_INTEL_SDK #ifdef IS_CPU //#define USE_BITSELECT diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index d7b7f57d2..e91fdee6d 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -32,43 +32,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); const u32 div = offset / 4; @@ -145,45 +118,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); const u32 div = offset / 4; @@ -258,35 +203,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index d113243e1..47ec5ba37 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -31,44 +31,17 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif - + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); + const u32 div = offset / 4; switch (div) @@ -144,45 +117,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); const u32 div = offset / 4; @@ -257,35 +202,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 37bbcf883..6465f1457 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -231,47 +231,18 @@ DECLSPEC void append_salt (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp4; u32 tmp5; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = append[4]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - tmp5 = hc_bytealign (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = append[4]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - tmp5 = hc_byte_perm (in4, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); + tmp5 = hc_bytealign_S (in4, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index 39865927c..cdbd5179a 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -28,43 +28,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); const u32 div = offset / 4; @@ -140,47 +113,18 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp2; u32 tmp3; u32 tmp4; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif - + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); + const u32 div = offset / 4; switch (div) @@ -254,35 +198,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index 1a3ea413c..a2db229c8 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -45,30 +45,11 @@ DECLSPEC u32 memcat16 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); switch (offset / 4) { @@ -172,30 +153,11 @@ DECLSPEC u32 memcat16c (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); u32 carry[4] = { 0 }; @@ -336,32 +298,12 @@ DECLSPEC u32 memcat16s (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, in4, offset); - const u32 tmp5 = hc_bytealign_be (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); + const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); switch (offset / 4) { @@ -477,32 +419,12 @@ DECLSPEC u32 memcat16sc (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS con u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, in4, offset); - const u32 tmp5 = hc_bytealign_be (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); + const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); u32 carry[5] = { 0 }; @@ -784,30 +706,11 @@ DECLSPEC u32 memcat20 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif switch (offset / 4) { @@ -950,30 +853,11 @@ DECLSPEC u32 memcat20_x80 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c u32 in3 = append[3]; u32 in4 = 0x80000000; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - #endif switch (offset / 4) { @@ -1116,32 +1000,12 @@ DECLSPEC u32 memcat24 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif switch (offset / 4) { diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index b665dbbf3..7a958394a 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -234,34 +234,13 @@ DECLSPEC void make_sc (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_ u32 i; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] - | hc_bytealign_be (bl[0], 0, pm4); - for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be (bl[i], bl[i - 1], pm4); - sc[idx++] = hc_bytealign_be (sc[0], bl[i - 1], pm4); - for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be (sc[i], sc[i - 1], pm4); - sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8)); - #endif - - for (i = 0; i < pd; i++) sc[idx++] = pw[i]; - sc[idx++] = pw[i] - | hc_byte_perm ( 0, bl[0], selector); - for (i = 1; i < bd; i++) sc[idx++] = hc_byte_perm (bl[i - 1], bl[i], selector); - sc[idx++] = hc_byte_perm (bl[i - 1], sc[0], selector); - for (i = 1; i < 4; i++) sc[idx++] = hc_byte_perm (sc[i - 1], sc[i], selector); - sc[idx++] = hc_byte_perm (sc[i - 1], 0, selector); - #endif + | hc_bytealign_be_S (bl[0], 0, pm4); + for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be_S (bl[i], bl[i - 1], pm4); + sc[idx++] = hc_bytealign_be_S (sc[0], bl[i - 1], pm4); + for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be_S (sc[i], sc[i - 1], pm4); + sc[idx++] = hc_bytealign_be_S ( 0, sc[i - 1], pm4); } } @@ -272,27 +251,10 @@ DECLSPEC void make_pt_with_offset (PRIVATE_AS u32 *pt, const u32 offset, LOCAL_A const u32 om = m % 4; const u32 od = m / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); - pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); - pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); - pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8)); - #endif - pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector); - pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector); - pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector); - pt[3] = hc_byte_perm (sc[od + 3], sc[od + 4], selector); - #endif + pt[0] = hc_bytealign_be_S (sc[od + 1], sc[od + 0], om); + pt[1] = hc_bytealign_be_S (sc[od + 2], sc[od + 1], om); + pt[2] = hc_bytealign_be_S (sc[od + 3], sc[od + 2], om); + pt[3] = hc_bytealign_be_S (sc[od + 4], sc[od + 3], om); } DECLSPEC void make_w_with_offset (PRIVATE_AS ctx_t *ctx, const u32 W_len, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len, PRIVATE_AS u32 *iv, PRIVATE_AS const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 85e59a4da..44d6e3ff7 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -42,24 +42,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index 9ae20dc50..9738ca3bb 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -37,24 +37,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index 2562fac9b..440fc322a 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -51,7 +51,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -69,36 +68,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 2ea23421b..09b8e9eaa 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -49,7 +49,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -67,36 +66,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index d79a9e8f7..a99473ebd 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -48,7 +48,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -66,36 +65,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m17010-pure.cl b/OpenCL/m17010-pure.cl index 4c4e41571..c3ebe645e 100644 --- a/OpenCL/m17010-pure.cl +++ b/OpenCL/m17010-pure.cl @@ -42,31 +42,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -74,11 +49,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -188,11 +163,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17020-pure.cl b/OpenCL/m17020-pure.cl index 29fda3bfa..0ec95fd3f 100644 --- a/OpenCL/m17020-pure.cl +++ b/OpenCL/m17020-pure.cl @@ -47,31 +47,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -79,11 +54,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -193,11 +168,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17030-pure.cl b/OpenCL/m17030-pure.cl index b31f2eeb2..0b97696e5 100644 --- a/OpenCL/m17030-pure.cl +++ b/OpenCL/m17030-pure.cl @@ -43,31 +43,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -189,11 +164,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17040-pure.cl b/OpenCL/m17040-pure.cl index 020ab6b64..dea375187 100644 --- a/OpenCL/m17040-pure.cl +++ b/OpenCL/m17040-pure.cl @@ -43,31 +43,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -168,11 +143,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index 4e520a5cd..1a5fe7504 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -145,24 +145,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index 71bd9a50f..78f98eed6 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -58,24 +58,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m31400_a0-optimized.cl b/OpenCL/m31400_a0-optimized.cl index 3d386cb7c..2d1155546 100644 --- a/OpenCL/m31400_a0-optimized.cl +++ b/OpenCL/m31400_a0-optimized.cl @@ -29,7 +29,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -67,56 +66,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a0-pure.cl b/OpenCL/m31400_a0-pure.cl index 50a005e33..dfeb887ad 100644 --- a/OpenCL/m31400_a0-pure.cl +++ b/OpenCL/m31400_a0-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a1-optimized.cl b/OpenCL/m31400_a1-optimized.cl index aba0ad4fa..02e00eee8 100644 --- a/OpenCL/m31400_a1-optimized.cl +++ b/OpenCL/m31400_a1-optimized.cl @@ -26,7 +26,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -64,56 +63,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a1-pure.cl b/OpenCL/m31400_a1-pure.cl index 96774ca7b..a45dbff6f 100644 --- a/OpenCL/m31400_a1-pure.cl +++ b/OpenCL/m31400_a1-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a3-optimized.cl b/OpenCL/m31400_a3-optimized.cl index bc30d42c3..40eb9753e 100644 --- a/OpenCL/m31400_a3-optimized.cl +++ b/OpenCL/m31400_a3-optimized.cl @@ -28,7 +28,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -66,56 +65,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a3-pure.cl b/OpenCL/m31400_a3-pure.cl index 09d450df9..861cef146 100644 --- a/OpenCL/m31400_a3-pure.cl +++ b/OpenCL/m31400_a3-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)