/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.h"
#include "inc_common.h"

/**
 * vendor specific (or generic) functions
 */

// Scalar byte extractors: store the 32-bit value in a vconv32_t (declared
// elsewhere) and return the named v8 member.

DECLSPEC u8 v8a_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v8.a;
}

DECLSPEC u8 v8b_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v8.b;
}

DECLSPEC u8 v8c_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v8.c;
}

DECLSPEC u8 v8d_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v8.d;
}

// Scalar byte extractors: store the 64-bit value in a vconv64_t (declared
// elsewhere) and return the named v8 member (a..h).

DECLSPEC u8 v8a_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.a;
}

DECLSPEC u8 v8b_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.b;
}

DECLSPEC u8 v8c_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.c;
}

DECLSPEC u8 v8d_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.d;
}

DECLSPEC u8 v8e_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.e;
}

DECLSPEC u8 v8f_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.f;
}

DECLSPEC u8 v8g_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.g;
}

DECLSPEC u8 v8h_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v8.h;
}

// Vector byte extractors: apply the matching *_S helper to every lane of the
// u64x input. VECT_SIZE selects which lanes exist (1 means a plain scalar).

DECLSPEC u8x v8a_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8a_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8a_from_v64_S (a.s0);
  r.s1 = v8a_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8a_from_v64_S (a.s2);
  r.s3 = v8a_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8a_from_v64_S (a.s4);
  r.s5 = v8a_from_v64_S (a.s5);
  r.s6 = v8a_from_v64_S (a.s6);
  r.s7 = v8a_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8a_from_v64_S (a.s8);
  r.s9 = v8a_from_v64_S (a.s9);
  r.sa = v8a_from_v64_S (a.sa);
  r.sb = v8a_from_v64_S (a.sb);
  r.sc = v8a_from_v64_S (a.sc);
  r.sd = v8a_from_v64_S (a.sd);
  r.se = v8a_from_v64_S (a.se);
  r.sf = v8a_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8b_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8b_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8b_from_v64_S (a.s0);
  r.s1 = v8b_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8b_from_v64_S (a.s2);
  r.s3 = v8b_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8b_from_v64_S (a.s4);
  r.s5 = v8b_from_v64_S (a.s5);
  r.s6 = v8b_from_v64_S (a.s6);
  r.s7 = v8b_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8b_from_v64_S (a.s8);
  r.s9 = v8b_from_v64_S (a.s9);
  r.sa = v8b_from_v64_S (a.sa);
  r.sb = v8b_from_v64_S (a.sb);
  r.sc = v8b_from_v64_S (a.sc);
  r.sd = v8b_from_v64_S (a.sd);
  r.se = v8b_from_v64_S (a.se);
  r.sf = v8b_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8c_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8c_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8c_from_v64_S (a.s0);
  r.s1 = v8c_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8c_from_v64_S (a.s2);
  r.s3 = v8c_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8c_from_v64_S (a.s4);
  r.s5 = v8c_from_v64_S (a.s5);
  r.s6 = v8c_from_v64_S (a.s6);
  r.s7 = v8c_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8c_from_v64_S (a.s8);
  r.s9 = v8c_from_v64_S (a.s9);
  r.sa = v8c_from_v64_S (a.sa);
  r.sb = v8c_from_v64_S (a.sb);
  r.sc = v8c_from_v64_S (a.sc);
  r.sd = v8c_from_v64_S (a.sd);
  r.se = v8c_from_v64_S (a.se);
  r.sf = v8c_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8d_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8d_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8d_from_v64_S (a.s0);
  r.s1 = v8d_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8d_from_v64_S (a.s2);
  r.s3 = v8d_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8d_from_v64_S (a.s4);
  r.s5 = v8d_from_v64_S (a.s5);
  r.s6 = v8d_from_v64_S (a.s6);
  r.s7 = v8d_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8d_from_v64_S (a.s8);
  r.s9 = v8d_from_v64_S (a.s9);
  r.sa = v8d_from_v64_S (a.sa);
  r.sb = v8d_from_v64_S (a.sb);
  r.sc = v8d_from_v64_S (a.sc);
  r.sd = v8d_from_v64_S (a.sd);
  r.se = v8d_from_v64_S (a.se);
  r.sf = v8d_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8e_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8e_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8e_from_v64_S (a.s0);
  r.s1 = v8e_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8e_from_v64_S (a.s2);
  r.s3 = v8e_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8e_from_v64_S (a.s4);
  r.s5 = v8e_from_v64_S (a.s5);
  r.s6 = v8e_from_v64_S (a.s6);
  r.s7 = v8e_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8e_from_v64_S (a.s8);
  r.s9 = v8e_from_v64_S (a.s9);
  r.sa = v8e_from_v64_S (a.sa);
  r.sb = v8e_from_v64_S (a.sb);
  r.sc = v8e_from_v64_S (a.sc);
  r.sd = v8e_from_v64_S (a.sd);
  r.se = v8e_from_v64_S (a.se);
  r.sf = v8e_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8f_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8f_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8f_from_v64_S (a.s0);
  r.s1 = v8f_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8f_from_v64_S (a.s2);
  r.s3 = v8f_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8f_from_v64_S (a.s4);
  r.s5 = v8f_from_v64_S (a.s5);
  r.s6 = v8f_from_v64_S (a.s6);
  r.s7 = v8f_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8f_from_v64_S (a.s8);
  r.s9 = v8f_from_v64_S (a.s9);
  r.sa = v8f_from_v64_S (a.sa);
  r.sb = v8f_from_v64_S (a.sb);
  r.sc = v8f_from_v64_S (a.sc);
  r.sd = v8f_from_v64_S (a.sd);
  r.se = v8f_from_v64_S (a.se);
  r.sf = v8f_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8g_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8g_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8g_from_v64_S (a.s0);
  r.s1 = v8g_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8g_from_v64_S (a.s2);
  r.s3 = v8g_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8g_from_v64_S (a.s4);
  r.s5 = v8g_from_v64_S (a.s5);
  r.s6 = v8g_from_v64_S (a.s6);
  r.s7 = v8g_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8g_from_v64_S (a.s8);
  r.s9 = v8g_from_v64_S (a.s9);
  r.sa = v8g_from_v64_S (a.sa);
  r.sb = v8g_from_v64_S (a.sb);
  r.sc = v8g_from_v64_S (a.sc);
  r.sd = v8g_from_v64_S (a.sd);
  r.se = v8g_from_v64_S (a.se);
  r.sf = v8g_from_v64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u8x v8h_from_v64 (u64x a)
{
  u8x r = 0;

  #if VECT_SIZE == 1
  r = v8h_from_v64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v8h_from_v64_S (a.s0);
  r.s1 = v8h_from_v64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v8h_from_v64_S (a.s2);
  r.s3 = v8h_from_v64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v8h_from_v64_S (a.s4);
  r.s5 = v8h_from_v64_S (a.s5);
  r.s6 = v8h_from_v64_S (a.s6);
  r.s7 = v8h_from_v64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v8h_from_v64_S (a.s8);
  r.s9 = v8h_from_v64_S (a.s9);
  r.sa = v8h_from_v64_S (a.sa);
  r.sb = v8h_from_v64_S (a.sb);
  r.sc = v8h_from_v64_S (a.sc);
  r.sd = v8h_from_v64_S (a.sd);
  r.se = v8h_from_v64_S (a.se);
  r.sf = v8h_from_v64_S (a.sf);
  #endif

  return r;
}

// 16-bit and 32-bit member access through the same vconv32_t / vconv64_t
// views: split a value into its v16/v32 members or rebuild it from them.

DECLSPEC u16 v16a_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v16.a;
}

DECLSPEC u16 v16b_from_v32_S (const u32 v32)
{
  vconv32_t v;

  v.v32 = v32;

  return v.v16.b;
}

DECLSPEC u32 v32_from_v16ab_S (const u16 v16a, const u16 v16b)
{
  vconv32_t v;

  v.v16.a = v16a;
  v.v16.b = v16b;

  return v.v32;
}

DECLSPEC u32 v32a_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v32.a;
}

DECLSPEC u32 v32b_from_v64_S (const u64 v64)
{
  vconv64_t v;

  v.v64 = v64;

  return v.v32.b;
}

DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b)
{
  vconv64_t v;

  v.v32.a = v32a;
  v.v32.b = v32b;

  return v.v64;
}

// unpack functions are similar, but always return u32
//
// Each one extracts the 8-bit field at bit offset 0/8/16/24 of every lane.
// With IS_NV && HAS_BFE == 1 this is done with the bfe.u32 instruction,
// otherwise with a plain shift and mask.

DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32)
{
  u32x r = 0;

  #if defined IS_NV && HAS_BFE == 1

  #if VECT_SIZE == 1
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s0) : "r"(v32.s0));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s1) : "r"(v32.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s2) : "r"(v32.s2));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s3) : "r"(v32.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s4) : "r"(v32.s4));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s5) : "r"(v32.s5));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s6) : "r"(v32.s6));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s7) : "r"(v32.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s8) : "r"(v32.s8));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s9) : "r"(v32.s9));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sa) : "r"(v32.sa));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sb) : "r"(v32.sb));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sc) : "r"(v32.sc));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sd) : "r"(v32.sd));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.se) : "r"(v32.se));
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf));
  #endif

  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 0) & 0xff;
  #endif

  return r;
}

DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32)
{
  u32x r = 0;

  #if defined IS_NV && HAS_BFE == 1

  #if VECT_SIZE == 1
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s0) : "r"(v32.s0));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s1) : "r"(v32.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s2) : "r"(v32.s2));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s3) : "r"(v32.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s4) : "r"(v32.s4));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s5) : "r"(v32.s5));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s6) : "r"(v32.s6));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s7) : "r"(v32.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s8) : "r"(v32.s8));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s9) : "r"(v32.s9));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sa) : "r"(v32.sa));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sb) : "r"(v32.sb));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sc) : "r"(v32.sc));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sd) : "r"(v32.sd));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.se) : "r"(v32.se));
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf));
  #endif

  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 8) & 0xff;
  #endif

  return r;
}

DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32)
{
  u32x r = 0;

  #if defined IS_NV && HAS_BFE == 1

  #if VECT_SIZE == 1
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s0) : "r"(v32.s0));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s1) : "r"(v32.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s2) : "r"(v32.s2));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s3) : "r"(v32.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s4) : "r"(v32.s4));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s5) : "r"(v32.s5));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s6) : "r"(v32.s6));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s7) : "r"(v32.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s8) : "r"(v32.s8));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s9) : "r"(v32.s9));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sa) : "r"(v32.sa));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sb) : "r"(v32.sb));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sc) : "r"(v32.sc));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sd) : "r"(v32.sd));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.se) : "r"(v32.se));
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf));
  #endif

  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 16) & 0xff;
  #endif

  return r;
}

DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32)
{
  u32x r = 0;

  #if defined IS_NV && HAS_BFE == 1

  #if VECT_SIZE == 1
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s0) : "r"(v32.s0));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s1) : "r"(v32.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s2) : "r"(v32.s2));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s3) : "r"(v32.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s4) : "r"(v32.s4));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s5) : "r"(v32.s5));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s6) : "r"(v32.s6));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s7) : "r"(v32.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s8) : "r"(v32.s8));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s9) : "r"(v32.s9));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sa) : "r"(v32.sa));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sb) : "r"(v32.sb));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sc) : "r"(v32.sc));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sd) : "r"(v32.sd));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.se) : "r"(v32.se));
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf));
  #endif

  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 24) & 0xff;
  #endif

  return r;
}

DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
{
  u32 r = 0;

  #if defined IS_NV && HAS_BFE == 1
  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 0) & 0xff;
  #endif

  return r;
}

DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32)
{
  u32 r = 0;

  #if defined IS_NV && HAS_BFE == 1
  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 8) & 0xff;
  #endif

  return r;
}

DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32)
{
  u32 r = 0;

  #if defined IS_NV && HAS_BFE == 1
  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 16) & 0xff;
  #endif

  return r;
}

DECLSPEC u32 unpack_v8d_from_v32_S (const u32 v32)
{
  u32 r = 0;

  #if defined IS_NV && HAS_BFE == 1
  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
  //#elif (defined IS_AMD || defined IS_HIP) && HAS_VBFE == 1
  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
  #else
  r = (v32 >> 24) & 0xff;
  #endif

  return r;
}

// 64 <-> 2x32 helpers. l32_* returns the v32.a member and h32_* the v32.b
// member of the vconv64_t view; hl32_to_64* takes (a, b) and stores b into
// v32.a and a into v32.b.

DECLSPEC u32 l32_from_64_S (u64 a)
{
  return v32a_from_v64_S (a);
}

DECLSPEC u32 h32_from_64_S (u64 a)
{
  return v32b_from_v64_S (a);
}

DECLSPEC u64 hl32_to_64_S (const u32 a, const u32 b)
{
  return v64_from_v32ab_S (b, a);
}

DECLSPEC u32x l32_from_64 (u64x a)
{
  u32x r = 0;

  #if VECT_SIZE == 1
  r = l32_from_64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = l32_from_64_S (a.s0);
  r.s1 = l32_from_64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = l32_from_64_S (a.s2);
  r.s3 = l32_from_64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = l32_from_64_S (a.s4);
  r.s5 = l32_from_64_S (a.s5);
  r.s6 = l32_from_64_S (a.s6);
  r.s7 = l32_from_64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = l32_from_64_S (a.s8);
  r.s9 = l32_from_64_S (a.s9);
  r.sa = l32_from_64_S (a.sa);
  r.sb = l32_from_64_S (a.sb);
  r.sc = l32_from_64_S (a.sc);
  r.sd = l32_from_64_S (a.sd);
  r.se = l32_from_64_S (a.se);
  r.sf = l32_from_64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u32x h32_from_64 (u64x a)
{
  u32x r = 0;

  #if VECT_SIZE == 1
  r = h32_from_64_S (a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = h32_from_64_S (a.s0);
  r.s1 = h32_from_64_S (a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = h32_from_64_S (a.s2);
  r.s3 = h32_from_64_S (a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = h32_from_64_S (a.s4);
  r.s5 = h32_from_64_S (a.s5);
  r.s6 = h32_from_64_S (a.s6);
  r.s7 = h32_from_64_S (a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = h32_from_64_S (a.s8);
  r.s9 = h32_from_64_S (a.s9);
  r.sa = h32_from_64_S (a.sa);
  r.sb = h32_from_64_S (a.sb);
  r.sc = h32_from_64_S (a.sc);
  r.sd = h32_from_64_S (a.sd);
  r.se = h32_from_64_S (a.se);
  r.sf = h32_from_64_S (a.sf);
  #endif

  return r;
}

DECLSPEC u64x hl32_to_64 (const u32x a, const u32x b)
{
  u64x r;

  #if VECT_SIZE == 1
  r = v64_from_v32ab_S (b, a);
  #endif
  #if VECT_SIZE >= 2
  r.s0 = v64_from_v32ab_S (b.s0, a.s0);
  r.s1 = v64_from_v32ab_S (b.s1, a.s1);
  #endif
  #if VECT_SIZE >= 4
  r.s2 = v64_from_v32ab_S (b.s2, a.s2);
  r.s3 = v64_from_v32ab_S (b.s3, a.s3);
  #endif
  #if VECT_SIZE >= 8
  r.s4 = v64_from_v32ab_S (b.s4, a.s4);
  r.s5 = v64_from_v32ab_S (b.s5, a.s5);
  r.s6 = v64_from_v32ab_S (b.s6, a.s6);
  r.s7 = v64_from_v32ab_S (b.s7, a.s7);
  #endif
  #if VECT_SIZE >= 16
  r.s8 = v64_from_v32ab_S (b.s8, a.s8);
  r.s9 = v64_from_v32ab_S (b.s9, a.s9);
  r.sa = v64_from_v32ab_S (b.sa, a.sa);
  r.sb = v64_from_v32ab_S (b.sb, a.sb);
  r.sc = v64_from_v32ab_S (b.sc, a.sc);
  r.sd = v64_from_v32ab_S (b.sd, a.sd);
  r.se = v64_from_v32ab_S (b.se, a.se);
  r.sf = v64_from_v32ab_S (b.sf, a.sf);
  #endif

  return r;
}

// bit rotates
//
// For HC_CPU_OPENCL_EMU_H we dont need to care about vector functions
// The VECT_SIZE is guaranteed to be set to 1 from cpu_opencl_emu.h
//
// Each rotate dispatches to a platform rotl/rotr helper where one is selected
// by the preprocessor, to the rotate () builtin when USE_ROTATE is defined
// (a right rotate by n is expressed as a left rotate by width - n), and to
// two shifts OR-ed together otherwise.

DECLSPEC u32x hc_rotl32 (const u32x a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotl32 (a, n);
  #elif defined IS_CUDA || defined IS_HIP
  return rotl32 (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, make_u32x (n));
  #else
  return ((a << n) | (a >> (32 - n)));
  #endif
  #endif
}

DECLSPEC u32x hc_rotr32 (const u32x a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotr32 (a, n);
  #elif defined IS_CUDA || defined IS_HIP
  return rotr32 (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, make_u32x (32 - n));
  #else
  return ((a >> n) | (a << (32 - n)));
  #endif
  #endif
}

DECLSPEC u32 hc_rotl32_S (const u32 a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotl32 (a, n);
  #elif defined IS_CUDA || defined IS_HIP
  return rotl32_S (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, (u32) (n));
  #else
  return ((a << n) | (a >> (32 - n)));
  #endif
  #endif
}

DECLSPEC u32 hc_rotr32_S (const u32 a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotr32 (a, n);
  #elif defined IS_CUDA || defined IS_HIP
  return rotr32_S (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, (u32) (32 - n));
  #else
  return ((a >> n) | (a << (32 - n)));
  #endif
  #endif
}

DECLSPEC u64x hc_rotl64 (const u64x a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotl64 (a, n);
  #elif defined IS_CUDA
  return rotl64 (a, n);
  #elif (defined IS_AMD || defined IS_HIP)
  return rotl64 (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, make_u64x (n));
  #else
  return ((a << n) | (a >> (64 - n)));
  #endif
  #endif
}

DECLSPEC u64x hc_rotr64 (const u64x a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotr64 (a, n);
  #elif defined IS_CUDA
  return rotr64 (a, n);
  #elif (defined IS_AMD || defined IS_HIP)
  return rotr64 (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, make_u64x (64 - n));
  #else
  return ((a >> n) | (a << (64 - n)));
  #endif
  #endif
}

DECLSPEC u64 hc_rotl64_S (const u64 a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotl64 (a, n);
  #elif defined IS_CUDA
  return rotl64_S (a, n);
  #elif (defined IS_AMD || defined IS_HIP)
  return rotl64_S (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, (u64) (n));
  #else
  return ((a << n) | (a >> (64 - n)));
  #endif
  #endif
}

DECLSPEC u64 hc_rotr64_S (const u64 a, const int n)
{
  #if   defined HC_CPU_OPENCL_EMU_H
  return rotr64 (a, n);
  #elif defined IS_CUDA
  return rotr64_S (a, n);
  #elif (defined IS_AMD || defined IS_HIP)
  return rotr64_S (a, n);
  #else
  #ifdef USE_ROTATE
  return rotate (a, (u64) (64 - n));
  #else
  return ((a >> n) | (a << (64 - n)));
  #endif
  #endif
}

// bitwise swap
//
// hc_swap32* / hc_swap64* reverse the byte order of each 32-bit / 64-bit
// element (see the shift-and-mask fallbacks for the exact mapping). The
// vendor paths do the same with one permute instruction per 32-bit word.

DECLSPEC u32x hc_swap32 (const u32x v)
{
  u32x r;

  #ifdef HC_CPU_OPENCL_EMU_H
  r = byte_swap_32 (v);
  #else
  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  const u32 m = 0x00010203;

  #if VECT_SIZE == 1
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(m));
  #endif
  #if VECT_SIZE >= 2
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s0) : "v"(v.s0), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s1) : "v"(v.s1), "v"(m));
  #endif
  #if VECT_SIZE >= 4
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s2) : "v"(v.s2), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s3) : "v"(v.s3), "v"(m));
  #endif
  #if VECT_SIZE >= 8
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s4) : "v"(v.s4), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s5) : "v"(v.s5), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s6) : "v"(v.s6), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s7) : "v"(v.s7), "v"(m));
  #endif
  #if VECT_SIZE >= 16
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s8) : "v"(v.s8), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.s9) : "v"(v.s9), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.sa) : "v"(v.sa), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.sb) : "v"(v.sb), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.sc) : "v"(v.sc), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.sd) : "v"(v.sd), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.se) : "v"(v.se), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r.sf) : "v"(v.sf), "v"(m));
  #endif

  #elif defined IS_NV  && HAS_PRMT  == 1

  #if VECT_SIZE == 1
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s0) : "r"(v.s0));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s1) : "r"(v.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s2) : "r"(v.s2));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s3) : "r"(v.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s4) : "r"(v.s4));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s5) : "r"(v.s5));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s6) : "r"(v.s6));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s7) : "r"(v.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s8) : "r"(v.s8));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.s9) : "r"(v.s9));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.sa) : "r"(v.sa));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.sb) : "r"(v.sb));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.sc) : "r"(v.sc));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.sd) : "r"(v.sd));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.se) : "r"(v.se));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r.sf) : "r"(v.sf));
  #endif

  #else
  #if defined USE_BITSELECT && defined USE_ROTATE
  r = bitselect (rotate (v, make_u32x (24)),
                 rotate (v, make_u32x ( 8)),
                 make_u32x (0x00ff00ff));
  #else
  r = ((v & make_u32x (0xff000000)) >> 24)
    | ((v & make_u32x (0x00ff0000)) >>  8)
    | ((v & make_u32x (0x0000ff00)) <<  8)
    | ((v & make_u32x (0x000000ff)) << 24);
  #endif
  #endif
  #endif

  return r;
}

DECLSPEC u32 hc_swap32_S (const u32 v)
{
  u32 r;

  #ifdef HC_CPU_OPENCL_EMU_H
  r = byte_swap_32 (v);
  #else
  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(r) : "v"(v), "v"(0x00010203));
  #elif defined IS_NV  && HAS_PRMT  == 1
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(r) : "r"(v));
  #else
  #ifdef USE_SWIZZLE
  r = as_uint (as_uchar4 (v).s3210);
  #else
  r = ((v & 0xff000000) >> 24)
    | ((v & 0x00ff0000) >>  8)
    | ((v & 0x0000ff00) <<  8)
    | ((v & 0x000000ff) << 24);
  #endif
  #endif
  #endif

  return r;
}

DECLSPEC u64x hc_swap64 (const u64x v)
{
  u64x r;

  #ifdef HC_CPU_OPENCL_EMU_H
  r = byte_swap_64 (v);
  #else
  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  const u32 m = 0x00010203;

  const u32x a0 = h32_from_64 (v);
  const u32x a1 = l32_from_64 (v);

  u32x t0;
  u32x t1;

  #if VECT_SIZE == 1
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0) : "v"(0), "v"(a0), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1) : "v"(0), "v"(a1), "v"(m));
  #endif
  #if VECT_SIZE >= 2
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s0) : "v"(0), "v"(a0.s0), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s0) : "v"(0), "v"(a1.s0), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s1) : "v"(0), "v"(a0.s1), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s1) : "v"(0), "v"(a1.s1), "v"(m));
  #endif
  #if VECT_SIZE >= 4
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s2) : "v"(0), "v"(a0.s2), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s2) : "v"(0), "v"(a1.s2), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s3) : "v"(0), "v"(a0.s3), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s3) : "v"(0), "v"(a1.s3), "v"(m));
  #endif
  #if VECT_SIZE >= 8
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s4) : "v"(0), "v"(a0.s4), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s4) : "v"(0), "v"(a1.s4), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s5) : "v"(0), "v"(a0.s5), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s5) : "v"(0), "v"(a1.s5), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s6) : "v"(0), "v"(a0.s6), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s6) : "v"(0), "v"(a1.s6), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s7) : "v"(0), "v"(a0.s7), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s7) : "v"(0), "v"(a1.s7), "v"(m));
  #endif
  #if VECT_SIZE >= 16
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s8) : "v"(0), "v"(a0.s8), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s8) : "v"(0), "v"(a1.s8), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.s9) : "v"(0), "v"(a0.s9), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.s9) : "v"(0), "v"(a1.s9), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.sa) : "v"(0), "v"(a0.sa), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.sa) : "v"(0), "v"(a1.sa), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.sb) : "v"(0), "v"(a0.sb), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.sb) : "v"(0), "v"(a1.sb), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.sc) : "v"(0), "v"(a0.sc), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.sc) : "v"(0), "v"(a1.sc), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.sd) : "v"(0), "v"(a0.sd), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.sd) : "v"(0), "v"(a1.sd), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.se) : "v"(0), "v"(a0.se), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.se) : "v"(0), "v"(a1.se), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t0.sf) : "v"(0), "v"(a0.sf), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(t1.sf) : "v"(0), "v"(a1.sf), "v"(m));
  #endif

  // the two byte-swapped words also trade places
  r = hl32_to_64 (t1, t0);

  #elif defined IS_NV  && HAS_MOV64 == 1 && HAS_PRMT  == 1

  u32x il;
  u32x ir;

  // split every 64-bit lane into two 32-bit registers
  #if VECT_SIZE == 1
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s0), "=r"(ir.s0) : "l"(v.s0));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s1), "=r"(ir.s1) : "l"(v.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s2), "=r"(ir.s2) : "l"(v.s2));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s3), "=r"(ir.s3) : "l"(v.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s4), "=r"(ir.s4) : "l"(v.s4));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s5), "=r"(ir.s5) : "l"(v.s5));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s6), "=r"(ir.s6) : "l"(v.s6));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s7), "=r"(ir.s7) : "l"(v.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s8), "=r"(ir.s8) : "l"(v.s8));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.s9), "=r"(ir.s9) : "l"(v.s9));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.sa), "=r"(ir.sa) : "l"(v.sa));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.sb), "=r"(ir.sb) : "l"(v.sb));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.sc), "=r"(ir.sc) : "l"(v.sc));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.sd), "=r"(ir.sd) : "l"(v.sd));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.se), "=r"(ir.se) : "l"(v.se));
  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il.sf), "=r"(ir.sf) : "l"(v.sf));
  #endif

  u32x tl;
  u32x tr;

  // permute the bytes of each 32-bit half
  #if VECT_SIZE == 1
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s0) : "r"(il.s0));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s0) : "r"(ir.s0));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s1) : "r"(il.s1));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s1) : "r"(ir.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s2) : "r"(il.s2));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s2) : "r"(ir.s2));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s3) : "r"(il.s3));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s3) : "r"(ir.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s4) : "r"(il.s4));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s4) : "r"(ir.s4));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s5) : "r"(il.s5));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s5) : "r"(ir.s5));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s6) : "r"(il.s6));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s6) : "r"(ir.s6));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s7) : "r"(il.s7));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s7) : "r"(ir.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s8) : "r"(il.s8));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s8) : "r"(ir.s8));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.s9) : "r"(il.s9));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.s9) : "r"(ir.s9));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.sa) : "r"(il.sa));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.sa) : "r"(ir.sa));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.sb) : "r"(il.sb));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.sb) : "r"(ir.sb));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.sc) : "r"(il.sc));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.sc) : "r"(ir.sc));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.sd) : "r"(il.sd));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.sd) : "r"(ir.sd));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.se) : "r"(il.se));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.se) : "r"(ir.se));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl.sf) : "r"(il.sf));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr.sf) : "r"(ir.sf));
  #endif

  // reassemble with the two halves exchanged
  #if VECT_SIZE == 1
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));
  #endif
  #if VECT_SIZE >= 2
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s0) : "r"(tr.s0), "r"(tl.s0));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s1) : "r"(tr.s1), "r"(tl.s1));
  #endif
  #if VECT_SIZE >= 4
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s2) : "r"(tr.s2), "r"(tl.s2));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s3) : "r"(tr.s3), "r"(tl.s3));
  #endif
  #if VECT_SIZE >= 8
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s4) : "r"(tr.s4), "r"(tl.s4));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s5) : "r"(tr.s5), "r"(tl.s5));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s6) : "r"(tr.s6), "r"(tl.s6));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s7) : "r"(tr.s7), "r"(tl.s7));
  #endif
  #if VECT_SIZE >= 16
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s8) : "r"(tr.s8), "r"(tl.s8));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.s9) : "r"(tr.s9), "r"(tl.s9));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sa) : "r"(tr.sa), "r"(tl.sa));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sb) : "r"(tr.sb), "r"(tl.sb));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sc) : "r"(tr.sc), "r"(tl.sc));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sd) : "r"(tr.sd), "r"(tl.sd));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.se) : "r"(tr.se), "r"(tl.se));
  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r.sf) : "r"(tr.sf), "r"(tl.sf));
  #endif

  #else

  #if defined USE_BITSELECT && defined USE_ROTATE
  r = bitselect (bitselect (rotate (v, make_u64x (24)),
                            rotate (v, make_u64x ( 8)), make_u64x (0x000000ff000000ffUL)),
                 bitselect (rotate (v, make_u64x (56)),
                            rotate (v, make_u64x (40)), make_u64x (0x00ff000000ff0000UL)),
                                                        make_u64x (0xffff0000ffff0000UL));
  #else
  r = ((v & make_u64x (0xff00000000000000UL)) >> 56)
    | ((v & make_u64x (0x00ff000000000000UL)) >> 40)
    | ((v & make_u64x (0x0000ff0000000000UL)) >> 24)
    | ((v & make_u64x (0x000000ff00000000UL)) >>  8)
    | ((v & make_u64x (0x00000000ff000000UL)) <<  8)
    | ((v & make_u64x (0x0000000000ff0000UL)) << 24)
    | ((v & make_u64x (0x000000000000ff00UL)) << 40)
    | ((v & make_u64x (0x00000000000000ffUL)) << 56);
  #endif

  #endif
  #endif

  return r;
}

DECLSPEC u64 hc_swap64_S (const u64 v)
{
  u64 r;

  #ifdef HC_CPU_OPENCL_EMU_H
  r = byte_swap_64 (v);
  #else
  #if   (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  const u32 m = 0x00010203;

  const u32 v0 = h32_from_64_S (v);
  const u32 v1 = l32_from_64_S (v);

  u32 t0;
  u32 t1;

  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(t0) : "v"(v0), "v"(m));
  __asm__ __volatile__ ("V_PERM_B32 %0, 0, %1, %2;" : "=v"(t1) : "v"(v1), "v"(m));

  r = hl32_to_64_S (t1, t0);

  // This path emits mov.b64 as well as prmt.b32, so it is guarded by
  // HAS_MOV64 too, exactly like the vector variant hc_swap64 (); targets
  // without it use the generic fallback below.
  #elif defined IS_NV  && HAS_MOV64 == 1 && HAS_PRMT  == 1

  u32 il;
  u32 ir;

  asm volatile ("mov.b64 {%0, %1}, %2;" : "=r"(il), "=r"(ir) : "l"(v));

  u32 tl;
  u32 tr;

  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tl) : "r"(il));
  asm volatile ("prmt.b32 %0, %1, 0, 0x0123;" : "=r"(tr) : "r"(ir));

  asm volatile ("mov.b64 %0, {%1, %2};" : "=l"(r) : "r"(tr), "r"(tl));

  #else

  #ifdef USE_SWIZZLE
  r = as_ulong (as_uchar8 (v).s76543210);
  #else
  r = ((v & (u64) 0xff00000000000000UL) >> 56)
    | ((v & (u64) 0x00ff000000000000UL) >> 40)
    | ((v & (u64) 0x0000ff0000000000UL) >> 24)
    | ((v & (u64) 0x000000ff00000000UL) >>  8)
    | ((v & (u64) 0x00000000ff000000UL) <<  8)
    | ((v & (u64) 0x0000000000ff0000UL) << 24)
    | ((v & (u64) 0x000000000000ff00UL) << 40)
    | ((v & (u64) 0x00000000000000ffUL) << 56);
  #endif

  #endif
  #endif

  return r;
}

#if (defined IS_AMD || defined IS_HIP)

// Bit-field extract: shift a right by b and keep the low c bits.
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
{
  #define BIT(x)      (make_u32x (1u) << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b, c);

  #undef BIT
  #undef BIT_MASK
  #undef BFE
}

DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
{
  #define BIT(x)      (1u << (x))
  #define BIT_MASK(x) (BIT (x) - 1)
  #define BFE(x,y,z)  (((x) >> (y)) & BIT_MASK (z))

  return BFE (a, b,
c); #undef BIT #undef BIT_MASK #undef BFE } DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a << 24) | (b >> 8); } else if (cm == 2) { r = (a << 16) | (b >> 16); } else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a << 24) | (b >> 8); } else if (cm == 2) { r = (a << 16) | (b >> 16); } else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a >> 24) | (b << 8); } else if (cm == 2) { r = (a >> 16) | (b << 16); } else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a >> 24) | (b << 8); } else if (cm == 2) { r = (a >> 16) | (b << 16); } else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } #if HAS_VPERM == 1 DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) { u32x r = 0; #if VECT_SIZE == 1 __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); #endif #if VECT_SIZE >= 2 __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s0) : "v"(b.s0), "v"(a.s0), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s1) : "v"(b.s1), "v"(a.s1), "v"(c)); #endif #if VECT_SIZE >= 4 __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s2) : "v"(b.s2), "v"(a.s2), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s3) : "v"(b.s3), "v"(a.s3), "v"(c)); #endif #if VECT_SIZE >= 8 __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s4) : "v"(b.s4), "v"(a.s4), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, 
%1, %2, %3;" : "=v"(r.s5) : "v"(b.s5), "v"(a.s5), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s6) : "v"(b.s6), "v"(a.s6), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s7) : "v"(b.s7), "v"(a.s7), "v"(c)); #endif #if VECT_SIZE >= 16 __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s8) : "v"(b.s8), "v"(a.s8), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s9) : "v"(b.s9), "v"(a.s9), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sa) : "v"(b.sa), "v"(a.sa), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sb) : "v"(b.sb), "v"(a.sb), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sc) : "v"(b.sc), "v"(a.sc), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sd) : "v"(b.sd), "v"(a.sd), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.se) : "v"(b.se), "v"(a.se), "v"(c)); __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sf) : "v"(b.sf), "v"(a.sf), "v"(c)); #endif return r; } DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c) { u32 r = 0; __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); return r; } #endif #if HAS_VADD3 == 1 DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) { /* u32x r = 0; #if VECT_SIZE == 1 __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); #endif #if VECT_SIZE >= 2 __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s0) : "v"(b.s0), "v"(a.s0), "v"(c.s0)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s1) : "v"(b.s1), "v"(a.s1), "v"(c.s1)); #endif #if VECT_SIZE >= 4 __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s2) : "v"(b.s2), "v"(a.s2), "v"(c.s2)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s3) : "v"(b.s3), "v"(a.s3), "v"(c.s3)); #endif #if VECT_SIZE >= 8 __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" 
: "=v"(r.s4) : "v"(b.s4), "v"(a.s4), "v"(c.s4)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s5) : "v"(b.s5), "v"(a.s5), "v"(c.s5)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s6) : "v"(b.s6), "v"(a.s6), "v"(c.s6)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s7) : "v"(b.s7), "v"(a.s7), "v"(c.s7)); #endif #if VECT_SIZE >= 16 __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s8) : "v"(b.s8), "v"(a.s8), "v"(c.s8)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.s9) : "v"(b.s9), "v"(a.s9), "v"(c.s9)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.sa) : "v"(b.sa), "v"(a.sa), "v"(c.sa)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.sb) : "v"(b.sb), "v"(a.sb), "v"(c.sb)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.sc) : "v"(b.sc), "v"(a.sc), "v"(c.sc)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.sd) : "v"(b.sd), "v"(a.sd), "v"(c.sd)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.se) : "v"(b.se), "v"(a.se), "v"(c.se)); __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r.sf) : "v"(b.sf), "v"(a.sf), "v"(c.sf)); #endif return r; */ return a + b + c; } DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c) { /* u32 r = 0; __asm__ __volatile__ ("V_ADD3_U32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); return r; */ return a + b + c; } #else DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) { return a + b + c; } DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c) { return a + b + c; } #endif DECLSPEC u32x hc_lop_0x96 (const u32x a, const u32x b, const u32x c) { return a ^ b ^ c; } DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c) { return a ^ b ^ c; } #endif #ifdef IS_NV DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) { u32x r = 0; #if VECT_SIZE == 1 asm volatile ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); #endif #if 
/**
 * Scalar byte permute for the NVIDIA path.
 *
 * Issues a single PTX `prmt.b32` instruction with `a` and `b` as the two
 * source words and `c` as the selector operand, and returns the
 * destination register.
 *
 * NOTE(review): the meaning of the selector nibbles is defined by the PTX
 * ISA, not by this file; confirm against the PTX documentation before
 * changing callers.
 */
DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c)
{
  // initialised so the variable is never read uninitialised even if the asm is elided
  u32 r = 0;

  asm volatile ("prmt.b32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c));

  return r;
}
%3;" : "=r"(r.s1) : "r"(a.s1), "r"(b.s1), "r"(c.s1)); #endif #if VECT_SIZE >= 4 asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s2) : "r"(a.s2), "r"(b.s2), "r"(c.s2)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s3) : "r"(a.s3), "r"(b.s3), "r"(c.s3)); #endif #if VECT_SIZE >= 8 asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s4) : "r"(a.s4), "r"(b.s4), "r"(c.s4)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s5) : "r"(a.s5), "r"(b.s5), "r"(c.s5)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s6) : "r"(a.s6), "r"(b.s6), "r"(c.s6)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s7) : "r"(a.s7), "r"(b.s7), "r"(c.s7)); #endif #if VECT_SIZE >= 16 asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s8) : "r"(a.s8), "r"(b.s8), "r"(c.s8)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.s9) : "r"(a.s9), "r"(b.s9), "r"(c.s9)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sa) : "r"(a.sa), "r"(b.sa), "r"(c.sa)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sb) : "r"(a.sb), "r"(b.sb), "r"(c.sb)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sc) : "r"(a.sc), "r"(b.sc), "r"(c.sc)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sd) : "r"(a.sd), "r"(b.sd), "r"(c.sd)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.se) : "r"(a.se), "r"(b.se), "r"(c.se)); asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r.sf) : "r"(a.sf), "r"(b.sf), "r"(c.sf)); #endif return r; } DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) { u32 r = 0; asm volatile ("bfe.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(c)); return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; const u32 r = hc_byte_perm_S (b, a, (0x76543210 >> (c_mod_4 * 4)) & 0xffff); return r; } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { const int c_mod_4 = c & 3; const int c_minus_4 = 4 - c_mod_4; const u32x r = hc_byte_perm (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); return r; } DECLSPEC u32 
hc_bytealign_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; const int c_minus_4 = 4 - c_mod_4; const u32 r = hc_byte_perm_S (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); return r; } DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) { return a + b + c; } DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c) { return a + b + c; } DECLSPEC u32x hc_lop_0x96 (const u32x a, const u32x b, const u32x c) { u32x r = 0; #if CUDA_ARCH >= 500 #if VECT_SIZE == 1 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r): "r"(a), "r"(b), "r"(c)); #endif #if VECT_SIZE >= 2 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s0): "r"(a.s0), "r"(b.s0), "r"(c.s0)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s1): "r"(a.s1), "r"(b.s1), "r"(c.s1)); #endif #if VECT_SIZE >= 4 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s2): "r"(a.s2), "r"(b.s2), "r"(c.s2)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s3): "r"(a.s3), "r"(b.s3), "r"(c.s3)); #endif #if VECT_SIZE >= 8 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s4): "r"(a.s4), "r"(b.s4), "r"(c.s4)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s5): "r"(a.s5), "r"(b.s5), "r"(c.s5)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s6): "r"(a.s6), "r"(b.s6), "r"(c.s6)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s7): "r"(a.s7), "r"(b.s7), "r"(c.s7)); #endif #if VECT_SIZE >= 16 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s8): "r"(a.s8), "r"(b.s8), "r"(c.s8)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.s9): "r"(a.s9), "r"(b.s9), "r"(c.s9)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.sa): "r"(a.sa), "r"(b.sa), "r"(c.sa)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.sb): "r"(a.sb), "r"(b.sb), "r"(c.sb)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.sc): "r"(a.sc), "r"(b.sc), "r"(c.sc)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.sd): "r"(a.sd), 
"r"(b.sd), "r"(c.sd)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.se): "r"(a.se), "r"(b.se), "r"(c.se)); asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r.sf): "r"(a.sf), "r"(b.sf), "r"(c.sf)); #endif #else r = a ^ b ^ c; #endif return r; } DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c) { u32 r = 0; #if CUDA_ARCH >= 500 asm volatile ("lop3.b32 %0, %1, %2, %3, 0x96;" : "=r"(r): "r"(a), "r"(b), "r"(c)); #else r = a ^ b ^ c; #endif return r; } #endif #ifdef IS_GENERIC DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { #define BIT(x) (make_u32x (1u) << (x)) #define BIT_MASK(x) (BIT (x) - 1) #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) return BFE (a, b, c); #undef BIT #undef BIT_MASK #undef BFE } DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) { #define BIT(x) (1u << (x)) #define BIT_MASK(x) (BIT (x) - 1) #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) return BFE (a, b, c); #undef BIT #undef BIT_MASK #undef BFE } DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { u32x r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a << 24) | (b >> 8); } else if (cm == 2) { r = (a << 16) | (b >> 16); } else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { u32 r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a << 24) | (b >> 8); } else if (cm == 2) { r = (a << 16) | (b >> 16); } else if (cm == 3) { r = (a << 8) | (b >> 24); } return r; } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { u32x r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 1) { r = (a >> 24) | (b << 8); } else if (cm == 2) { r = (a >> 16) | (b << 16); } else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { u32 r = 0; const int cm = c & 3; if (cm == 0) { r = b; } else if (cm == 
1) { r = (a >> 24) | (b << 8); } else if (cm == 2) { r = (a >> 16) | (b << 16); } else if (cm == 3) { r = (a >> 8) | (b << 24); } return r; } DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) { return a + b + c; } DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c) { return a + b + c; } DECLSPEC u32x hc_lop_0x96 (const u32x a, const u32x b, const u32x c) { return a ^ b ^ c; } DECLSPEC u32 hc_lop_0x96_S (const u32 a, const u32 b, const u32 c) { return a ^ b ^ c; } #endif /** * pure scalar functions */ DECLSPEC int ffz (const u32 v) { #ifdef _unroll #pragma unroll #endif for (int i = 0; i < 32; i++) { if ((v >> i) & 1) continue; return i; } return -1; } #ifdef KERNEL_STATIC DECLSPEC int hash_comp (PRIVATE_AS const u32 *d1, GLOBAL_AS const u32 *d2) { if (d1[3] > d2[DGST_R3]) return ( 1); if (d1[3] < d2[DGST_R3]) return (-1); if (d1[2] > d2[DGST_R2]) return ( 1); if (d1[2] < d2[DGST_R2]) return (-1); if (d1[1] > d2[DGST_R1]) return ( 1); if (d1[1] < d2[DGST_R1]) return (-1); if (d1[0] > d2[DGST_R0]) return ( 1); if (d1[0] < d2[DGST_R0]) return (-1); return (0); } DECLSPEC int find_hash (PRIVATE_AS const u32 *digest, const u32 digests_cnt, GLOBAL_AS const digest_t *digests_buf) { for (u32 l = 0, r = digests_cnt; r; r >>= 1) { const u32 m = r >> 1; const u32 c = l + m; const int cmp = hash_comp (digest, digests_buf[c].digest_buf); if (cmp > 0) { l += m + 1; r--; } if (cmp == 0) return (c); } return (-1); } #endif // Input has to be zero padded and buffer size has to be multiple of 4 and at least of length 24 // We simply ignore buffer length for the first 24 bytes for some extra speed boost :) // Number of unrolls found by simply testing what gave best results DECLSPEC int hc_enc_scan (PRIVATE_AS const u32 *buf, const int len) { if (buf[0] & 0x80808080) return 1; if (buf[1] & 0x80808080) return 1; if (buf[2] & 0x80808080) return 1; if (buf[3] & 0x80808080) return 1; if (buf[4] & 0x80808080) return 1; if (buf[5] & 0x80808080) return 1; for (int i 
/**
 * Report whether a global-memory buffer contains a byte with its top bit
 * set (any byte >= 0x80).
 *
 * The first six words (24 bytes) are always inspected, regardless of `len`.
 * After that, word `w` is inspected only while its byte offset (4 * w) is
 * below `len`. Words are read in ascending order and the scan stops at the
 * first hit.
 *
 * @param buf  buffer to scan; must be readable for at least 24 bytes
 * @param len  length in bytes
 * @return     1 if such a byte was seen, otherwise 0
 */
DECLSPEC int hc_enc_scan_global (GLOBAL_AS const u32 *buf, const int len)
{
  const u32 high_bits = 0x80808080;

  for (int word = 0; (word < 6) || ((word * 4) < len); word++)
  {
    if ((buf[word] & high_bits) != 0) return 1;
  }

  return 0;
}
*/ #define offsetsFromUTF8_0 0x00000000UL #define offsetsFromUTF8_1 0x00003080UL #define offsetsFromUTF8_2 0x000E2080UL #define offsetsFromUTF8_3 0x03C82080UL #define offsetsFromUTF8_4 0xFA082080UL #define offsetsFromUTF8_5 0x82082080UL DECLSPEC void hc_enc_init (PRIVATE_AS hc_enc_t *hc_enc) { hc_enc->pos = 0; hc_enc->cbuf = 0; hc_enc->clen = 0; } DECLSPEC int hc_enc_has_next (PRIVATE_AS hc_enc_t *hc_enc, const int sz) { if (hc_enc->pos < sz) return 1; if (hc_enc->clen) return 1; return 0; } DECLSPEC int hc_enc_validate_utf8 (PRIVATE_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead) { PRIVATE_AS const u8 *src_ptr = (PRIVATE_AS const u8 *) src_buf; if (extraBytesToRead == 0) { const u8 c0 = src_ptr[src_pos + 0]; if (c0 >= 0x80) return 0; } else if (extraBytesToRead == 1) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 < 0xc2) || (c0 > 0xdf)) return 0; const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; } else if (extraBytesToRead == 2) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 >= 0xe0) && (c0 <= 0xe0)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0xa0) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xe1) && (c0 <= 0xec)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xed) && (c0 <= 0xed)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0x9f)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xee) && (c0 <= 0xef)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else { return 0; } } else if (extraBytesToRead == 3) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 >= 0xf0) && (c0 <= 0xf0)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 
0x90) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else if ((c0 >= 0xf1) && (c0 <= 0xf3)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else if ((c0 >= 0xf4) && (c0 <= 0xf4)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else { return 0; } } return 1; } DECLSPEC int hc_enc_validate_utf8_global (GLOBAL_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead) { GLOBAL_AS const u8 *src_ptr = (GLOBAL_AS const u8 *) src_buf; if (extraBytesToRead == 0) { const u8 c0 = src_ptr[src_pos + 0]; if (c0 >= 0x80) return 0; } else if (extraBytesToRead == 1) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 < 0xc2) || (c0 > 0xdf)) return 0; const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; } else if (extraBytesToRead == 2) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 >= 0xe0) && (c0 <= 0xe0)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0xa0) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xe1) && (c0 <= 0xec)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xed) && (c0 <= 0xed)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0x9f)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else if ((c0 >= 0xee) && (c0 <= 0xef)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) 
|| (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; } else { return 0; } } else if (extraBytesToRead == 3) { const u8 c0 = src_ptr[src_pos + 0]; if ((c0 >= 0xf0) && (c0 <= 0xf0)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x90) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else if ((c0 >= 0xf1) && (c0 <= 0xf3)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else if ((c0 >= 0xf4) && (c0 <= 0xf4)) { const u8 c1 = src_ptr[src_pos + 1]; if ((c1 < 0x80) || (c1 > 0xbf)) return 0; const u8 c2 = src_ptr[src_pos + 2]; if ((c2 < 0x80) || (c2 > 0xbf)) return 0; const u8 c3 = src_ptr[src_pos + 3]; if ((c3 < 0x80) || (c3 > 0xbf)) return 0; } else { return 0; } } return 1; } // Input buffer and Output buffer size has to be multiple of 4 and at least of size 4. // The output buffer is not zero padded, so entire buffer has to be set all zero before entering this function or truncated afterwards. 
/**
 * Convert the next stretch of UTF-8 input (private memory) into UTF-16
 * code units, stored low byte first, in dst_buf.
 *
 * Conversion state lives in hc_enc: `pos` is the read offset into the
 * source, and `cbuf`/`clen` hold a low surrogate that did not fit into the
 * previous output buffer. That carried unit is written to dst_buf[0] on
 * entry (zero when nothing was carried).
 *
 * @param hc_enc   conversion state, updated in place
 * @param src_buf  UTF-8 input
 * @param src_len  number of input bytes to consume
 * @param src_sz   size of the input buffer in bytes (read limit)
 * @param dst_buf  output buffer; not zero-filled by this function
 * @param dst_sz   size of the output buffer in bytes
 * @return         number of bytes now valid in dst_buf, or -1 on broken
 *                 input (hc_enc->pos is then set to src_len)
 */
DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz)
{
  PRIVATE_AS const u8 *in_bytes  = (PRIVATE_AS const u8 *) src_buf;
  PRIVATE_AS       u8 *out_bytes = (PRIVATE_AS       u8 *) dst_buf;

  int in_pos = hc_enc->pos;

  #if VENDOR_ID == 8
  // Work around segmentation fault in Intel JiT
  // Tested with 2021.12.6.0.19_160000
  volatile
  #endif
  int out_pos = hc_enc->clen;

  dst_buf[0] = hc_enc->cbuf;

  hc_enc->clen = 0;
  hc_enc->cbuf = 0;

  while ((in_pos < src_len) && (out_pos < dst_sz))
  {
    const u8 lead = in_bytes[in_pos];

    // number of trailing bytes implied by the lead byte
    int extraBytesToRead = 5;

    if      (lead < 0xc0) extraBytesToRead = 0;
    else if (lead < 0xe0) extraBytesToRead = 1;
    else if (lead < 0xf0) extraBytesToRead = 2;
    else if (lead < 0xf8) extraBytesToRead = 3;
    else if (lead < 0xfc) extraBytesToRead = 4;

    // the bounds test comes first so validation never reads past src_sz
    const int sequence_ok = ((in_pos + extraBytesToRead) < src_sz)
                         && (hc_enc_validate_utf8 (src_buf, in_pos, extraBytesToRead) != 0);

    if (sequence_ok == 0)
    {
      // broken input
      hc_enc->pos = src_len;

      return -1;
    }

    // magic value that strips the UTF-8 marker bits after accumulation
    u32 bias = (u32) offsetsFromUTF8_0;

    switch (extraBytesToRead)
    {
      case 1: bias = (u32) offsetsFromUTF8_1; break;
      case 2: bias = (u32) offsetsFromUTF8_2; break;
      case 3: bias = (u32) offsetsFromUTF8_3; break;
      case 4: bias = (u32) offsetsFromUTF8_4; break; /* remember, illegal UTF-8 */
      case 5: bias = (u32) offsetsFromUTF8_5; break; /* remember, illegal UTF-8 */
    }

    u32 ch = 0;

    for (int i = 0; i < extraBytesToRead; i++)
    {
      ch += in_bytes[in_pos++];

      ch <<= 6;
    }

    ch += in_bytes[in_pos++];

    ch -= bias;

    /* Target is a character <= 0xFFFF */

    if (ch <= UNI_MAX_BMP)
    {
      out_bytes[out_pos++] = (ch >> 0) & 0xff;
      out_bytes[out_pos++] = (ch >> 8) & 0xff;

      continue;
    }

    // everything else becomes a surrogate pair
    ch -= halfBase;

    const u32 sur_high = ((ch >> halfShift) + UNI_SUR_HIGH_START);
    const u32 sur_low  = ((ch  & halfMask)  + UNI_SUR_LOW_START);

    // decided before writing: is there room for the high surrogate only?
    const int carry_low = ((out_pos + 2) == dst_sz);

    out_bytes[out_pos++] = (sur_high >> 0) & 0xff;
    out_bytes[out_pos++] = (sur_high >> 8) & 0xff;

    if (carry_low)
    {
      // keep the low surrogate for the next call
      hc_enc->cbuf = sur_low & 0xffff;
      hc_enc->clen = 2;
    }
    else
    {
      out_bytes[out_pos++] = (sur_low >> 0) & 0xff;
      out_bytes[out_pos++] = (sur_low >> 8) & 0xff;
    }
  }

  hc_enc->pos = in_pos;

  return out_pos;
}
src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_5; break; case 4: ch += src_ptr[src_pos++]; ch <<= 6; /* remember, illegal UTF-8 */ ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_4; break; case 3: ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_3; break; case 2: ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_2; break; case 1: ch += src_ptr[src_pos++]; ch <<= 6; ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_1; break; case 0: ch += src_ptr[src_pos++]; ch -= offsetsFromUTF8_0; break; } /* Target is a character <= 0xFFFF */ if (ch <= UNI_MAX_BMP) { dst_ptr[dst_pos++] = (ch >> 0) & 0xff; dst_ptr[dst_pos++] = (ch >> 8) & 0xff; } else { ch -= halfBase; const u32 a = ((ch >> halfShift) + UNI_SUR_HIGH_START); const u32 b = ((ch & halfMask) + UNI_SUR_LOW_START); if ((dst_pos + 2) == dst_sz) { // this section seems to break intel opencl runtime but is unknown why dst_ptr[dst_pos++] = (a >> 0) & 0xff; dst_ptr[dst_pos++] = (a >> 8) & 0xff; hc_enc->cbuf = b & 0xffff; hc_enc->clen = 2; } else { dst_ptr[dst_pos++] = (a >> 0) & 0xff; dst_ptr[dst_pos++] = (a >> 8) & 0xff; dst_ptr[dst_pos++] = (b >> 0) & 0xff; dst_ptr[dst_pos++] = (b >> 8) & 0xff; } } } hc_enc->pos = src_pos; return dst_pos; } #undef halfShift #undef halfBase #undef halfMask #undef UNI_MAX_BMP #undef UNI_SUR_HIGH_START #undef UNI_SUR_HIGH_END #undef UNI_SUR_LOW_START #undef UNI_SUR_LOW_END #undef offsetsFromUTF8_0 #undef offsetsFromUTF8_1 #undef offsetsFromUTF8_2 #undef offsetsFromUTF8_3 #undef offsetsFromUTF8_4 #undef offsetsFromUTF8_5 DECLSPEC int pkcs_padding_bs8 
(PRIVATE_AS const u32 *data_buf, const int data_len) { if (data_len == 0) return -1; // cannot have zero length, is important to avoid out of boundary reads if (data_len % 8) return -1; // has to be a multiple of block size const int last_pad_pos = data_len - 1; const int last_pad_elem = last_pad_pos / 4; const u32 pad = data_buf[last_pad_elem] >> 24; // guaranteed by pkcs structure if ((pad < 1) || (pad > 8)) return -1; // pkcs pads are not zero based const u32 padm = (pad << 0) | (pad << 8) | (pad << 16) | (pad << 24); u32 mask0 = 0; u32 mask1 = 0; switch (pad) { case 1: mask0 = 0x00000000; mask1 = 0xff000000; break; case 2: mask0 = 0x00000000; mask1 = 0xffff0000; break; case 3: mask0 = 0x00000000; mask1 = 0xffffff00; break; case 4: mask0 = 0x00000000; mask1 = 0xffffffff; break; case 5: mask0 = 0xff000000; mask1 = 0xffffffff; break; case 6: mask0 = 0xffff0000; mask1 = 0xffffffff; break; case 7: mask0 = 0xffffff00; mask1 = 0xffffffff; break; case 8: mask0 = 0xffffffff; mask1 = 0xffffffff; break; } const u32 data0 = data_buf[last_pad_elem - 1]; const u32 data1 = data_buf[last_pad_elem - 0]; if ((data0 & mask0) != (padm & mask0)) return -1; if ((data1 & mask1) != (padm & mask1)) return -1; const int real_len = data_len - pad; return real_len; } DECLSPEC int pkcs_padding_bs16 (PRIVATE_AS const u32 *data_buf, const int data_len) { if (data_len == 0) return -1; // cannot have zero length, is important to avoid out of boundary reads if (data_len % 16) return -1; // has to be a multiple of block size const int last_pad_pos = data_len - 1; const int last_pad_elem = last_pad_pos / 4; const u32 pad = data_buf[last_pad_elem] >> 24; // guaranteed by pkcs structure if ((pad < 1) || (pad > 16)) return -1; // pkcs pads are not zero based const u32 padm = (pad << 0) | (pad << 8) | (pad << 16) | (pad << 24); u32 mask0 = 0; u32 mask1 = 0; u32 mask2 = 0; u32 mask3 = 0; switch (pad) { case 1: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xff000000; break; case 
2: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffff0000; break; case 3: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffffff00; break; case 4: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0x00000000; mask3 = 0xffffffff; break; case 5: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xff000000; mask3 = 0xffffffff; break; case 6: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffff0000; mask3 = 0xffffffff; break; case 7: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffffff00; mask3 = 0xffffffff; break; case 8: mask0 = 0x00000000; mask1 = 0x00000000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 9: mask0 = 0x00000000; mask1 = 0xff000000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 10: mask0 = 0x00000000; mask1 = 0xffff0000; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 11: mask0 = 0x00000000; mask1 = 0xffffff00; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 12: mask0 = 0x00000000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 13: mask0 = 0xff000000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 14: mask0 = 0xffff0000; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 15: mask0 = 0xffffff00; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; case 16: mask0 = 0xffffffff; mask1 = 0xffffffff; mask2 = 0xffffffff; mask3 = 0xffffffff; break; } const u32 data0 = data_buf[last_pad_elem - 3]; const u32 data1 = data_buf[last_pad_elem - 2]; const u32 data2 = data_buf[last_pad_elem - 1]; const u32 data3 = data_buf[last_pad_elem - 0]; if ((data0 & mask0) != (padm & mask0)) return -1; if ((data1 & mask1) != (padm & mask1)) return -1; if ((data2 & mask2) != (padm & mask2)) return -1; if ((data3 & mask3) != (padm & mask3)) return -1; const int real_len = data_len - pad; return real_len; } DECLSPEC int asn1_detect (PRIVATE_AS const u32 *buf, const int len) { if (len < 128) { if ((buf[0] & 0x00ff80ff) != 
0x00020030) return 0; } else if (len < 256) { if ((buf[0] & 0xff00ffff) != 0x02008130) return 0; } else if (len < 65536) { if ((buf[0] & 0x0000ffff) != 0x00008230) return 0; if ((buf[1] & 0x000000ff) != 0x00000002) return 0; } if (len < 128) { const int lenb = ((buf[0] & 0x00007f00) >> 8); if ((lenb + 2) != len) return 0; } else if (len < 256) { const int lenb = ((buf[0] & 0x00ff0000) >> 16); if ((lenb + 3) != len) return 0; } else if (len < 65536) { const int lenb = ((buf[0] & 0xff000000) >> 24) | ((buf[0] & 0x00ff0000) >> 8); if ((lenb + 4) != len) return 0; } return 1; } DECLSPEC u32 check_bitmap (GLOBAL_AS const u32 *bitmap, const u32 bitmap_mask, const u32 bitmap_shift, const u32 digest) { return (bitmap[(digest >> bitmap_shift) & bitmap_mask] & (1 << (digest & 0x1f))); } DECLSPEC u32 check (PRIVATE_AS const u32 *digest, GLOBAL_AS const u32 *bitmap_s1_a, GLOBAL_AS const u32 *bitmap_s1_b, GLOBAL_AS const u32 *bitmap_s1_c, GLOBAL_AS const u32 *bitmap_s1_d, GLOBAL_AS const u32 *bitmap_s2_a, GLOBAL_AS const u32 *bitmap_s2_b, GLOBAL_AS const u32 *bitmap_s2_c, GLOBAL_AS const u32 *bitmap_s2_d, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2) { if (check_bitmap (bitmap_s1_a, bitmap_mask, bitmap_shift1, digest[0]) == 0) return (0); if (check_bitmap (bitmap_s1_b, bitmap_mask, bitmap_shift1, digest[1]) == 0) return (0); if (check_bitmap (bitmap_s1_c, bitmap_mask, bitmap_shift1, digest[2]) == 0) return (0); if (check_bitmap (bitmap_s1_d, bitmap_mask, bitmap_shift1, digest[3]) == 0) return (0); if (check_bitmap (bitmap_s2_a, bitmap_mask, bitmap_shift2, digest[0]) == 0) return (0); if (check_bitmap (bitmap_s2_b, bitmap_mask, bitmap_shift2, digest[1]) == 0) return (0); if (check_bitmap (bitmap_s2_c, bitmap_mask, bitmap_shift2, digest[2]) == 0) return (0); if (check_bitmap (bitmap_s2_d, bitmap_mask, bitmap_shift2, digest[3]) == 0) return (0); return (1); } DECLSPEC void mark_hash (GLOBAL_AS plain_t *plains_buf, GLOBAL_AS u32 *d_result, const u32 
salt_pos, const u32 digests_cnt, const u32 digest_pos, const u32 hash_pos, const u64 gid, const u32 il_pos, const u32 extra1, const u32 extra2) { const u32 idx = hc_atomic_inc (d_result); #if ATTACK_MODE == 9 #else if (idx >= digests_cnt) { // this is kind of tricky: we *must* call hc_atomic_inc() to know about the current value from a multi-thread perspective // this action creates a buffer overflow, so we need to fix it here hc_atomic_dec (d_result); return; } #endif plains_buf[idx].salt_pos = salt_pos; plains_buf[idx].digest_pos = digest_pos; // relative plains_buf[idx].hash_pos = hash_pos; // absolute plains_buf[idx].gidvid = gid; plains_buf[idx].il_pos = il_pos; plains_buf[idx].extra1 = extra1; // unused so far plains_buf[idx].extra2 = extra2; // unused so far } DECLSPEC int hc_count_char (PRIVATE_AS const u32 *buf, const int elems, const u32 c) { int r = 0; for (int i = 0; i < elems; i++) { const u32 v = buf[i]; if (((v >> 0) & 0xff) == c) r++; if (((v >> 8) & 0xff) == c) r++; if (((v >> 16) & 0xff) == c) r++; if (((v >> 24) & 0xff) == c) r++; } return r; } DECLSPEC float hc_get_entropy (PRIVATE_AS const u32 *buf, const int elems) { const int length = elems * 4; float entropy = 0.0f; #ifdef _unroll #pragma unroll #endif for (u32 c = 0; c < 256; c++) { const int r = hc_count_char (buf, elems, c); if (r == 0) continue; float w = (float) r / length; entropy += -w * log2 (w); } return entropy; } DECLSPEC int is_valid_hex_8 (const u8 v) { // direct lookup table is slower thanks to CMOV if ((v >= (u8) '0') && (v <= (u8) '9')) return 1; if ((v >= (u8) 'a') && (v <= (u8) 'f')) return 1; return 0; } DECLSPEC int is_valid_hex_32 (const u32 v) { if (is_valid_hex_8 ((u8) (v >> 0)) == 0) return 0; if (is_valid_hex_8 ((u8) (v >> 8)) == 0) return 0; if (is_valid_hex_8 ((u8) (v >> 16)) == 0) return 0; if (is_valid_hex_8 ((u8) (v >> 24)) == 0) return 0; return 1; } DECLSPEC int is_valid_base58_8 (const u8 v) { if (v > (u8) 'z') return 0; if (v < (u8) '1') return 0; if ((v > 
(u8) '9') && (v < (u8) 'A')) return 0; if ((v > (u8) 'Z') && (v < (u8) 'a')) return 0; return 1; } DECLSPEC int is_valid_base58_32 (const u32 v) { if (is_valid_base58_8 ((u8) (v >> 0)) == 0) return 0; if (is_valid_base58_8 ((u8) (v >> 8)) == 0) return 0; if (is_valid_base58_8 ((u8) (v >> 16)) == 0) return 0; if (is_valid_base58_8 ((u8) (v >> 24)) == 0) return 0; return 1; } DECLSPEC int hc_find_keyboard_layout_map (const u32 search, const int search_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt) { for (int idx = 0; idx < keyboard_layout_mapping_cnt; idx++) { const u32 src_char = s_keyboard_layout_mapping_buf[idx].src_char; const int src_len = s_keyboard_layout_mapping_buf[idx].src_len; if (src_len == search_len) { const u32 mask = 0xffffffff >> ((4 - search_len) * 8); if ((src_char & mask) == (search & mask)) return idx; } } return -1; } DECLSPEC int hc_execute_keyboard_layout_mapping (PRIVATE_AS u32 *w, const int pw_len, LOCAL_AS keyboard_layout_mapping_t *s_keyboard_layout_mapping_buf, const int keyboard_layout_mapping_cnt) { u32 out_buf[32] = { 0 }; PRIVATE_AS u8 *out_ptr = (PRIVATE_AS u8 *) out_buf; int out_len = 0; // TC/VC passwords are limited to 128 PRIVATE_AS u8 *w_ptr = (PRIVATE_AS u8 *) w; int pw_pos = 0; while (pw_pos < pw_len) { u32 src0 = 0; u32 src1 = 0; u32 src2 = 0; u32 src3 = 0; #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) const int rem = MIN (pw_len - pw_pos, 4); #undef MIN if (rem > 0) src0 = w_ptr[pw_pos + 0]; if (rem > 1) src1 = w_ptr[pw_pos + 1]; if (rem > 2) src2 = w_ptr[pw_pos + 2]; if (rem > 3) src3 = w_ptr[pw_pos + 3]; const u32 src = (src0 << 0) | (src1 << 8) | (src2 << 16) | (src3 << 24); int src_len; for (src_len = rem; src_len > 0; src_len--) { const int idx = hc_find_keyboard_layout_map (src, src_len, s_keyboard_layout_mapping_buf, keyboard_layout_mapping_cnt); if (idx == -1) continue; u32 dst_char = s_keyboard_layout_mapping_buf[idx].dst_char; int dst_len = s_keyboard_layout_mapping_buf[idx].dst_len; switch (dst_len) { case 1: out_ptr[out_len++] = (dst_char >> 0) & 0xff; break; case 2: out_ptr[out_len++] = (dst_char >> 0) & 0xff; out_ptr[out_len++] = (dst_char >> 8) & 0xff; break; case 3: out_ptr[out_len++] = (dst_char >> 0) & 0xff; out_ptr[out_len++] = (dst_char >> 8) & 0xff; out_ptr[out_len++] = (dst_char >> 16) & 0xff; break; case 4: out_ptr[out_len++] = (dst_char >> 0) & 0xff; out_ptr[out_len++] = (dst_char >> 8) & 0xff; out_ptr[out_len++] = (dst_char >> 16) & 0xff; out_ptr[out_len++] = (dst_char >> 24) & 0xff; break; } pw_pos += src_len; break; } // not matched, keep original if (src_len == 0) { out_ptr[out_len] = w_ptr[pw_pos]; out_len++; pw_pos++; } } w[ 0] = out_buf[ 0]; w[ 1] = out_buf[ 1]; w[ 2] = out_buf[ 2]; w[ 3] = out_buf[ 3]; w[ 4] = out_buf[ 4]; w[ 5] = out_buf[ 5]; w[ 6] = out_buf[ 6]; w[ 7] = out_buf[ 7]; w[ 8] = out_buf[ 8]; w[ 9] = out_buf[ 9]; w[10] = out_buf[10]; w[11] = out_buf[11]; w[12] = out_buf[12]; w[13] = out_buf[13]; w[14] = out_buf[14]; w[15] = out_buf[15]; w[16] = out_buf[16]; w[17] = out_buf[17]; w[18] = out_buf[18]; w[19] = out_buf[19]; w[20] = out_buf[20]; w[21] = out_buf[21]; w[22] = out_buf[22]; w[23] = out_buf[23]; w[24] = out_buf[24]; w[25] = out_buf[25]; w[26] = out_buf[26]; w[27] = out_buf[27]; w[28] = out_buf[28]; w[29] = out_buf[29]; w[30] = out_buf[30]; w[31] = out_buf[31]; return out_len; } /** * vector functions 
*/

/**
 * Widen the 16 bytes held in in[0..3] to 16 bit units, written to out1[0..3]
 * (from in[0], in[1]) and out2[0..3] (from in[2], in[3]).
 *
 * In the generic branch every output word holds two input bytes at byte
 * positions 1 and 3, with zero bytes at positions 0 and 2; the lower two bytes
 * of an input word go to the even output word, the upper two to the odd one.
 * The hc_byte_perm () branches presumably produce the same result -- TODO
 * confirm against the definition of hc_byte_perm ().
 *
 * NOTE(review): the stores run from the highest output word down to the
 * lowest, which looks like it is meant to allow `in` and `out1` to be the same
 * buffer -- confirm against callers before reordering anything.
 */

DECLSPEC void make_utf16be (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm (in[3], 0, 0x3727);
  out2[2] = hc_byte_perm (in[3], 0, 0x1707);
  out2[1] = hc_byte_perm (in[2], 0, 0x3727);
  out2[0] = hc_byte_perm (in[2], 0, 0x1707);
  out1[3] = hc_byte_perm (in[1], 0, 0x3727);
  out1[2] = hc_byte_perm (in[1], 0, 0x1707);
  out1[1] = hc_byte_perm (in[0], 0, 0x3727);
  out1[0] = hc_byte_perm (in[0], 0, 0x1707);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm (in[3], 0, 0x03070207);
  out2[2] = hc_byte_perm (in[3], 0, 0x01070007);
  out2[1] = hc_byte_perm (in[2], 0, 0x03070207);
  out2[0] = hc_byte_perm (in[2], 0, 0x01070007);
  out1[3] = hc_byte_perm (in[1], 0, 0x03070207);
  out1[2] = hc_byte_perm (in[1], 0, 0x01070007);
  out1[1] = hc_byte_perm (in[0], 0, 0x03070207);
  out1[0] = hc_byte_perm (in[0], 0, 0x01070007);

  #else

  // odd output words take input bytes 2 and 3, even output words take bytes 0 and 1

  out2[3] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
  out2[2] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
  out2[1] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
  out2[0] = ((in[2] << 16) & 0xFF000000) | ((in[2] << 8) & 0x0000FF00);
  out1[3] = ((in[1] >> 0) & 0xFF000000) | ((in[1] >> 8) & 0x0000FF00);
  out1[2] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
  out1[1] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);
  out1[0] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);

  #endif
}

/**
 * Variant of make_utf16be () in which the two output words derived from each
 * input word are swapped: the even output word receives input bytes 2 and 3,
 * the odd output word receives input bytes 0 and 1 (generic branch; the
 * hc_byte_perm () branches use the correspondingly swapped selectors).
 *
 * NOTE(review): same high-to-low store order as make_utf16be () -- presumably
 * for the same aliasing reason, confirm against callers.
 */

DECLSPEC void make_utf16beN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm (in[3], 0, 0x1707);
  out2[2] = hc_byte_perm (in[3], 0, 0x3727);
  out2[1] = hc_byte_perm (in[2], 0, 0x1707);
  out2[0] = hc_byte_perm (in[2], 0, 0x3727);
  out1[3] = hc_byte_perm (in[1], 0, 0x1707);
  out1[2] = hc_byte_perm (in[1], 0, 0x3727);
  out1[1] = hc_byte_perm (in[0], 0, 0x1707);
  out1[0] = hc_byte_perm (in[0], 0, 0x3727);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm (in[3], 0, 0x01070007);
  out2[2] = hc_byte_perm (in[3], 0, 0x03070207);
  out2[1] = hc_byte_perm (in[2], 0, 0x01070007);
  out2[0] = hc_byte_perm (in[2], 0, 0x03070207);
  out1[3] = hc_byte_perm (in[1], 0, 0x01070007);
  out1[2] = hc_byte_perm (in[1], 0, 0x03070207);
  out1[1] = hc_byte_perm (in[0], 0, 0x01070007);
  out1[0] = hc_byte_perm (in[0], 0, 0x03070207);

  #else

  out2[3] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
  out2[2] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
  out2[1] = ((in[2] << 16) & 0xFF000000) | ((in[2] << 8) & 0x0000FF00);
  out2[0] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
  out1[3] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
  out1[2] = ((in[1] >> 0) & 0xFF000000) | ((in[1] >> 8) & 0x0000FF00);
  out1[1] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);
  out1[0] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);

  #endif
}

/**
 * Widen the 16 bytes held in in[0..3] to 16 bit units, written to out1[0..3]
 * (from in[0], in[1]) and out2[0..3] (from in[2], in[3]).
 *
 * In the generic branch every output word holds two input bytes at byte
 * positions 0 and 2, with zero bytes at positions 1 and 3; the lower two bytes
 * of an input word go to the even output word, the upper two to the odd one.
 *
 * NOTE(review): stores run from the highest output word down to the lowest,
 * presumably so that `in` and `out1` may alias -- confirm against callers.
 */

DECLSPEC void make_utf16le (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm (in[3], 0, 0x7372);
  out2[2] = hc_byte_perm (in[3], 0, 0x7170);
  out2[1] = hc_byte_perm (in[2], 0, 0x7372);
  out2[0] = hc_byte_perm (in[2], 0, 0x7170);
  out1[3] = hc_byte_perm (in[1], 0, 0x7372);
  out1[2] = hc_byte_perm (in[1], 0, 0x7170);
  out1[1] = hc_byte_perm (in[0], 0, 0x7372);
  out1[0] = hc_byte_perm (in[0], 0, 0x7170);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm (in[3], 0, 0x07030702);
  out2[2] = hc_byte_perm (in[3], 0, 0x07010700);
  out2[1] = hc_byte_perm (in[2], 0, 0x07030702);
  out2[0] = hc_byte_perm (in[2], 0, 0x07010700);
  out1[3] = hc_byte_perm (in[1], 0, 0x07030702);
  out1[2] = hc_byte_perm (in[1], 0, 0x07010700);
  out1[1] = hc_byte_perm (in[0], 0, 0x07030702);
  out1[0] = hc_byte_perm (in[0], 0, 0x07010700);

  #else

  out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
  out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
  out2[1]
= ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
  out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
  out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
  out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
  out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
  out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);

  #endif
}

/**
 * Variant of make_utf16le () in which the two output words derived from each
 * input word are swapped: the even output word receives input bytes 2 and 3,
 * the odd output word receives input bytes 0 and 1 (generic branch; the
 * hc_byte_perm () branches use the correspondingly swapped selectors).
 *
 * NOTE(review): stores run from the highest output word down to the lowest,
 * presumably so that `in` and `out1` may alias -- confirm against callers.
 */

DECLSPEC void make_utf16leN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm (in[3], 0, 0x7170);
  out2[2] = hc_byte_perm (in[3], 0, 0x7372);
  out2[1] = hc_byte_perm (in[2], 0, 0x7170);
  out2[0] = hc_byte_perm (in[2], 0, 0x7372);
  out1[3] = hc_byte_perm (in[1], 0, 0x7170);
  out1[2] = hc_byte_perm (in[1], 0, 0x7372);
  out1[1] = hc_byte_perm (in[0], 0, 0x7170);
  out1[0] = hc_byte_perm (in[0], 0, 0x7372);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm (in[3], 0, 0x07010700);
  out2[2] = hc_byte_perm (in[3], 0, 0x07030702);
  out2[1] = hc_byte_perm (in[2], 0, 0x07010700);
  out2[0] = hc_byte_perm (in[2], 0, 0x07030702);
  out1[3] = hc_byte_perm (in[1], 0, 0x07010700);
  out1[2] = hc_byte_perm (in[1], 0, 0x07030702);
  out1[1] = hc_byte_perm (in[0], 0, 0x07010700);
  out1[0] = hc_byte_perm (in[0], 0, 0x07030702);

  #else

  out2[3] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
  out2[2] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
  out2[1] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
  out2[0] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
  out1[3] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
  out1[2] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
  out1[1] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);
  out1[0] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);

  #endif
}

/**
 * Pack the 16 bit units held in in1[0..3] and in2[0..3] back into bytes.
 *
 * In the generic branch each output word collects byte positions 1 and 3 of
 * two consecutive input words (first input word into the low half, second into
 * the high half), i.e. it reverses the layout produced by the generic branch
 * of make_utf16be (). Bytes at positions 0 and 2 are discarded.
 *
 * NOTE(review): out[] is written in ascending order and each store only reads
 * input words at the same or higher index -- presumably to tolerate `out`
 * aliasing `in1`; confirm against callers before reordering.
 */

DECLSPEC void undo_utf16be (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *in2, PRIVATE_AS u32x *out)
{
  #if defined IS_NV

  out[0] = hc_byte_perm (in1[0], in1[1], 0x4602);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x4602);
  out[2] = hc_byte_perm (in2[0], in2[1], 0x4602);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x4602);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002);
  out[2] = hc_byte_perm (in2[0], in2[1], 0x04060002);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x04060002);

  #else

  out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
         | ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
  out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
         | ((in1[3] & 0x0000ff00) << 8) | ((in1[3] & 0xff000000) << 0);
  out[2] = ((in2[0] & 0x0000ff00) >> 8) | ((in2[0] & 0xff000000) >> 16)
         | ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
  out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
         | ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);

  #endif
}

/**
 * Pack the 16 bit units held in in1[0..3] and in2[0..3] back into bytes.
 *
 * In the generic branch each output word collects byte positions 0 and 2 of
 * two consecutive input words (first input word into the low half, second into
 * the high half), i.e. it reverses the layout produced by the generic branch
 * of make_utf16le (). Bytes at positions 1 and 3 are discarded.
 */

DECLSPEC void undo_utf16le (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *in2, PRIVATE_AS u32x *out)
{
  #if defined IS_NV

  out[0] = hc_byte_perm (in1[0], in1[1], 0x6420);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x6420);
  out[2] = hc_byte_perm (in2[0], in2[1], 0x6420);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x6420);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200);
  out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200);
  out[2] = hc_byte_perm (in2[0], in2[1], 0x06040200);
  out[3] = hc_byte_perm (in2[2], in2[3], 0x06040200);

  #else

  out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
         | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
  out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
         | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
  out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
         | ((in2[1] & 0x000000ff) << 16) | ((in2[1] &
0x00ff0000) << 8); out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8) | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8); #endif } DECLSPEC void set_mark_1x4 (PRIVATE_AS u32 *v, const u32 offset) { const u32 c = (offset & 15) / 4; const u32 r = 0xff << ((offset & 3) * 8); v[0] = (c == 0) ? r : 0; v[1] = (c == 1) ? r : 0; v[2] = (c == 2) ? r : 0; v[3] = (c == 3) ? r : 0; } DECLSPEC void append_helper_1x4 (PRIVATE_AS u32x *r, const u32 v, PRIVATE_AS const u32 *m) { r[0] |= v & m[0]; r[1] |= v & m[1]; r[2] |= v & m[2]; r[3] |= v & m[3]; } DECLSPEC void append_0x80_1x4 (PRIVATE_AS u32x *w0, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); append_helper_1x4 (w0, 0x80808080, v); } DECLSPEC void append_0x80_2x4 (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); const u32 offset16 = offset / 16; append_helper_1x4 (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4 (w1, ((offset16 == 1) ? 0x80808080 : 0), v); } DECLSPEC void append_0x80_3x4 (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); const u32 offset16 = offset / 16; append_helper_1x4 (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4 (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4 (w2, ((offset16 == 2) ? 0x80808080 : 0), v); } DECLSPEC void append_0x80_4x4 (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); const u32 offset16 = offset / 16; append_helper_1x4 (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4 (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4 (w2, ((offset16 == 2) ? 0x80808080 : 0), v); append_helper_1x4 (w3, ((offset16 == 3) ? 
0x80808080 : 0), v); } DECLSPEC void append_0x80_8x4 (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); const u32 offset16 = offset / 16; append_helper_1x4 (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4 (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4 (w2, ((offset16 == 2) ? 0x80808080 : 0), v); append_helper_1x4 (w3, ((offset16 == 3) ? 0x80808080 : 0), v); append_helper_1x4 (w4, ((offset16 == 4) ? 0x80808080 : 0), v); append_helper_1x4 (w5, ((offset16 == 5) ? 0x80808080 : 0), v); append_helper_1x4 (w6, ((offset16 == 6) ? 0x80808080 : 0), v); append_helper_1x4 (w7, ((offset16 == 7) ? 0x80808080 : 0), v); } DECLSPEC void append_0x80_1x16 (PRIVATE_AS u32x *w, const u32 offset) { u32 v[4]; set_mark_1x4 (v, offset); const u32 offset16 = offset / 16; append_helper_1x4 (w + 0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4 (w + 4, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4 (w + 8, ((offset16 == 2) ? 0x80808080 : 0), v); append_helper_1x4 (w + 12, ((offset16 == 3) ? 
0x80808080 : 0), v); } DECLSPEC void switch_buffer_by_offset_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w3[3] = hc_bytealign (w3[2], w3[3], offset); w3[2] = hc_bytealign (w3[1], w3[2], offset); w3[1] = hc_bytealign (w3[0], w3[1], offset); w3[0] = hc_bytealign (w2[3], w3[0], offset); w2[3] = hc_bytealign (w2[2], w2[3], offset); w2[2] = hc_bytealign (w2[1], w2[2], offset); w2[1] = hc_bytealign (w2[0], w2[1], offset); w2[0] = hc_bytealign (w1[3], w2[0], offset); w1[3] = hc_bytealign (w1[2], w1[3], offset); w1[2] = hc_bytealign (w1[1], w1[2], offset); w1[1] = hc_bytealign (w1[0], w1[1], offset); w1[0] = hc_bytealign (w0[3], w1[0], offset); w0[3] = hc_bytealign (w0[2], w0[3], offset); w0[2] = hc_bytealign (w0[1], w0[2], offset); w0[1] = hc_bytealign (w0[0], w0[1], offset); w0[0] = hc_bytealign ( 0, w0[0], offset); break; case 1: w3[3] = hc_bytealign (w3[1], w3[2], offset); w3[2] = hc_bytealign (w3[0], w3[1], offset); w3[1] = hc_bytealign (w2[3], w3[0], offset); w3[0] = hc_bytealign (w2[2], w2[3], offset); w2[3] = hc_bytealign (w2[1], w2[2], offset); w2[2] = hc_bytealign (w2[0], w2[1], offset); w2[1] = hc_bytealign (w1[3], w2[0], offset); w2[0] = hc_bytealign (w1[2], w1[3], offset); w1[3] = hc_bytealign (w1[1], w1[2], offset); w1[2] = hc_bytealign (w1[0], w1[1], offset); w1[1] = hc_bytealign (w0[3], w1[0], offset); w1[0] = hc_bytealign (w0[2], w0[3], offset); w0[3] = hc_bytealign (w0[1], w0[2], offset); w0[2] = hc_bytealign (w0[0], w0[1], offset); w0[1] = hc_bytealign ( 0, w0[0], offset); w0[0] = 0; break; case 2: w3[3] = hc_bytealign (w3[0], w3[1], offset); w3[2] = hc_bytealign (w2[3], w3[0], offset); w3[1] = hc_bytealign (w2[2], w2[3], offset); w3[0] = hc_bytealign (w2[1], w2[2], offset); w2[3] = hc_bytealign (w2[0], w2[1], offset); w2[2] = hc_bytealign 
(w1[3], w2[0], offset); w2[1] = hc_bytealign (w1[2], w1[3], offset); w2[0] = hc_bytealign (w1[1], w1[2], offset); w1[3] = hc_bytealign (w1[0], w1[1], offset); w1[2] = hc_bytealign (w0[3], w1[0], offset); w1[1] = hc_bytealign (w0[2], w0[3], offset); w1[0] = hc_bytealign (w0[1], w0[2], offset); w0[3] = hc_bytealign (w0[0], w0[1], offset); w0[2] = hc_bytealign ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_bytealign (w2[3], w3[0], offset); w3[2] = hc_bytealign (w2[2], w2[3], offset); w3[1] = hc_bytealign (w2[1], w2[2], offset); w3[0] = hc_bytealign (w2[0], w2[1], offset); w2[3] = hc_bytealign (w1[3], w2[0], offset); w2[2] = hc_bytealign (w1[2], w1[3], offset); w2[1] = hc_bytealign (w1[1], w1[2], offset); w2[0] = hc_bytealign (w1[0], w1[1], offset); w1[3] = hc_bytealign (w0[3], w1[0], offset); w1[2] = hc_bytealign (w0[2], w0[3], offset); w1[1] = hc_bytealign (w0[1], w0[2], offset); w1[0] = hc_bytealign (w0[0], w0[1], offset); w0[3] = hc_bytealign ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_bytealign (w2[2], w2[3], offset); w3[2] = hc_bytealign (w2[1], w2[2], offset); w3[1] = hc_bytealign (w2[0], w2[1], offset); w3[0] = hc_bytealign (w1[3], w2[0], offset); w2[3] = hc_bytealign (w1[2], w1[3], offset); w2[2] = hc_bytealign (w1[1], w1[2], offset); w2[1] = hc_bytealign (w1[0], w1[1], offset); w2[0] = hc_bytealign (w0[3], w1[0], offset); w1[3] = hc_bytealign (w0[2], w0[3], offset); w1[2] = hc_bytealign (w0[1], w0[2], offset); w1[1] = hc_bytealign (w0[0], w0[1], offset); w1[0] = hc_bytealign ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_bytealign (w2[1], w2[2], offset); w3[2] = hc_bytealign (w2[0], w2[1], offset); w3[1] = hc_bytealign (w1[3], w2[0], offset); w3[0] = hc_bytealign (w1[2], w1[3], offset); w2[3] = hc_bytealign (w1[1], w1[2], offset); w2[2] = hc_bytealign (w1[0], w1[1], offset); w2[1] = hc_bytealign (w0[3], w1[0], offset); w2[0] = hc_bytealign (w0[2], w0[3], 
offset); w1[3] = hc_bytealign (w0[1], w0[2], offset); w1[2] = hc_bytealign (w0[0], w0[1], offset); w1[1] = hc_bytealign ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_bytealign (w2[0], w2[1], offset); w3[2] = hc_bytealign (w1[3], w2[0], offset); w3[1] = hc_bytealign (w1[2], w1[3], offset); w3[0] = hc_bytealign (w1[1], w1[2], offset); w2[3] = hc_bytealign (w1[0], w1[1], offset); w2[2] = hc_bytealign (w0[3], w1[0], offset); w2[1] = hc_bytealign (w0[2], w0[3], offset); w2[0] = hc_bytealign (w0[1], w0[2], offset); w1[3] = hc_bytealign (w0[0], w0[1], offset); w1[2] = hc_bytealign ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_bytealign (w1[3], w2[0], offset); w3[2] = hc_bytealign (w1[2], w1[3], offset); w3[1] = hc_bytealign (w1[1], w1[2], offset); w3[0] = hc_bytealign (w1[0], w1[1], offset); w2[3] = hc_bytealign (w0[3], w1[0], offset); w2[2] = hc_bytealign (w0[2], w0[3], offset); w2[1] = hc_bytealign (w0[1], w0[2], offset); w2[0] = hc_bytealign (w0[0], w0[1], offset); w1[3] = hc_bytealign ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w3[3] = hc_bytealign (w1[2], w1[3], offset); w3[2] = hc_bytealign (w1[1], w1[2], offset); w3[1] = hc_bytealign (w1[0], w1[1], offset); w3[0] = hc_bytealign (w0[3], w1[0], offset); w2[3] = hc_bytealign (w0[2], w0[3], offset); w2[2] = hc_bytealign (w0[1], w0[2], offset); w2[1] = hc_bytealign (w0[0], w0[1], offset); w2[0] = hc_bytealign ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_bytealign (w1[1], w1[2], offset); w3[2] = hc_bytealign (w1[0], w1[1], offset); w3[1] = hc_bytealign (w0[3], w1[0], offset); w3[0] = hc_bytealign (w0[2], w0[3], offset); w2[3] = hc_bytealign (w0[1], w0[2], offset); w2[2] = hc_bytealign (w0[0], w0[1], offset); w2[1] = hc_bytealign ( 0, w0[0], 
offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_bytealign (w1[0], w1[1], offset); w3[2] = hc_bytealign (w0[3], w1[0], offset); w3[1] = hc_bytealign (w0[2], w0[3], offset); w3[0] = hc_bytealign (w0[1], w0[2], offset); w2[3] = hc_bytealign (w0[0], w0[1], offset); w2[2] = hc_bytealign ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_bytealign (w0[3], w1[0], offset); w3[2] = hc_bytealign (w0[2], w0[3], offset); w3[1] = hc_bytealign (w0[1], w0[2], offset); w3[0] = hc_bytealign (w0[0], w0[1], offset); w2[3] = hc_bytealign ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_bytealign (w0[2], w0[3], offset); w3[2] = hc_bytealign (w0[1], w0[2], offset); w3[1] = hc_bytealign (w0[0], w0[1], offset); w3[0] = hc_bytealign ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_bytealign (w0[1], w0[2], offset); w3[2] = hc_bytealign (w0[0], w0[1], offset); w3[1] = hc_bytealign ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_bytealign (w0[0], w0[1], offset); w3[2] = hc_bytealign ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_bytealign ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || 
defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: w3[3] = hc_byte_perm (w3[2], w3[3], selector); w3[2] = hc_byte_perm (w3[1], w3[2], selector); w3[1] = hc_byte_perm (w3[0], w3[1], selector); w3[0] = hc_byte_perm (w2[3], w3[0], selector); w2[3] = hc_byte_perm (w2[2], w2[3], selector); w2[2] = hc_byte_perm (w2[1], w2[2], selector); w2[1] = hc_byte_perm (w2[0], w2[1], selector); w2[0] = hc_byte_perm (w1[3], w2[0], selector); w1[3] = hc_byte_perm (w1[2], w1[3], selector); w1[2] = hc_byte_perm (w1[1], w1[2], selector); w1[1] = hc_byte_perm (w1[0], w1[1], selector); w1[0] = hc_byte_perm (w0[3], w1[0], selector); w0[3] = hc_byte_perm (w0[2], w0[3], selector); w0[2] = hc_byte_perm (w0[1], w0[2], selector); w0[1] = hc_byte_perm (w0[0], w0[1], selector); w0[0] = hc_byte_perm ( 0, w0[0], selector); break; case 1: w3[3] = hc_byte_perm (w3[1], w3[2], selector); w3[2] = hc_byte_perm (w3[0], w3[1], selector); w3[1] = hc_byte_perm (w2[3], w3[0], selector); w3[0] = hc_byte_perm (w2[2], w2[3], selector); w2[3] = hc_byte_perm (w2[1], w2[2], selector); w2[2] = hc_byte_perm (w2[0], w2[1], selector); w2[1] = hc_byte_perm (w1[3], w2[0], selector); w2[0] = hc_byte_perm (w1[2], w1[3], selector); w1[3] = hc_byte_perm (w1[1], w1[2], selector); w1[2] = hc_byte_perm (w1[0], w1[1], selector); w1[1] = hc_byte_perm (w0[3], w1[0], selector); w1[0] = hc_byte_perm (w0[2], w0[3], selector); w0[3] = hc_byte_perm (w0[1], w0[2], selector); w0[2] = hc_byte_perm (w0[0], w0[1], selector); w0[1] = hc_byte_perm ( 0, w0[0], selector); w0[0] = 0; break; case 2: w3[3] = hc_byte_perm (w3[0], w3[1], selector); w3[2] = hc_byte_perm (w2[3], w3[0], selector); w3[1] = hc_byte_perm 
(w2[2], w2[3], selector); w3[0] = hc_byte_perm (w2[1], w2[2], selector); w2[3] = hc_byte_perm (w2[0], w2[1], selector); w2[2] = hc_byte_perm (w1[3], w2[0], selector); w2[1] = hc_byte_perm (w1[2], w1[3], selector); w2[0] = hc_byte_perm (w1[1], w1[2], selector); w1[3] = hc_byte_perm (w1[0], w1[1], selector); w1[2] = hc_byte_perm (w0[3], w1[0], selector); w1[1] = hc_byte_perm (w0[2], w0[3], selector); w1[0] = hc_byte_perm (w0[1], w0[2], selector); w0[3] = hc_byte_perm (w0[0], w0[1], selector); w0[2] = hc_byte_perm ( 0, w0[0], selector); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_byte_perm (w2[3], w3[0], selector); w3[2] = hc_byte_perm (w2[2], w2[3], selector); w3[1] = hc_byte_perm (w2[1], w2[2], selector); w3[0] = hc_byte_perm (w2[0], w2[1], selector); w2[3] = hc_byte_perm (w1[3], w2[0], selector); w2[2] = hc_byte_perm (w1[2], w1[3], selector); w2[1] = hc_byte_perm (w1[1], w1[2], selector); w2[0] = hc_byte_perm (w1[0], w1[1], selector); w1[3] = hc_byte_perm (w0[3], w1[0], selector); w1[2] = hc_byte_perm (w0[2], w0[3], selector); w1[1] = hc_byte_perm (w0[1], w0[2], selector); w1[0] = hc_byte_perm (w0[0], w0[1], selector); w0[3] = hc_byte_perm ( 0, w0[0], selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_byte_perm (w2[2], w2[3], selector); w3[2] = hc_byte_perm (w2[1], w2[2], selector); w3[1] = hc_byte_perm (w2[0], w2[1], selector); w3[0] = hc_byte_perm (w1[3], w2[0], selector); w2[3] = hc_byte_perm (w1[2], w1[3], selector); w2[2] = hc_byte_perm (w1[1], w1[2], selector); w2[1] = hc_byte_perm (w1[0], w1[1], selector); w2[0] = hc_byte_perm (w0[3], w1[0], selector); w1[3] = hc_byte_perm (w0[2], w0[3], selector); w1[2] = hc_byte_perm (w0[1], w0[2], selector); w1[1] = hc_byte_perm (w0[0], w0[1], selector); w1[0] = hc_byte_perm ( 0, w0[0], selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_byte_perm (w2[1], w2[2], selector); w3[2] = hc_byte_perm (w2[0], w2[1], selector); w3[1] = hc_byte_perm (w1[3], w2[0], selector); w3[0] 
= hc_byte_perm (w1[2], w1[3], selector); w2[3] = hc_byte_perm (w1[1], w1[2], selector); w2[2] = hc_byte_perm (w1[0], w1[1], selector); w2[1] = hc_byte_perm (w0[3], w1[0], selector); w2[0] = hc_byte_perm (w0[2], w0[3], selector); w1[3] = hc_byte_perm (w0[1], w0[2], selector); w1[2] = hc_byte_perm (w0[0], w0[1], selector); w1[1] = hc_byte_perm ( 0, w0[0], selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_byte_perm (w2[0], w2[1], selector); w3[2] = hc_byte_perm (w1[3], w2[0], selector); w3[1] = hc_byte_perm (w1[2], w1[3], selector); w3[0] = hc_byte_perm (w1[1], w1[2], selector); w2[3] = hc_byte_perm (w1[0], w1[1], selector); w2[2] = hc_byte_perm (w0[3], w1[0], selector); w2[1] = hc_byte_perm (w0[2], w0[3], selector); w2[0] = hc_byte_perm (w0[1], w0[2], selector); w1[3] = hc_byte_perm (w0[0], w0[1], selector); w1[2] = hc_byte_perm ( 0, w0[0], selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_byte_perm (w1[3], w2[0], selector); w3[2] = hc_byte_perm (w1[2], w1[3], selector); w3[1] = hc_byte_perm (w1[1], w1[2], selector); w3[0] = hc_byte_perm (w1[0], w1[1], selector); w2[3] = hc_byte_perm (w0[3], w1[0], selector); w2[2] = hc_byte_perm (w0[2], w0[3], selector); w2[1] = hc_byte_perm (w0[1], w0[2], selector); w2[0] = hc_byte_perm (w0[0], w0[1], selector); w1[3] = hc_byte_perm ( 0, w0[0], selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w3[3] = hc_byte_perm (w1[2], w1[3], selector); w3[2] = hc_byte_perm (w1[1], w1[2], selector); w3[1] = hc_byte_perm (w1[0], w1[1], selector); w3[0] = hc_byte_perm (w0[3], w1[0], selector); w2[3] = hc_byte_perm (w0[2], w0[3], selector); w2[2] = hc_byte_perm (w0[1], w0[2], selector); w2[1] = hc_byte_perm (w0[0], w0[1], selector); w2[0] = hc_byte_perm ( 0, w0[0], selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_byte_perm 
(w1[1], w1[2], selector); w3[2] = hc_byte_perm (w1[0], w1[1], selector); w3[1] = hc_byte_perm (w0[3], w1[0], selector); w3[0] = hc_byte_perm (w0[2], w0[3], selector); w2[3] = hc_byte_perm (w0[1], w0[2], selector); w2[2] = hc_byte_perm (w0[0], w0[1], selector); w2[1] = hc_byte_perm ( 0, w0[0], selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_byte_perm (w1[0], w1[1], selector); w3[2] = hc_byte_perm (w0[3], w1[0], selector); w3[1] = hc_byte_perm (w0[2], w0[3], selector); w3[0] = hc_byte_perm (w0[1], w0[2], selector); w2[3] = hc_byte_perm (w0[0], w0[1], selector); w2[2] = hc_byte_perm ( 0, w0[0], selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_byte_perm (w0[3], w1[0], selector); w3[2] = hc_byte_perm (w0[2], w0[3], selector); w3[1] = hc_byte_perm (w0[1], w0[2], selector); w3[0] = hc_byte_perm (w0[0], w0[1], selector); w2[3] = hc_byte_perm ( 0, w0[0], selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_byte_perm (w0[2], w0[3], selector); w3[2] = hc_byte_perm (w0[1], w0[2], selector); w3[1] = hc_byte_perm (w0[0], w0[1], selector); w3[0] = hc_byte_perm ( 0, w0[0], selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_byte_perm (w0[1], w0[2], selector); w3[2] = hc_byte_perm (w0[0], w0[1], selector); w3[1] = hc_byte_perm ( 0, w0[0], selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_byte_perm (w0[0], w0[1], selector); w3[2] = hc_byte_perm ( 0, w0[0], selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 
0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_byte_perm ( 0, w0[0], selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign (w3[3], 0, offset); w3[3] = hc_bytealign (w3[2], w3[3], offset); w3[2] = hc_bytealign (w3[1], w3[2], offset); w3[1] = hc_bytealign (w3[0], w3[1], offset); w3[0] = hc_bytealign (w2[3], w3[0], offset); w2[3] = hc_bytealign (w2[2], w2[3], offset); w2[2] = hc_bytealign (w2[1], w2[2], offset); w2[1] = hc_bytealign (w2[0], w2[1], offset); w2[0] = hc_bytealign (w1[3], w2[0], offset); w1[3] = hc_bytealign (w1[2], w1[3], offset); w1[2] = hc_bytealign (w1[1], w1[2], offset); w1[1] = hc_bytealign (w1[0], w1[1], offset); w1[0] = hc_bytealign (w0[3], w1[0], offset); w0[3] = hc_bytealign (w0[2], w0[3], offset); w0[2] = hc_bytealign (w0[1], w0[2], offset); w0[1] = hc_bytealign (w0[0], w0[1], offset); w0[0] = hc_bytealign ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign (w3[3], 0, offset); c0[0] = hc_bytealign (w3[2], w3[3], offset); w3[3] = hc_bytealign (w3[1], w3[2], offset); w3[2] = hc_bytealign (w3[0], w3[1], offset); w3[1] = hc_bytealign (w2[3], w3[0], offset); w3[0] = hc_bytealign (w2[2], w2[3], offset); w2[3] = hc_bytealign (w2[1], w2[2], offset); w2[2] = hc_bytealign (w2[0], w2[1], offset); w2[1] = hc_bytealign (w1[3], w2[0], offset); w2[0] = hc_bytealign (w1[2], w1[3], offset); w1[3] = hc_bytealign (w1[1], w1[2], offset); w1[2] = hc_bytealign (w1[0], w1[1], offset); w1[1] = 
hc_bytealign (w0[3], w1[0], offset); w1[0] = hc_bytealign (w0[2], w0[3], offset); w0[3] = hc_bytealign (w0[1], w0[2], offset); w0[2] = hc_bytealign (w0[0], w0[1], offset); w0[1] = hc_bytealign ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign (w3[3], 0, offset); c0[1] = hc_bytealign (w3[2], w3[3], offset); c0[0] = hc_bytealign (w3[1], w3[2], offset); w3[3] = hc_bytealign (w3[0], w3[1], offset); w3[2] = hc_bytealign (w2[3], w3[0], offset); w3[1] = hc_bytealign (w2[2], w2[3], offset); w3[0] = hc_bytealign (w2[1], w2[2], offset); w2[3] = hc_bytealign (w2[0], w2[1], offset); w2[2] = hc_bytealign (w1[3], w2[0], offset); w2[1] = hc_bytealign (w1[2], w1[3], offset); w2[0] = hc_bytealign (w1[1], w1[2], offset); w1[3] = hc_bytealign (w1[0], w1[1], offset); w1[2] = hc_bytealign (w0[3], w1[0], offset); w1[1] = hc_bytealign (w0[2], w0[3], offset); w1[0] = hc_bytealign (w0[1], w0[2], offset); w0[3] = hc_bytealign (w0[0], w0[1], offset); w0[2] = hc_bytealign ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign (w3[3], 0, offset); c0[2] = hc_bytealign (w3[2], w3[3], offset); c0[1] = hc_bytealign (w3[1], w3[2], offset); c0[0] = hc_bytealign (w3[0], w3[1], offset); w3[3] = hc_bytealign (w2[3], w3[0], offset); w3[2] = hc_bytealign (w2[2], w2[3], offset); w3[1] = hc_bytealign (w2[1], w2[2], offset); w3[0] = hc_bytealign (w2[0], w2[1], offset); w2[3] = hc_bytealign (w1[3], w2[0], offset); w2[2] = hc_bytealign (w1[2], w1[3], offset); w2[1] = hc_bytealign (w1[1], w1[2], offset); w2[0] = hc_bytealign (w1[0], w1[1], offset); w1[3] = hc_bytealign (w0[3], w1[0], offset); w1[2] = hc_bytealign (w0[2], w0[3], offset); w1[1] = hc_bytealign (w0[1], w0[2], offset); w1[0] = hc_bytealign (w0[0], w0[1], offset); w0[3] = hc_bytealign ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign (w3[3], 0, offset); c0[3] = hc_bytealign (w3[2], w3[3], offset); c0[2] = hc_bytealign (w3[1], w3[2], offset); c0[1] = hc_bytealign 
(w3[0], w3[1], offset); c0[0] = hc_bytealign (w2[3], w3[0], offset); w3[3] = hc_bytealign (w2[2], w2[3], offset); w3[2] = hc_bytealign (w2[1], w2[2], offset); w3[1] = hc_bytealign (w2[0], w2[1], offset); w3[0] = hc_bytealign (w1[3], w2[0], offset); w2[3] = hc_bytealign (w1[2], w1[3], offset); w2[2] = hc_bytealign (w1[1], w1[2], offset); w2[1] = hc_bytealign (w1[0], w1[1], offset); w2[0] = hc_bytealign (w0[3], w1[0], offset); w1[3] = hc_bytealign (w0[2], w0[3], offset); w1[2] = hc_bytealign (w0[1], w0[2], offset); w1[1] = hc_bytealign (w0[0], w0[1], offset); w1[0] = hc_bytealign ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign (w3[3], 0, offset); c1[0] = hc_bytealign (w3[2], w3[3], offset); c0[3] = hc_bytealign (w3[1], w3[2], offset); c0[2] = hc_bytealign (w3[0], w3[1], offset); c0[1] = hc_bytealign (w2[3], w3[0], offset); c0[0] = hc_bytealign (w2[2], w2[3], offset); w3[3] = hc_bytealign (w2[1], w2[2], offset); w3[2] = hc_bytealign (w2[0], w2[1], offset); w3[1] = hc_bytealign (w1[3], w2[0], offset); w3[0] = hc_bytealign (w1[2], w1[3], offset); w2[3] = hc_bytealign (w1[1], w1[2], offset); w2[2] = hc_bytealign (w1[0], w1[1], offset); w2[1] = hc_bytealign (w0[3], w1[0], offset); w2[0] = hc_bytealign (w0[2], w0[3], offset); w1[3] = hc_bytealign (w0[1], w0[2], offset); w1[2] = hc_bytealign (w0[0], w0[1], offset); w1[1] = hc_bytealign ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign (w3[3], 0, offset); c1[1] = hc_bytealign (w3[2], w3[3], offset); c1[0] = hc_bytealign (w3[1], w3[2], offset); c0[3] = hc_bytealign (w3[0], w3[1], offset); c0[2] = hc_bytealign (w2[3], w3[0], offset); c0[1] = hc_bytealign (w2[2], w2[3], offset); c0[0] = hc_bytealign (w2[1], w2[2], offset); w3[3] = hc_bytealign (w2[0], w2[1], offset); w3[2] = hc_bytealign (w1[3], w2[0], offset); w3[1] = hc_bytealign (w1[2], w1[3], offset); w3[0] = hc_bytealign (w1[1], w1[2], offset); w2[3] = 
hc_bytealign (w1[0], w1[1], offset); w2[2] = hc_bytealign (w0[3], w1[0], offset); w2[1] = hc_bytealign (w0[2], w0[3], offset); w2[0] = hc_bytealign (w0[1], w0[2], offset); w1[3] = hc_bytealign (w0[0], w0[1], offset); w1[2] = hc_bytealign ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign (w3[3], 0, offset); c1[2] = hc_bytealign (w3[2], w3[3], offset); c1[1] = hc_bytealign (w3[1], w3[2], offset); c1[0] = hc_bytealign (w3[0], w3[1], offset); c0[3] = hc_bytealign (w2[3], w3[0], offset); c0[2] = hc_bytealign (w2[2], w2[3], offset); c0[1] = hc_bytealign (w2[1], w2[2], offset); c0[0] = hc_bytealign (w2[0], w2[1], offset); w3[3] = hc_bytealign (w1[3], w2[0], offset); w3[2] = hc_bytealign (w1[2], w1[3], offset); w3[1] = hc_bytealign (w1[1], w1[2], offset); w3[0] = hc_bytealign (w1[0], w1[1], offset); w2[3] = hc_bytealign (w0[3], w1[0], offset); w2[2] = hc_bytealign (w0[2], w0[3], offset); w2[1] = hc_bytealign (w0[1], w0[2], offset); w2[0] = hc_bytealign (w0[0], w0[1], offset); w1[3] = hc_bytealign ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign (w3[3], 0, offset); c1[3] = hc_bytealign (w3[2], w3[3], offset); c1[2] = hc_bytealign (w3[1], w3[2], offset); c1[1] = hc_bytealign (w3[0], w3[1], offset); c1[0] = hc_bytealign (w2[3], w3[0], offset); c0[3] = hc_bytealign (w2[2], w2[3], offset); c0[2] = hc_bytealign (w2[1], w2[2], offset); c0[1] = hc_bytealign (w2[0], w2[1], offset); c0[0] = hc_bytealign (w1[3], w2[0], offset); w3[3] = hc_bytealign (w1[2], w1[3], offset); w3[2] = hc_bytealign (w1[1], w1[2], offset); w3[1] = hc_bytealign (w1[0], w1[1], offset); w3[0] = hc_bytealign (w0[3], w1[0], offset); w2[3] = hc_bytealign (w0[2], w0[3], offset); w2[2] = hc_bytealign (w0[1], w0[2], offset); w2[1] = hc_bytealign (w0[0], w0[1], offset); w2[0] = hc_bytealign ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 
0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign (w3[3], 0, offset); c2[0] = hc_bytealign (w3[2], w3[3], offset); c1[3] = hc_bytealign (w3[1], w3[2], offset); c1[2] = hc_bytealign (w3[0], w3[1], offset); c1[1] = hc_bytealign (w2[3], w3[0], offset); c1[0] = hc_bytealign (w2[2], w2[3], offset); c0[3] = hc_bytealign (w2[1], w2[2], offset); c0[2] = hc_bytealign (w2[0], w2[1], offset); c0[1] = hc_bytealign (w1[3], w2[0], offset); c0[0] = hc_bytealign (w1[2], w1[3], offset); w3[3] = hc_bytealign (w1[1], w1[2], offset); w3[2] = hc_bytealign (w1[0], w1[1], offset); w3[1] = hc_bytealign (w0[3], w1[0], offset); w3[0] = hc_bytealign (w0[2], w0[3], offset); w2[3] = hc_bytealign (w0[1], w0[2], offset); w2[2] = hc_bytealign (w0[0], w0[1], offset); w2[1] = hc_bytealign ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign (w3[3], 0, offset); c2[1] = hc_bytealign (w3[2], w3[3], offset); c2[0] = hc_bytealign (w3[1], w3[2], offset); c1[3] = hc_bytealign (w3[0], w3[1], offset); c1[2] = hc_bytealign (w2[3], w3[0], offset); c1[1] = hc_bytealign (w2[2], w2[3], offset); c1[0] = hc_bytealign (w2[1], w2[2], offset); c0[3] = hc_bytealign (w2[0], w2[1], offset); c0[2] = hc_bytealign (w1[3], w2[0], offset); c0[1] = hc_bytealign (w1[2], w1[3], offset); c0[0] = hc_bytealign (w1[1], w1[2], offset); w3[3] = hc_bytealign (w1[0], w1[1], offset); w3[2] = hc_bytealign (w0[3], w1[0], offset); w3[1] = hc_bytealign (w0[2], w0[3], offset); w3[0] = hc_bytealign (w0[1], w0[2], offset); w2[3] = hc_bytealign (w0[0], w0[1], offset); w2[2] = hc_bytealign ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign (w3[3], 0, offset); c2[2] = hc_bytealign (w3[2], w3[3], offset); c2[1] = hc_bytealign (w3[1], w3[2], offset); c2[0] = hc_bytealign (w3[0], w3[1], offset); c1[3] = 
hc_bytealign (w2[3], w3[0], offset); c1[2] = hc_bytealign (w2[2], w2[3], offset); c1[1] = hc_bytealign (w2[1], w2[2], offset); c1[0] = hc_bytealign (w2[0], w2[1], offset); c0[3] = hc_bytealign (w1[3], w2[0], offset); c0[2] = hc_bytealign (w1[2], w1[3], offset); c0[1] = hc_bytealign (w1[1], w1[2], offset); c0[0] = hc_bytealign (w1[0], w1[1], offset); w3[3] = hc_bytealign (w0[3], w1[0], offset); w3[2] = hc_bytealign (w0[2], w0[3], offset); w3[1] = hc_bytealign (w0[1], w0[2], offset); w3[0] = hc_bytealign (w0[0], w0[1], offset); w2[3] = hc_bytealign ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign (w3[3], 0, offset); c2[3] = hc_bytealign (w3[2], w3[3], offset); c2[2] = hc_bytealign (w3[1], w3[2], offset); c2[1] = hc_bytealign (w3[0], w3[1], offset); c2[0] = hc_bytealign (w2[3], w3[0], offset); c1[3] = hc_bytealign (w2[2], w2[3], offset); c1[2] = hc_bytealign (w2[1], w2[2], offset); c1[1] = hc_bytealign (w2[0], w2[1], offset); c1[0] = hc_bytealign (w1[3], w2[0], offset); c0[3] = hc_bytealign (w1[2], w1[3], offset); c0[2] = hc_bytealign (w1[1], w1[2], offset); c0[1] = hc_bytealign (w1[0], w1[1], offset); c0[0] = hc_bytealign (w0[3], w1[0], offset); w3[3] = hc_bytealign (w0[2], w0[3], offset); w3[2] = hc_bytealign (w0[1], w0[2], offset); w3[1] = hc_bytealign (w0[0], w0[1], offset); w3[0] = hc_bytealign ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign (w3[3], 0, offset); c3[0] = hc_bytealign (w3[2], w3[3], offset); c2[3] = hc_bytealign (w3[1], w3[2], offset); c2[2] = hc_bytealign (w3[0], w3[1], offset); c2[1] = hc_bytealign (w2[3], w3[0], offset); c2[0] = hc_bytealign (w2[2], w2[3], offset); c1[3] = hc_bytealign (w2[1], w2[2], offset); c1[2] = hc_bytealign (w2[0], w2[1], offset); c1[1] = hc_bytealign 
(w1[3], w2[0], offset); c1[0] = hc_bytealign (w1[2], w1[3], offset); c0[3] = hc_bytealign (w1[1], w1[2], offset); c0[2] = hc_bytealign (w1[0], w1[1], offset); c0[1] = hc_bytealign (w0[3], w1[0], offset); c0[0] = hc_bytealign (w0[2], w0[3], offset); w3[3] = hc_bytealign (w0[1], w0[2], offset); w3[2] = hc_bytealign (w0[0], w0[1], offset); w3[1] = hc_bytealign ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign (w3[3], 0, offset); c3[1] = hc_bytealign (w3[2], w3[3], offset); c3[0] = hc_bytealign (w3[1], w3[2], offset); c2[3] = hc_bytealign (w3[0], w3[1], offset); c2[2] = hc_bytealign (w2[3], w3[0], offset); c2[1] = hc_bytealign (w2[2], w2[3], offset); c2[0] = hc_bytealign (w2[1], w2[2], offset); c1[3] = hc_bytealign (w2[0], w2[1], offset); c1[2] = hc_bytealign (w1[3], w2[0], offset); c1[1] = hc_bytealign (w1[2], w1[3], offset); c1[0] = hc_bytealign (w1[1], w1[2], offset); c0[3] = hc_bytealign (w1[0], w1[1], offset); c0[2] = hc_bytealign (w0[3], w1[0], offset); c0[1] = hc_bytealign (w0[2], w0[3], offset); c0[0] = hc_bytealign (w0[1], w0[2], offset); w3[3] = hc_bytealign (w0[0], w0[1], offset); w3[2] = hc_bytealign ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign (w3[3], 0, offset); c3[2] = hc_bytealign (w3[2], w3[3], offset); c3[1] = hc_bytealign (w3[1], w3[2], offset); c3[0] = hc_bytealign (w3[0], w3[1], offset); c2[3] = hc_bytealign (w2[3], w3[0], offset); c2[2] = hc_bytealign (w2[2], w2[3], offset); c2[1] = hc_bytealign (w2[1], w2[2], offset); c2[0] = hc_bytealign (w2[0], w2[1], offset); c1[3] = hc_bytealign (w1[3], w2[0], offset); c1[2] = hc_bytealign (w1[2], w1[3], offset); c1[1] = hc_bytealign (w1[1], w1[2], offset); c1[0] = hc_bytealign (w1[0], w1[1], 
offset); c0[3] = hc_bytealign (w0[3], w1[0], offset); c0[2] = hc_bytealign (w0[2], w0[3], offset); c0[1] = hc_bytealign (w0[1], w0[2], offset); c0[0] = hc_bytealign (w0[0], w0[1], offset); w3[3] = hc_bytealign ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #ifdef IS_NV // atm only same code as for AMD, but could be improved switch (offset_switch) { case 0: c0[0] = hc_bytealign (w3[3], 0, offset); w3[3] = hc_bytealign (w3[2], w3[3], offset); w3[2] = hc_bytealign (w3[1], w3[2], offset); w3[1] = hc_bytealign (w3[0], w3[1], offset); w3[0] = hc_bytealign (w2[3], w3[0], offset); w2[3] = hc_bytealign (w2[2], w2[3], offset); w2[2] = hc_bytealign (w2[1], w2[2], offset); w2[1] = hc_bytealign (w2[0], w2[1], offset); w2[0] = hc_bytealign (w1[3], w2[0], offset); w1[3] = hc_bytealign (w1[2], w1[3], offset); w1[2] = hc_bytealign (w1[1], w1[2], offset); w1[1] = hc_bytealign (w1[0], w1[1], offset); w1[0] = hc_bytealign (w0[3], w1[0], offset); w0[3] = hc_bytealign (w0[2], w0[3], offset); w0[2] = hc_bytealign (w0[1], w0[2], offset); w0[1] = hc_bytealign (w0[0], w0[1], offset); w0[0] = hc_bytealign ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign (w3[3], 0, offset); c0[0] = hc_bytealign (w3[2], w3[3], offset); w3[3] = hc_bytealign (w3[1], w3[2], offset); w3[2] = hc_bytealign (w3[0], w3[1], offset); w3[1] = hc_bytealign (w2[3], w3[0], offset); w3[0] = hc_bytealign (w2[2], w2[3], offset); w2[3] = hc_bytealign (w2[1], w2[2], offset); w2[2] = hc_bytealign (w2[0], w2[1], offset); w2[1] = hc_bytealign (w1[3], w2[0], offset); w2[0] = hc_bytealign (w1[2], w1[3], offset); w1[3] = hc_bytealign (w1[1], w1[2], offset); w1[2] = hc_bytealign (w1[0], w1[1], offset); w1[1] = hc_bytealign (w0[3], w1[0], offset); w1[0] = hc_bytealign (w0[2], w0[3], offset); w0[3] = hc_bytealign (w0[1], w0[2], offset); w0[2] = hc_bytealign (w0[0], w0[1], 
offset); w0[1] = hc_bytealign ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign (w3[3], 0, offset); c0[1] = hc_bytealign (w3[2], w3[3], offset); c0[0] = hc_bytealign (w3[1], w3[2], offset); w3[3] = hc_bytealign (w3[0], w3[1], offset); w3[2] = hc_bytealign (w2[3], w3[0], offset); w3[1] = hc_bytealign (w2[2], w2[3], offset); w3[0] = hc_bytealign (w2[1], w2[2], offset); w2[3] = hc_bytealign (w2[0], w2[1], offset); w2[2] = hc_bytealign (w1[3], w2[0], offset); w2[1] = hc_bytealign (w1[2], w1[3], offset); w2[0] = hc_bytealign (w1[1], w1[2], offset); w1[3] = hc_bytealign (w1[0], w1[1], offset); w1[2] = hc_bytealign (w0[3], w1[0], offset); w1[1] = hc_bytealign (w0[2], w0[3], offset); w1[0] = hc_bytealign (w0[1], w0[2], offset); w0[3] = hc_bytealign (w0[0], w0[1], offset); w0[2] = hc_bytealign ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign (w3[3], 0, offset); c0[2] = hc_bytealign (w3[2], w3[3], offset); c0[1] = hc_bytealign (w3[1], w3[2], offset); c0[0] = hc_bytealign (w3[0], w3[1], offset); w3[3] = hc_bytealign (w2[3], w3[0], offset); w3[2] = hc_bytealign (w2[2], w2[3], offset); w3[1] = hc_bytealign (w2[1], w2[2], offset); w3[0] = hc_bytealign (w2[0], w2[1], offset); w2[3] = hc_bytealign (w1[3], w2[0], offset); w2[2] = hc_bytealign (w1[2], w1[3], offset); w2[1] = hc_bytealign (w1[1], w1[2], offset); w2[0] = hc_bytealign (w1[0], w1[1], offset); w1[3] = hc_bytealign (w0[3], w1[0], offset); w1[2] = hc_bytealign (w0[2], w0[3], offset); w1[1] = hc_bytealign (w0[1], w0[2], offset); w1[0] = hc_bytealign (w0[0], w0[1], offset); w0[3] = hc_bytealign ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign (w3[3], 0, offset); c0[3] = hc_bytealign (w3[2], w3[3], offset); c0[2] = hc_bytealign (w3[1], w3[2], offset); c0[1] = hc_bytealign (w3[0], w3[1], offset); c0[0] = hc_bytealign (w2[3], w3[0], offset); w3[3] = hc_bytealign (w2[2], w2[3], offset); w3[2] = hc_bytealign (w2[1], w2[2], offset); w3[1] = 
hc_bytealign (w2[0], w2[1], offset); w3[0] = hc_bytealign (w1[3], w2[0], offset); w2[3] = hc_bytealign (w1[2], w1[3], offset); w2[2] = hc_bytealign (w1[1], w1[2], offset); w2[1] = hc_bytealign (w1[0], w1[1], offset); w2[0] = hc_bytealign (w0[3], w1[0], offset); w1[3] = hc_bytealign (w0[2], w0[3], offset); w1[2] = hc_bytealign (w0[1], w0[2], offset); w1[1] = hc_bytealign (w0[0], w0[1], offset); w1[0] = hc_bytealign ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign (w3[3], 0, offset); c1[0] = hc_bytealign (w3[2], w3[3], offset); c0[3] = hc_bytealign (w3[1], w3[2], offset); c0[2] = hc_bytealign (w3[0], w3[1], offset); c0[1] = hc_bytealign (w2[3], w3[0], offset); c0[0] = hc_bytealign (w2[2], w2[3], offset); w3[3] = hc_bytealign (w2[1], w2[2], offset); w3[2] = hc_bytealign (w2[0], w2[1], offset); w3[1] = hc_bytealign (w1[3], w2[0], offset); w3[0] = hc_bytealign (w1[2], w1[3], offset); w2[3] = hc_bytealign (w1[1], w1[2], offset); w2[2] = hc_bytealign (w1[0], w1[1], offset); w2[1] = hc_bytealign (w0[3], w1[0], offset); w2[0] = hc_bytealign (w0[2], w0[3], offset); w1[3] = hc_bytealign (w0[1], w0[2], offset); w1[2] = hc_bytealign (w0[0], w0[1], offset); w1[1] = hc_bytealign ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign (w3[3], 0, offset); c1[1] = hc_bytealign (w3[2], w3[3], offset); c1[0] = hc_bytealign (w3[1], w3[2], offset); c0[3] = hc_bytealign (w3[0], w3[1], offset); c0[2] = hc_bytealign (w2[3], w3[0], offset); c0[1] = hc_bytealign (w2[2], w2[3], offset); c0[0] = hc_bytealign (w2[1], w2[2], offset); w3[3] = hc_bytealign (w2[0], w2[1], offset); w3[2] = hc_bytealign (w1[3], w2[0], offset); w3[1] = hc_bytealign (w1[2], w1[3], offset); w3[0] = hc_bytealign (w1[1], w1[2], offset); w2[3] = hc_bytealign (w1[0], w1[1], offset); w2[2] = hc_bytealign (w0[3], w1[0], offset); w2[1] = hc_bytealign (w0[2], w0[3], offset); w2[0] = hc_bytealign (w0[1], w0[2], offset); 
w1[3] = hc_bytealign (w0[0], w0[1], offset); w1[2] = hc_bytealign ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign (w3[3], 0, offset); c1[2] = hc_bytealign (w3[2], w3[3], offset); c1[1] = hc_bytealign (w3[1], w3[2], offset); c1[0] = hc_bytealign (w3[0], w3[1], offset); c0[3] = hc_bytealign (w2[3], w3[0], offset); c0[2] = hc_bytealign (w2[2], w2[3], offset); c0[1] = hc_bytealign (w2[1], w2[2], offset); c0[0] = hc_bytealign (w2[0], w2[1], offset); w3[3] = hc_bytealign (w1[3], w2[0], offset); w3[2] = hc_bytealign (w1[2], w1[3], offset); w3[1] = hc_bytealign (w1[1], w1[2], offset); w3[0] = hc_bytealign (w1[0], w1[1], offset); w2[3] = hc_bytealign (w0[3], w1[0], offset); w2[2] = hc_bytealign (w0[2], w0[3], offset); w2[1] = hc_bytealign (w0[1], w0[2], offset); w2[0] = hc_bytealign (w0[0], w0[1], offset); w1[3] = hc_bytealign ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign (w3[3], 0, offset); c1[3] = hc_bytealign (w3[2], w3[3], offset); c1[2] = hc_bytealign (w3[1], w3[2], offset); c1[1] = hc_bytealign (w3[0], w3[1], offset); c1[0] = hc_bytealign (w2[3], w3[0], offset); c0[3] = hc_bytealign (w2[2], w2[3], offset); c0[2] = hc_bytealign (w2[1], w2[2], offset); c0[1] = hc_bytealign (w2[0], w2[1], offset); c0[0] = hc_bytealign (w1[3], w2[0], offset); w3[3] = hc_bytealign (w1[2], w1[3], offset); w3[2] = hc_bytealign (w1[1], w1[2], offset); w3[1] = hc_bytealign (w1[0], w1[1], offset); w3[0] = hc_bytealign (w0[3], w1[0], offset); w2[3] = hc_bytealign (w0[2], w0[3], offset); w2[2] = hc_bytealign (w0[1], w0[2], offset); w2[1] = hc_bytealign (w0[0], w0[1], offset); w2[0] = hc_bytealign ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign (w3[3], 0, offset); c2[0] = hc_bytealign (w3[2], w3[3], offset); c1[3] = hc_bytealign (w3[1], 
w3[2], offset); c1[2] = hc_bytealign (w3[0], w3[1], offset); c1[1] = hc_bytealign (w2[3], w3[0], offset); c1[0] = hc_bytealign (w2[2], w2[3], offset); c0[3] = hc_bytealign (w2[1], w2[2], offset); c0[2] = hc_bytealign (w2[0], w2[1], offset); c0[1] = hc_bytealign (w1[3], w2[0], offset); c0[0] = hc_bytealign (w1[2], w1[3], offset); w3[3] = hc_bytealign (w1[1], w1[2], offset); w3[2] = hc_bytealign (w1[0], w1[1], offset); w3[1] = hc_bytealign (w0[3], w1[0], offset); w3[0] = hc_bytealign (w0[2], w0[3], offset); w2[3] = hc_bytealign (w0[1], w0[2], offset); w2[2] = hc_bytealign (w0[0], w0[1], offset); w2[1] = hc_bytealign ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign (w3[3], 0, offset); c2[1] = hc_bytealign (w3[2], w3[3], offset); c2[0] = hc_bytealign (w3[1], w3[2], offset); c1[3] = hc_bytealign (w3[0], w3[1], offset); c1[2] = hc_bytealign (w2[3], w3[0], offset); c1[1] = hc_bytealign (w2[2], w2[3], offset); c1[0] = hc_bytealign (w2[1], w2[2], offset); c0[3] = hc_bytealign (w2[0], w2[1], offset); c0[2] = hc_bytealign (w1[3], w2[0], offset); c0[1] = hc_bytealign (w1[2], w1[3], offset); c0[0] = hc_bytealign (w1[1], w1[2], offset); w3[3] = hc_bytealign (w1[0], w1[1], offset); w3[2] = hc_bytealign (w0[3], w1[0], offset); w3[1] = hc_bytealign (w0[2], w0[3], offset); w3[0] = hc_bytealign (w0[1], w0[2], offset); w2[3] = hc_bytealign (w0[0], w0[1], offset); w2[2] = hc_bytealign ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign (w3[3], 0, offset); c2[2] = hc_bytealign (w3[2], w3[3], offset); c2[1] = hc_bytealign (w3[1], w3[2], offset); c2[0] = hc_bytealign (w3[0], w3[1], offset); c1[3] = hc_bytealign (w2[3], w3[0], offset); c1[2] = hc_bytealign (w2[2], w2[3], offset); c1[1] = hc_bytealign (w2[1], w2[2], offset); c1[0] = hc_bytealign (w2[0], w2[1], offset); 
c0[3] = hc_bytealign (w1[3], w2[0], offset); c0[2] = hc_bytealign (w1[2], w1[3], offset); c0[1] = hc_bytealign (w1[1], w1[2], offset); c0[0] = hc_bytealign (w1[0], w1[1], offset); w3[3] = hc_bytealign (w0[3], w1[0], offset); w3[2] = hc_bytealign (w0[2], w0[3], offset); w3[1] = hc_bytealign (w0[1], w0[2], offset); w3[0] = hc_bytealign (w0[0], w0[1], offset); w2[3] = hc_bytealign ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign (w3[3], 0, offset); c2[3] = hc_bytealign (w3[2], w3[3], offset); c2[2] = hc_bytealign (w3[1], w3[2], offset); c2[1] = hc_bytealign (w3[0], w3[1], offset); c2[0] = hc_bytealign (w2[3], w3[0], offset); c1[3] = hc_bytealign (w2[2], w2[3], offset); c1[2] = hc_bytealign (w2[1], w2[2], offset); c1[1] = hc_bytealign (w2[0], w2[1], offset); c1[0] = hc_bytealign (w1[3], w2[0], offset); c0[3] = hc_bytealign (w1[2], w1[3], offset); c0[2] = hc_bytealign (w1[1], w1[2], offset); c0[1] = hc_bytealign (w1[0], w1[1], offset); c0[0] = hc_bytealign (w0[3], w1[0], offset); w3[3] = hc_bytealign (w0[2], w0[3], offset); w3[2] = hc_bytealign (w0[1], w0[2], offset); w3[1] = hc_bytealign (w0[0], w0[1], offset); w3[0] = hc_bytealign ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign (w3[3], 0, offset); c3[0] = hc_bytealign (w3[2], w3[3], offset); c2[3] = hc_bytealign (w3[1], w3[2], offset); c2[2] = hc_bytealign (w3[0], w3[1], offset); c2[1] = hc_bytealign (w2[3], w3[0], offset); c2[0] = hc_bytealign (w2[2], w2[3], offset); c1[3] = hc_bytealign (w2[1], w2[2], offset); c1[2] = hc_bytealign (w2[0], w2[1], offset); c1[1] = hc_bytealign (w1[3], w2[0], offset); c1[0] = hc_bytealign (w1[2], w1[3], offset); c0[3] = hc_bytealign (w1[1], w1[2], offset); c0[2] = hc_bytealign (w1[0], w1[1], offset); c0[1] = 
hc_bytealign (w0[3], w1[0], offset); c0[0] = hc_bytealign (w0[2], w0[3], offset); w3[3] = hc_bytealign (w0[1], w0[2], offset); w3[2] = hc_bytealign (w0[0], w0[1], offset); w3[1] = hc_bytealign ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign (w3[3], 0, offset); c3[1] = hc_bytealign (w3[2], w3[3], offset); c3[0] = hc_bytealign (w3[1], w3[2], offset); c2[3] = hc_bytealign (w3[0], w3[1], offset); c2[2] = hc_bytealign (w2[3], w3[0], offset); c2[1] = hc_bytealign (w2[2], w2[3], offset); c2[0] = hc_bytealign (w2[1], w2[2], offset); c1[3] = hc_bytealign (w2[0], w2[1], offset); c1[2] = hc_bytealign (w1[3], w2[0], offset); c1[1] = hc_bytealign (w1[2], w1[3], offset); c1[0] = hc_bytealign (w1[1], w1[2], offset); c0[3] = hc_bytealign (w1[0], w1[1], offset); c0[2] = hc_bytealign (w0[3], w1[0], offset); c0[1] = hc_bytealign (w0[2], w0[3], offset); c0[0] = hc_bytealign (w0[1], w0[2], offset); w3[3] = hc_bytealign (w0[0], w0[1], offset); w3[2] = hc_bytealign ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign (w3[3], 0, offset); c3[2] = hc_bytealign (w3[2], w3[3], offset); c3[1] = hc_bytealign (w3[1], w3[2], offset); c3[0] = hc_bytealign (w3[0], w3[1], offset); c2[3] = hc_bytealign (w2[3], w3[0], offset); c2[2] = hc_bytealign (w2[2], w2[3], offset); c2[1] = hc_bytealign (w2[1], w2[2], offset); c2[0] = hc_bytealign (w2[0], w2[1], offset); c1[3] = hc_bytealign (w1[3], w2[0], offset); c1[2] = hc_bytealign (w1[2], w1[3], offset); c1[1] = hc_bytealign (w1[1], w1[2], offset); c1[0] = hc_bytealign (w1[0], w1[1], offset); c0[3] = hc_bytealign (w0[3], w1[0], offset); c0[2] = hc_bytealign (w0[2], w0[3], offset); c0[1] = hc_bytealign (w0[1], w0[2], offset); c0[0] = hc_bytealign 
(w0[0], w0[1], offset); w3[3] = hc_bytealign ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } /** * Shifts the 16-word sequence w0[0]..w3[3] towards higher word indices, in place. * * offset / 4 selects the switch case. In case N, destination word i is built from source words i-N-1 and i-N (a literal 0 stands in below w0[0]), the N lowest words are set to 0, and source words that no destination refers to are lost. The sub-word part of the shift is left to hc_bytealign_be () or hc_byte_perm (). * * There is no default case, so an offset_switch outside 0..15 leaves all four buffers unchanged; the same holds when neither preprocessor branch in the body is compiled in. * * NOTE(review): the _be suffix presumably means the words hold big-endian packed bytes and offset is a byte count - confirm against hc_bytealign_be (). */ DECLSPEC void switch_buffer_by_offset_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32 offset) { const int offset_switch = offset / 4; /* Variant built on hc_bytealign_be (lower-index word, higher-index word, offset). Every case assigns from w3[3] down to w0[0], so each source word is read before it is overwritten. */ #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w3[3] = hc_bytealign_be (w3[2], w3[3], offset); w3[2] = hc_bytealign_be (w3[1], w3[2], offset); w3[1] = hc_bytealign_be (w3[0], w3[1], offset); w3[0] = hc_bytealign_be (w2[3], w3[0], offset); w2[3] = hc_bytealign_be (w2[2], w2[3], offset); w2[2] = hc_bytealign_be (w2[1], w2[2], offset); w2[1] = hc_bytealign_be (w2[0], w2[1], offset); w2[0] = hc_bytealign_be (w1[3], w2[0], offset); w1[3] = hc_bytealign_be (w1[2], w1[3], offset); w1[2] = hc_bytealign_be (w1[1], w1[2], offset); w1[1] = hc_bytealign_be (w1[0], w1[1], offset); w1[0] = hc_bytealign_be (w0[3], w1[0], offset); w0[3] = hc_bytealign_be (w0[2], w0[3], offset); w0[2] = hc_bytealign_be (w0[1], w0[2], offset); w0[1] = hc_bytealign_be (w0[0], w0[1], offset); w0[0] = hc_bytealign_be ( 0, w0[0], offset); break; case 1: w3[3] = hc_bytealign_be (w3[1], w3[2], offset); w3[2] = hc_bytealign_be (w3[0], w3[1], offset); w3[1] = hc_bytealign_be (w2[3], w3[0], offset); w3[0] = hc_bytealign_be (w2[2], w2[3], offset); w2[3] = hc_bytealign_be (w2[1], w2[2], offset); w2[2] = hc_bytealign_be (w2[0], w2[1], offset); w2[1] = hc_bytealign_be (w1[3], w2[0], offset); w2[0] = hc_bytealign_be (w1[2], w1[3], offset); w1[3] = hc_bytealign_be (w1[1], w1[2], offset); w1[2] = hc_bytealign_be (w1[0], w1[1], offset); w1[1] = hc_bytealign_be (w0[3], w1[0], offset); w1[0] = hc_bytealign_be (w0[2], w0[3], offset); w0[3] = hc_bytealign_be (w0[1], w0[2], offset); w0[2] = hc_bytealign_be (w0[0], w0[1], offset);
w0[1] = hc_bytealign_be ( 0, w0[0], offset); w0[0] = 0; break; case 2: w3[3] = hc_bytealign_be (w3[0], w3[1], offset); w3[2] = hc_bytealign_be (w2[3], w3[0], offset); w3[1] = hc_bytealign_be (w2[2], w2[3], offset); w3[0] = hc_bytealign_be (w2[1], w2[2], offset); w2[3] = hc_bytealign_be (w2[0], w2[1], offset); w2[2] = hc_bytealign_be (w1[3], w2[0], offset); w2[1] = hc_bytealign_be (w1[2], w1[3], offset); w2[0] = hc_bytealign_be (w1[1], w1[2], offset); w1[3] = hc_bytealign_be (w1[0], w1[1], offset); w1[2] = hc_bytealign_be (w0[3], w1[0], offset); w1[1] = hc_bytealign_be (w0[2], w0[3], offset); w1[0] = hc_bytealign_be (w0[1], w0[2], offset); w0[3] = hc_bytealign_be (w0[0], w0[1], offset); w0[2] = hc_bytealign_be ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_bytealign_be (w2[3], w3[0], offset); w3[2] = hc_bytealign_be (w2[2], w2[3], offset); w3[1] = hc_bytealign_be (w2[1], w2[2], offset); w3[0] = hc_bytealign_be (w2[0], w2[1], offset); w2[3] = hc_bytealign_be (w1[3], w2[0], offset); w2[2] = hc_bytealign_be (w1[2], w1[3], offset); w2[1] = hc_bytealign_be (w1[1], w1[2], offset); w2[0] = hc_bytealign_be (w1[0], w1[1], offset); w1[3] = hc_bytealign_be (w0[3], w1[0], offset); w1[2] = hc_bytealign_be (w0[2], w0[3], offset); w1[1] = hc_bytealign_be (w0[1], w0[2], offset); w1[0] = hc_bytealign_be (w0[0], w0[1], offset); w0[3] = hc_bytealign_be ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_bytealign_be (w2[2], w2[3], offset); w3[2] = hc_bytealign_be (w2[1], w2[2], offset); w3[1] = hc_bytealign_be (w2[0], w2[1], offset); w3[0] = hc_bytealign_be (w1[3], w2[0], offset); w2[3] = hc_bytealign_be (w1[2], w1[3], offset); w2[2] = hc_bytealign_be (w1[1], w1[2], offset); w2[1] = hc_bytealign_be (w1[0], w1[1], offset); w2[0] = hc_bytealign_be (w0[3], w1[0], offset); w1[3] = hc_bytealign_be (w0[2], w0[3], offset); w1[2] = hc_bytealign_be (w0[1], w0[2], offset); w1[1] = hc_bytealign_be (w0[0], w0[1], offset); w1[0] = hc_bytealign_be (
0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_bytealign_be (w2[1], w2[2], offset); w3[2] = hc_bytealign_be (w2[0], w2[1], offset); w3[1] = hc_bytealign_be (w1[3], w2[0], offset); w3[0] = hc_bytealign_be (w1[2], w1[3], offset); w2[3] = hc_bytealign_be (w1[1], w1[2], offset); w2[2] = hc_bytealign_be (w1[0], w1[1], offset); w2[1] = hc_bytealign_be (w0[3], w1[0], offset); w2[0] = hc_bytealign_be (w0[2], w0[3], offset); w1[3] = hc_bytealign_be (w0[1], w0[2], offset); w1[2] = hc_bytealign_be (w0[0], w0[1], offset); w1[1] = hc_bytealign_be ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_bytealign_be (w2[0], w2[1], offset); w3[2] = hc_bytealign_be (w1[3], w2[0], offset); w3[1] = hc_bytealign_be (w1[2], w1[3], offset); w3[0] = hc_bytealign_be (w1[1], w1[2], offset); w2[3] = hc_bytealign_be (w1[0], w1[1], offset); w2[2] = hc_bytealign_be (w0[3], w1[0], offset); w2[1] = hc_bytealign_be (w0[2], w0[3], offset); w2[0] = hc_bytealign_be (w0[1], w0[2], offset); w1[3] = hc_bytealign_be (w0[0], w0[1], offset); w1[2] = hc_bytealign_be ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_bytealign_be (w1[3], w2[0], offset); w3[2] = hc_bytealign_be (w1[2], w1[3], offset); w3[1] = hc_bytealign_be (w1[1], w1[2], offset); w3[0] = hc_bytealign_be (w1[0], w1[1], offset); w2[3] = hc_bytealign_be (w0[3], w1[0], offset); w2[2] = hc_bytealign_be (w0[2], w0[3], offset); w2[1] = hc_bytealign_be (w0[1], w0[2], offset); w2[0] = hc_bytealign_be (w0[0], w0[1], offset); w1[3] = hc_bytealign_be ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w3[3] = hc_bytealign_be (w1[2], w1[3], offset); w3[2] = hc_bytealign_be (w1[1], w1[2], offset); w3[1] = hc_bytealign_be (w1[0], w1[1], offset); w3[0] = hc_bytealign_be (w0[3], w1[0], offset); w2[3] = hc_bytealign_be (w0[2], w0[3], offset); w2[2] =
hc_bytealign_be (w0[1], w0[2], offset); w2[1] = hc_bytealign_be (w0[0], w0[1], offset); w2[0] = hc_bytealign_be ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_bytealign_be (w1[1], w1[2], offset); w3[2] = hc_bytealign_be (w1[0], w1[1], offset); w3[1] = hc_bytealign_be (w0[3], w1[0], offset); w3[0] = hc_bytealign_be (w0[2], w0[3], offset); w2[3] = hc_bytealign_be (w0[1], w0[2], offset); w2[2] = hc_bytealign_be (w0[0], w0[1], offset); w2[1] = hc_bytealign_be ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_bytealign_be (w1[0], w1[1], offset); w3[2] = hc_bytealign_be (w0[3], w1[0], offset); w3[1] = hc_bytealign_be (w0[2], w0[3], offset); w3[0] = hc_bytealign_be (w0[1], w0[2], offset); w2[3] = hc_bytealign_be (w0[0], w0[1], offset); w2[2] = hc_bytealign_be ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_bytealign_be (w0[3], w1[0], offset); w3[2] = hc_bytealign_be (w0[2], w0[3], offset); w3[1] = hc_bytealign_be (w0[1], w0[2], offset); w3[0] = hc_bytealign_be (w0[0], w0[1], offset); w2[3] = hc_bytealign_be ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_bytealign_be (w0[2], w0[3], offset); w3[2] = hc_bytealign_be (w0[1], w0[2], offset); w3[1] = hc_bytealign_be (w0[0], w0[1], offset); w3[0] = hc_bytealign_be ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_bytealign_be (w0[1], w0[2], offset); w3[2] = hc_bytealign_be (w0[0], w0[1], offset); w3[1] = hc_bytealign_be ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] =
0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_bytealign_be (w0[0], w0[1], offset); w3[2] = hc_bytealign_be ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_bytealign_be ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif /* Variant built on hc_byte_perm (higher-index word, lower-index word, selector); the operand order is swapped relative to the hc_bytealign_be calls. selector depends on offset & 3 only: under IS_NV it is 0x76543210 shifted right by one nibble per step and masked to its low 16 bits, under IS_AMD / IS_HIP it is 0x0706050403020100 shifted right by one byte per step and passed through l32_from_64_S (). */ #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w3[3] = hc_byte_perm (w3[3], w3[2], selector); w3[2] = hc_byte_perm (w3[2], w3[1], selector); w3[1] = hc_byte_perm (w3[1], w3[0], selector); w3[0] = hc_byte_perm (w3[0], w2[3], selector); w2[3] = hc_byte_perm (w2[3], w2[2], selector); w2[2] = hc_byte_perm (w2[2], w2[1], selector); w2[1] = hc_byte_perm (w2[1], w2[0], selector); w2[0] = hc_byte_perm (w2[0], w1[3], selector); w1[3] = hc_byte_perm (w1[3], w1[2], selector); w1[2] = hc_byte_perm (w1[2], w1[1], selector); w1[1] = hc_byte_perm (w1[1], w1[0], selector); w1[0] = hc_byte_perm (w1[0], w0[3], selector); w0[3] = hc_byte_perm (w0[3], w0[2], selector); w0[2] = hc_byte_perm (w0[2], w0[1], selector); w0[1] = hc_byte_perm (w0[1], w0[0], selector); w0[0] = hc_byte_perm (w0[0], 0, selector); break; case 1: w3[3] = hc_byte_perm (w3[2], w3[1], selector); w3[2] = hc_byte_perm (w3[1], w3[0], selector); w3[1] = hc_byte_perm (w3[0], w2[3], selector); w3[0] = hc_byte_perm (w2[3], w2[2], selector); w2[3] = hc_byte_perm (w2[2], w2[1], selector); w2[2] = hc_byte_perm (w2[1], w2[0], selector); w2[1] = hc_byte_perm (w2[0],
w1[3], selector); w2[0] = hc_byte_perm (w1[3], w1[2], selector); w1[3] = hc_byte_perm (w1[2], w1[1], selector); w1[2] = hc_byte_perm (w1[1], w1[0], selector); w1[1] = hc_byte_perm (w1[0], w0[3], selector); w1[0] = hc_byte_perm (w0[3], w0[2], selector); w0[3] = hc_byte_perm (w0[2], w0[1], selector); w0[2] = hc_byte_perm (w0[1], w0[0], selector); w0[1] = hc_byte_perm (w0[0], 0, selector); w0[0] = 0; break; case 2: w3[3] = hc_byte_perm (w3[1], w3[0], selector); w3[2] = hc_byte_perm (w3[0], w2[3], selector); w3[1] = hc_byte_perm (w2[3], w2[2], selector); w3[0] = hc_byte_perm (w2[2], w2[1], selector); w2[3] = hc_byte_perm (w2[1], w2[0], selector); w2[2] = hc_byte_perm (w2[0], w1[3], selector); w2[1] = hc_byte_perm (w1[3], w1[2], selector); w2[0] = hc_byte_perm (w1[2], w1[1], selector); w1[3] = hc_byte_perm (w1[1], w1[0], selector); w1[2] = hc_byte_perm (w1[0], w0[3], selector); w1[1] = hc_byte_perm (w0[3], w0[2], selector); w1[0] = hc_byte_perm (w0[2], w0[1], selector); w0[3] = hc_byte_perm (w0[1], w0[0], selector); w0[2] = hc_byte_perm (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_byte_perm (w3[0], w2[3], selector); w3[2] = hc_byte_perm (w2[3], w2[2], selector); w3[1] = hc_byte_perm (w2[2], w2[1], selector); w3[0] = hc_byte_perm (w2[1], w2[0], selector); w2[3] = hc_byte_perm (w2[0], w1[3], selector); w2[2] = hc_byte_perm (w1[3], w1[2], selector); w2[1] = hc_byte_perm (w1[2], w1[1], selector); w2[0] = hc_byte_perm (w1[1], w1[0], selector); w1[3] = hc_byte_perm (w1[0], w0[3], selector); w1[2] = hc_byte_perm (w0[3], w0[2], selector); w1[1] = hc_byte_perm (w0[2], w0[1], selector); w1[0] = hc_byte_perm (w0[1], w0[0], selector); w0[3] = hc_byte_perm (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_byte_perm (w2[3], w2[2], selector); w3[2] = hc_byte_perm (w2[2], w2[1], selector); w3[1] = hc_byte_perm (w2[1], w2[0], selector); w3[0] = hc_byte_perm (w2[0], w1[3], selector); w2[3] = hc_byte_perm (w1[3], w1[2], selector);
w2[2] = hc_byte_perm (w1[2], w1[1], selector); w2[1] = hc_byte_perm (w1[1], w1[0], selector); w2[0] = hc_byte_perm (w1[0], w0[3], selector); w1[3] = hc_byte_perm (w0[3], w0[2], selector); w1[2] = hc_byte_perm (w0[2], w0[1], selector); w1[1] = hc_byte_perm (w0[1], w0[0], selector); w1[0] = hc_byte_perm (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_byte_perm (w2[2], w2[1], selector); w3[2] = hc_byte_perm (w2[1], w2[0], selector); w3[1] = hc_byte_perm (w2[0], w1[3], selector); w3[0] = hc_byte_perm (w1[3], w1[2], selector); w2[3] = hc_byte_perm (w1[2], w1[1], selector); w2[2] = hc_byte_perm (w1[1], w1[0], selector); w2[1] = hc_byte_perm (w1[0], w0[3], selector); w2[0] = hc_byte_perm (w0[3], w0[2], selector); w1[3] = hc_byte_perm (w0[2], w0[1], selector); w1[2] = hc_byte_perm (w0[1], w0[0], selector); w1[1] = hc_byte_perm (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_byte_perm (w2[1], w2[0], selector); w3[2] = hc_byte_perm (w2[0], w1[3], selector); w3[1] = hc_byte_perm (w1[3], w1[2], selector); w3[0] = hc_byte_perm (w1[2], w1[1], selector); w2[3] = hc_byte_perm (w1[1], w1[0], selector); w2[2] = hc_byte_perm (w1[0], w0[3], selector); w2[1] = hc_byte_perm (w0[3], w0[2], selector); w2[0] = hc_byte_perm (w0[2], w0[1], selector); w1[3] = hc_byte_perm (w0[1], w0[0], selector); w1[2] = hc_byte_perm (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_byte_perm (w2[0], w1[3], selector); w3[2] = hc_byte_perm (w1[3], w1[2], selector); w3[1] = hc_byte_perm (w1[2], w1[1], selector); w3[0] = hc_byte_perm (w1[1], w1[0], selector); w2[3] = hc_byte_perm (w1[0], w0[3], selector); w2[2] = hc_byte_perm (w0[3], w0[2], selector); w2[1] = hc_byte_perm (w0[2], w0[1], selector); w2[0] = hc_byte_perm (w0[1], w0[0], selector); w1[3] = hc_byte_perm (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0;
w0[0] = 0; break; case 8: w3[3] = hc_byte_perm (w1[3], w1[2], selector); w3[2] = hc_byte_perm (w1[2], w1[1], selector); w3[1] = hc_byte_perm (w1[1], w1[0], selector); w3[0] = hc_byte_perm (w1[0], w0[3], selector); w2[3] = hc_byte_perm (w0[3], w0[2], selector); w2[2] = hc_byte_perm (w0[2], w0[1], selector); w2[1] = hc_byte_perm (w0[1], w0[0], selector); w2[0] = hc_byte_perm (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_byte_perm (w1[2], w1[1], selector); w3[2] = hc_byte_perm (w1[1], w1[0], selector); w3[1] = hc_byte_perm (w1[0], w0[3], selector); w3[0] = hc_byte_perm (w0[3], w0[2], selector); w2[3] = hc_byte_perm (w0[2], w0[1], selector); w2[2] = hc_byte_perm (w0[1], w0[0], selector); w2[1] = hc_byte_perm (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_byte_perm (w1[1], w1[0], selector); w3[2] = hc_byte_perm (w1[0], w0[3], selector); w3[1] = hc_byte_perm (w0[3], w0[2], selector); w3[0] = hc_byte_perm (w0[2], w0[1], selector); w2[3] = hc_byte_perm (w0[1], w0[0], selector); w2[2] = hc_byte_perm (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_byte_perm (w1[0], w0[3], selector); w3[2] = hc_byte_perm (w0[3], w0[2], selector); w3[1] = hc_byte_perm (w0[2], w0[1], selector); w3[0] = hc_byte_perm (w0[1], w0[0], selector); w2[3] = hc_byte_perm (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_byte_perm (w0[3], w0[2], selector); w3[2] = hc_byte_perm (w0[2], w0[1], selector); w3[1] = hc_byte_perm (w0[1], w0[0], selector); w3[0] = hc_byte_perm (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0;
w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_byte_perm (w0[2], w0[1], selector); w3[2] = hc_byte_perm (w0[1], w0[0], selector); w3[1] = hc_byte_perm (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_byte_perm (w0[1], w0[0], selector); w3[2] = hc_byte_perm (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_byte_perm (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_be (w3[3], 0, offset); w3[3] = hc_bytealign_be (w3[2], w3[3], offset); w3[2] = hc_bytealign_be (w3[1], w3[2], offset); w3[1] = hc_bytealign_be (w3[0], w3[1], offset); w3[0] = hc_bytealign_be (w2[3], w3[0], offset); w2[3] = hc_bytealign_be (w2[2], w2[3], offset); w2[2] = hc_bytealign_be (w2[1], w2[2], offset); w2[1] = hc_bytealign_be (w2[0], w2[1], offset); w2[0] = hc_bytealign_be (w1[3], w2[0], offset); w1[3] = hc_bytealign_be (w1[2], w1[3], offset); w1[2] = hc_bytealign_be (w1[1], w1[2], offset); w1[1] = hc_bytealign_be (w1[0], w1[1], offset); w1[0] = hc_bytealign_be (w0[3], w1[0], offset); w0[3] = hc_bytealign_be (w0[2], w0[3], offset); w0[2] = hc_bytealign_be (w0[1], w0[2], offset); w0[1] = hc_bytealign_be (w0[0], w0[1], offset); w0[0] =
hc_bytealign_be ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_be (w3[3], 0, offset); c0[0] = hc_bytealign_be (w3[2], w3[3], offset); w3[3] = hc_bytealign_be (w3[1], w3[2], offset); w3[2] = hc_bytealign_be (w3[0], w3[1], offset); w3[1] = hc_bytealign_be (w2[3], w3[0], offset); w3[0] = hc_bytealign_be (w2[2], w2[3], offset); w2[3] = hc_bytealign_be (w2[1], w2[2], offset); w2[2] = hc_bytealign_be (w2[0], w2[1], offset); w2[1] = hc_bytealign_be (w1[3], w2[0], offset); w2[0] = hc_bytealign_be (w1[2], w1[3], offset); w1[3] = hc_bytealign_be (w1[1], w1[2], offset); w1[2] = hc_bytealign_be (w1[0], w1[1], offset); w1[1] = hc_bytealign_be (w0[3], w1[0], offset); w1[0] = hc_bytealign_be (w0[2], w0[3], offset); w0[3] = hc_bytealign_be (w0[1], w0[2], offset); w0[2] = hc_bytealign_be (w0[0], w0[1], offset); w0[1] = hc_bytealign_be ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_be (w3[3], 0, offset); c0[1] = hc_bytealign_be (w3[2], w3[3], offset); c0[0] = hc_bytealign_be (w3[1], w3[2], offset); w3[3] = hc_bytealign_be (w3[0], w3[1], offset); w3[2] = hc_bytealign_be (w2[3], w3[0], offset); w3[1] = hc_bytealign_be (w2[2], w2[3], offset); w3[0] = hc_bytealign_be (w2[1], w2[2], offset); w2[3] = hc_bytealign_be (w2[0], w2[1], offset); w2[2] = hc_bytealign_be (w1[3], w2[0], offset); w2[1] = hc_bytealign_be (w1[2], w1[3], offset); w2[0] = hc_bytealign_be (w1[1], w1[2], offset); w1[3] = hc_bytealign_be (w1[0], w1[1], offset); w1[2] = hc_bytealign_be (w0[3], w1[0], offset); w1[1] = hc_bytealign_be (w0[2], w0[3], offset); w1[0] = hc_bytealign_be (w0[1], w0[2], offset); w0[3] = hc_bytealign_be (w0[0], w0[1], offset); w0[2] = hc_bytealign_be ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_be (w3[3], 0, offset); c0[2] = hc_bytealign_be (w3[2], w3[3], offset); c0[1] = hc_bytealign_be (w3[1], w3[2], offset); c0[0] = hc_bytealign_be (w3[0], w3[1], offset); w3[3] = hc_bytealign_be (w2[3], w3[0], offset); w3[2] = hc_bytealign_be 
(w2[2], w2[3], offset); w3[1] = hc_bytealign_be (w2[1], w2[2], offset); w3[0] = hc_bytealign_be (w2[0], w2[1], offset); w2[3] = hc_bytealign_be (w1[3], w2[0], offset); w2[2] = hc_bytealign_be (w1[2], w1[3], offset); w2[1] = hc_bytealign_be (w1[1], w1[2], offset); w2[0] = hc_bytealign_be (w1[0], w1[1], offset); w1[3] = hc_bytealign_be (w0[3], w1[0], offset); w1[2] = hc_bytealign_be (w0[2], w0[3], offset); w1[1] = hc_bytealign_be (w0[1], w0[2], offset); w1[0] = hc_bytealign_be (w0[0], w0[1], offset); w0[3] = hc_bytealign_be ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_be (w3[3], 0, offset); c0[3] = hc_bytealign_be (w3[2], w3[3], offset); c0[2] = hc_bytealign_be (w3[1], w3[2], offset); c0[1] = hc_bytealign_be (w3[0], w3[1], offset); c0[0] = hc_bytealign_be (w2[3], w3[0], offset); w3[3] = hc_bytealign_be (w2[2], w2[3], offset); w3[2] = hc_bytealign_be (w2[1], w2[2], offset); w3[1] = hc_bytealign_be (w2[0], w2[1], offset); w3[0] = hc_bytealign_be (w1[3], w2[0], offset); w2[3] = hc_bytealign_be (w1[2], w1[3], offset); w2[2] = hc_bytealign_be (w1[1], w1[2], offset); w2[1] = hc_bytealign_be (w1[0], w1[1], offset); w2[0] = hc_bytealign_be (w0[3], w1[0], offset); w1[3] = hc_bytealign_be (w0[2], w0[3], offset); w1[2] = hc_bytealign_be (w0[1], w0[2], offset); w1[1] = hc_bytealign_be (w0[0], w0[1], offset); w1[0] = hc_bytealign_be ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_be (w3[3], 0, offset); c1[0] = hc_bytealign_be (w3[2], w3[3], offset); c0[3] = hc_bytealign_be (w3[1], w3[2], offset); c0[2] = hc_bytealign_be (w3[0], w3[1], offset); c0[1] = hc_bytealign_be (w2[3], w3[0], offset); c0[0] = hc_bytealign_be (w2[2], w2[3], offset); w3[3] = hc_bytealign_be (w2[1], w2[2], offset); w3[2] = hc_bytealign_be (w2[0], w2[1], offset); w3[1] = hc_bytealign_be (w1[3], w2[0], offset); w3[0] = hc_bytealign_be (w1[2], w1[3], offset); w2[3] = hc_bytealign_be (w1[1], w1[2], offset); w2[2] = 
hc_bytealign_be (w1[0], w1[1], offset); w2[1] = hc_bytealign_be (w0[3], w1[0], offset); w2[0] = hc_bytealign_be (w0[2], w0[3], offset); w1[3] = hc_bytealign_be (w0[1], w0[2], offset); w1[2] = hc_bytealign_be (w0[0], w0[1], offset); w1[1] = hc_bytealign_be ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_be (w3[3], 0, offset); c1[1] = hc_bytealign_be (w3[2], w3[3], offset); c1[0] = hc_bytealign_be (w3[1], w3[2], offset); c0[3] = hc_bytealign_be (w3[0], w3[1], offset); c0[2] = hc_bytealign_be (w2[3], w3[0], offset); c0[1] = hc_bytealign_be (w2[2], w2[3], offset); c0[0] = hc_bytealign_be (w2[1], w2[2], offset); w3[3] = hc_bytealign_be (w2[0], w2[1], offset); w3[2] = hc_bytealign_be (w1[3], w2[0], offset); w3[1] = hc_bytealign_be (w1[2], w1[3], offset); w3[0] = hc_bytealign_be (w1[1], w1[2], offset); w2[3] = hc_bytealign_be (w1[0], w1[1], offset); w2[2] = hc_bytealign_be (w0[3], w1[0], offset); w2[1] = hc_bytealign_be (w0[2], w0[3], offset); w2[0] = hc_bytealign_be (w0[1], w0[2], offset); w1[3] = hc_bytealign_be (w0[0], w0[1], offset); w1[2] = hc_bytealign_be ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_be (w3[3], 0, offset); c1[2] = hc_bytealign_be (w3[2], w3[3], offset); c1[1] = hc_bytealign_be (w3[1], w3[2], offset); c1[0] = hc_bytealign_be (w3[0], w3[1], offset); c0[3] = hc_bytealign_be (w2[3], w3[0], offset); c0[2] = hc_bytealign_be (w2[2], w2[3], offset); c0[1] = hc_bytealign_be (w2[1], w2[2], offset); c0[0] = hc_bytealign_be (w2[0], w2[1], offset); w3[3] = hc_bytealign_be (w1[3], w2[0], offset); w3[2] = hc_bytealign_be (w1[2], w1[3], offset); w3[1] = hc_bytealign_be (w1[1], w1[2], offset); w3[0] = hc_bytealign_be (w1[0], w1[1], offset); w2[3] = hc_bytealign_be (w0[3], w1[0], offset); w2[2] = hc_bytealign_be (w0[2], w0[3], offset); w2[1] = hc_bytealign_be (w0[1], w0[2], offset); w2[0] = hc_bytealign_be (w0[0], w0[1], 
offset); w1[3] = hc_bytealign_be ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_be (w3[3], 0, offset); c1[3] = hc_bytealign_be (w3[2], w3[3], offset); c1[2] = hc_bytealign_be (w3[1], w3[2], offset); c1[1] = hc_bytealign_be (w3[0], w3[1], offset); c1[0] = hc_bytealign_be (w2[3], w3[0], offset); c0[3] = hc_bytealign_be (w2[2], w2[3], offset); c0[2] = hc_bytealign_be (w2[1], w2[2], offset); c0[1] = hc_bytealign_be (w2[0], w2[1], offset); c0[0] = hc_bytealign_be (w1[3], w2[0], offset); w3[3] = hc_bytealign_be (w1[2], w1[3], offset); w3[2] = hc_bytealign_be (w1[1], w1[2], offset); w3[1] = hc_bytealign_be (w1[0], w1[1], offset); w3[0] = hc_bytealign_be (w0[3], w1[0], offset); w2[3] = hc_bytealign_be (w0[2], w0[3], offset); w2[2] = hc_bytealign_be (w0[1], w0[2], offset); w2[1] = hc_bytealign_be (w0[0], w0[1], offset); w2[0] = hc_bytealign_be ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_be (w3[3], 0, offset); c2[0] = hc_bytealign_be (w3[2], w3[3], offset); c1[3] = hc_bytealign_be (w3[1], w3[2], offset); c1[2] = hc_bytealign_be (w3[0], w3[1], offset); c1[1] = hc_bytealign_be (w2[3], w3[0], offset); c1[0] = hc_bytealign_be (w2[2], w2[3], offset); c0[3] = hc_bytealign_be (w2[1], w2[2], offset); c0[2] = hc_bytealign_be (w2[0], w2[1], offset); c0[1] = hc_bytealign_be (w1[3], w2[0], offset); c0[0] = hc_bytealign_be (w1[2], w1[3], offset); w3[3] = hc_bytealign_be (w1[1], w1[2], offset); w3[2] = hc_bytealign_be (w1[0], w1[1], offset); w3[1] = hc_bytealign_be (w0[3], w1[0], offset); w3[0] = hc_bytealign_be (w0[2], w0[3], offset); w2[3] = hc_bytealign_be (w0[1], w0[2], offset); w2[2] = hc_bytealign_be (w0[0], w0[1], offset); w2[1] = hc_bytealign_be ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = 
hc_bytealign_be (w3[3], 0, offset); c2[1] = hc_bytealign_be (w3[2], w3[3], offset); c2[0] = hc_bytealign_be (w3[1], w3[2], offset); c1[3] = hc_bytealign_be (w3[0], w3[1], offset); c1[2] = hc_bytealign_be (w2[3], w3[0], offset); c1[1] = hc_bytealign_be (w2[2], w2[3], offset); c1[0] = hc_bytealign_be (w2[1], w2[2], offset); c0[3] = hc_bytealign_be (w2[0], w2[1], offset); c0[2] = hc_bytealign_be (w1[3], w2[0], offset); c0[1] = hc_bytealign_be (w1[2], w1[3], offset); c0[0] = hc_bytealign_be (w1[1], w1[2], offset); w3[3] = hc_bytealign_be (w1[0], w1[1], offset); w3[2] = hc_bytealign_be (w0[3], w1[0], offset); w3[1] = hc_bytealign_be (w0[2], w0[3], offset); w3[0] = hc_bytealign_be (w0[1], w0[2], offset); w2[3] = hc_bytealign_be (w0[0], w0[1], offset); w2[2] = hc_bytealign_be ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_be (w3[3], 0, offset); c2[2] = hc_bytealign_be (w3[2], w3[3], offset); c2[1] = hc_bytealign_be (w3[1], w3[2], offset); c2[0] = hc_bytealign_be (w3[0], w3[1], offset); c1[3] = hc_bytealign_be (w2[3], w3[0], offset); c1[2] = hc_bytealign_be (w2[2], w2[3], offset); c1[1] = hc_bytealign_be (w2[1], w2[2], offset); c1[0] = hc_bytealign_be (w2[0], w2[1], offset); c0[3] = hc_bytealign_be (w1[3], w2[0], offset); c0[2] = hc_bytealign_be (w1[2], w1[3], offset); c0[1] = hc_bytealign_be (w1[1], w1[2], offset); c0[0] = hc_bytealign_be (w1[0], w1[1], offset); w3[3] = hc_bytealign_be (w0[3], w1[0], offset); w3[2] = hc_bytealign_be (w0[2], w0[3], offset); w3[1] = hc_bytealign_be (w0[1], w0[2], offset); w3[0] = hc_bytealign_be (w0[0], w0[1], offset); w2[3] = hc_bytealign_be ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_be (w3[3], 0, offset); c2[3] = hc_bytealign_be (w3[2], w3[3], offset); c2[2] = hc_bytealign_be (w3[1], 
w3[2], offset); c2[1] = hc_bytealign_be (w3[0], w3[1], offset); c2[0] = hc_bytealign_be (w2[3], w3[0], offset); c1[3] = hc_bytealign_be (w2[2], w2[3], offset); c1[2] = hc_bytealign_be (w2[1], w2[2], offset); c1[1] = hc_bytealign_be (w2[0], w2[1], offset); c1[0] = hc_bytealign_be (w1[3], w2[0], offset); c0[3] = hc_bytealign_be (w1[2], w1[3], offset); c0[2] = hc_bytealign_be (w1[1], w1[2], offset); c0[1] = hc_bytealign_be (w1[0], w1[1], offset); c0[0] = hc_bytealign_be (w0[3], w1[0], offset); w3[3] = hc_bytealign_be (w0[2], w0[3], offset); w3[2] = hc_bytealign_be (w0[1], w0[2], offset); w3[1] = hc_bytealign_be (w0[0], w0[1], offset); w3[0] = hc_bytealign_be ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_be (w3[3], 0, offset); c3[0] = hc_bytealign_be (w3[2], w3[3], offset); c2[3] = hc_bytealign_be (w3[1], w3[2], offset); c2[2] = hc_bytealign_be (w3[0], w3[1], offset); c2[1] = hc_bytealign_be (w2[3], w3[0], offset); c2[0] = hc_bytealign_be (w2[2], w2[3], offset); c1[3] = hc_bytealign_be (w2[1], w2[2], offset); c1[2] = hc_bytealign_be (w2[0], w2[1], offset); c1[1] = hc_bytealign_be (w1[3], w2[0], offset); c1[0] = hc_bytealign_be (w1[2], w1[3], offset); c0[3] = hc_bytealign_be (w1[1], w1[2], offset); c0[2] = hc_bytealign_be (w1[0], w1[1], offset); c0[1] = hc_bytealign_be (w0[3], w1[0], offset); c0[0] = hc_bytealign_be (w0[2], w0[3], offset); w3[3] = hc_bytealign_be (w0[1], w0[2], offset); w3[2] = hc_bytealign_be (w0[0], w0[1], offset); w3[1] = hc_bytealign_be ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_be (w3[3], 0, offset); c3[1] = hc_bytealign_be (w3[2], w3[3], offset); c3[0] = hc_bytealign_be (w3[1], w3[2], offset); c2[3] = hc_bytealign_be (w3[0], w3[1], offset); c2[2] = 
hc_bytealign_be (w2[3], w3[0], offset); c2[1] = hc_bytealign_be (w2[2], w2[3], offset); c2[0] = hc_bytealign_be (w2[1], w2[2], offset); c1[3] = hc_bytealign_be (w2[0], w2[1], offset); c1[2] = hc_bytealign_be (w1[3], w2[0], offset); c1[1] = hc_bytealign_be (w1[2], w1[3], offset); c1[0] = hc_bytealign_be (w1[1], w1[2], offset); c0[3] = hc_bytealign_be (w1[0], w1[1], offset); c0[2] = hc_bytealign_be (w0[3], w1[0], offset); c0[1] = hc_bytealign_be (w0[2], w0[3], offset); c0[0] = hc_bytealign_be (w0[1], w0[2], offset); w3[3] = hc_bytealign_be (w0[0], w0[1], offset); w3[2] = hc_bytealign_be ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_be (w3[3], 0, offset); c3[2] = hc_bytealign_be (w3[2], w3[3], offset); c3[1] = hc_bytealign_be (w3[1], w3[2], offset); c3[0] = hc_bytealign_be (w3[0], w3[1], offset); c2[3] = hc_bytealign_be (w2[3], w3[0], offset); c2[2] = hc_bytealign_be (w2[2], w2[3], offset); c2[1] = hc_bytealign_be (w2[1], w2[2], offset); c2[0] = hc_bytealign_be (w2[0], w2[1], offset); c1[3] = hc_bytealign_be (w1[3], w2[0], offset); c1[2] = hc_bytealign_be (w1[2], w1[3], offset); c1[1] = hc_bytealign_be (w1[1], w1[2], offset); c1[0] = hc_bytealign_be (w1[0], w1[1], offset); c0[3] = hc_bytealign_be (w0[3], w1[0], offset); c0[2] = hc_bytealign_be (w0[2], w0[3], offset); c0[1] = hc_bytealign_be (w0[1], w0[2], offset); c0[0] = hc_bytealign_be (w0[0], w0[1], offset); w3[3] = hc_bytealign_be ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = 
l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm ( 0, w3[3], selector); w3[3] = hc_byte_perm (w3[3], w3[2], selector); w3[2] = hc_byte_perm (w3[2], w3[1], selector); w3[1] = hc_byte_perm (w3[1], w3[0], selector); w3[0] = hc_byte_perm (w3[0], w2[3], selector); w2[3] = hc_byte_perm (w2[3], w2[2], selector); w2[2] = hc_byte_perm (w2[2], w2[1], selector); w2[1] = hc_byte_perm (w2[1], w2[0], selector); w2[0] = hc_byte_perm (w2[0], w1[3], selector); w1[3] = hc_byte_perm (w1[3], w1[2], selector); w1[2] = hc_byte_perm (w1[2], w1[1], selector); w1[1] = hc_byte_perm (w1[1], w1[0], selector); w1[0] = hc_byte_perm (w1[0], w0[3], selector); w0[3] = hc_byte_perm (w0[3], w0[2], selector); w0[2] = hc_byte_perm (w0[2], w0[1], selector); w0[1] = hc_byte_perm (w0[1], w0[0], selector); w0[0] = hc_byte_perm (w0[0], 0, selector); break; case 1: c0[1] = hc_byte_perm ( 0, w3[3], selector); c0[0] = hc_byte_perm (w3[3], w3[2], selector); w3[3] = hc_byte_perm (w3[2], w3[1], selector); w3[2] = hc_byte_perm (w3[1], w3[0], selector); w3[1] = hc_byte_perm (w3[0], w2[3], selector); w3[0] = hc_byte_perm (w2[3], w2[2], selector); w2[3] = hc_byte_perm (w2[2], w2[1], selector); w2[2] = hc_byte_perm (w2[1], w2[0], selector); w2[1] = hc_byte_perm (w2[0], w1[3], selector); w2[0] = hc_byte_perm (w1[3], w1[2], selector); w1[3] = hc_byte_perm (w1[2], w1[1], selector); w1[2] = hc_byte_perm (w1[1], w1[0], selector); w1[1] = hc_byte_perm (w1[0], w0[3], selector); w1[0] = hc_byte_perm (w0[3], w0[2], selector); w0[3] = hc_byte_perm (w0[2], w0[1], selector); w0[2] = hc_byte_perm (w0[1], w0[0], selector); w0[1] = hc_byte_perm (w0[0], 0, selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm ( 0, w3[3], selector); c0[1] = hc_byte_perm (w3[3], w3[2], selector); c0[0] = hc_byte_perm (w3[2], w3[1], selector); w3[3] = hc_byte_perm (w3[1], w3[0], selector); w3[2] = hc_byte_perm (w3[0], w2[3], selector); w3[1] = hc_byte_perm (w2[3], w2[2], 
selector); w3[0] = hc_byte_perm (w2[2], w2[1], selector); w2[3] = hc_byte_perm (w2[1], w2[0], selector); w2[2] = hc_byte_perm (w2[0], w1[3], selector); w2[1] = hc_byte_perm (w1[3], w1[2], selector); w2[0] = hc_byte_perm (w1[2], w1[1], selector); w1[3] = hc_byte_perm (w1[1], w1[0], selector); w1[2] = hc_byte_perm (w1[0], w0[3], selector); w1[1] = hc_byte_perm (w0[3], w0[2], selector); w1[0] = hc_byte_perm (w0[2], w0[1], selector); w0[3] = hc_byte_perm (w0[1], w0[0], selector); w0[2] = hc_byte_perm (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm ( 0, w3[3], selector); c0[2] = hc_byte_perm (w3[3], w3[2], selector); c0[1] = hc_byte_perm (w3[2], w3[1], selector); c0[0] = hc_byte_perm (w3[1], w3[0], selector); w3[3] = hc_byte_perm (w3[0], w2[3], selector); w3[2] = hc_byte_perm (w2[3], w2[2], selector); w3[1] = hc_byte_perm (w2[2], w2[1], selector); w3[0] = hc_byte_perm (w2[1], w2[0], selector); w2[3] = hc_byte_perm (w2[0], w1[3], selector); w2[2] = hc_byte_perm (w1[3], w1[2], selector); w2[1] = hc_byte_perm (w1[2], w1[1], selector); w2[0] = hc_byte_perm (w1[1], w1[0], selector); w1[3] = hc_byte_perm (w1[0], w0[3], selector); w1[2] = hc_byte_perm (w0[3], w0[2], selector); w1[1] = hc_byte_perm (w0[2], w0[1], selector); w1[0] = hc_byte_perm (w0[1], w0[0], selector); w0[3] = hc_byte_perm (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm ( 0, w3[3], selector); c0[3] = hc_byte_perm (w3[3], w3[2], selector); c0[2] = hc_byte_perm (w3[2], w3[1], selector); c0[1] = hc_byte_perm (w3[1], w3[0], selector); c0[0] = hc_byte_perm (w3[0], w2[3], selector); w3[3] = hc_byte_perm (w2[3], w2[2], selector); w3[2] = hc_byte_perm (w2[2], w2[1], selector); w3[1] = hc_byte_perm (w2[1], w2[0], selector); w3[0] = hc_byte_perm (w2[0], w1[3], selector); w2[3] = hc_byte_perm (w1[3], w1[2], selector); w2[2] = hc_byte_perm (w1[2], w1[1], selector); w2[1] = hc_byte_perm (w1[1], w1[0], selector); w2[0] = hc_byte_perm (w1[0], w0[3], 
selector); w1[3] = hc_byte_perm (w0[3], w0[2], selector); w1[2] = hc_byte_perm (w0[2], w0[1], selector); w1[1] = hc_byte_perm (w0[1], w0[0], selector); w1[0] = hc_byte_perm (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm ( 0, w3[3], selector); c1[0] = hc_byte_perm (w3[3], w3[2], selector); c0[3] = hc_byte_perm (w3[2], w3[1], selector); c0[2] = hc_byte_perm (w3[1], w3[0], selector); c0[1] = hc_byte_perm (w3[0], w2[3], selector); c0[0] = hc_byte_perm (w2[3], w2[2], selector); w3[3] = hc_byte_perm (w2[2], w2[1], selector); w3[2] = hc_byte_perm (w2[1], w2[0], selector); w3[1] = hc_byte_perm (w2[0], w1[3], selector); w3[0] = hc_byte_perm (w1[3], w1[2], selector); w2[3] = hc_byte_perm (w1[2], w1[1], selector); w2[2] = hc_byte_perm (w1[1], w1[0], selector); w2[1] = hc_byte_perm (w1[0], w0[3], selector); w2[0] = hc_byte_perm (w0[3], w0[2], selector); w1[3] = hc_byte_perm (w0[2], w0[1], selector); w1[2] = hc_byte_perm (w0[1], w0[0], selector); w1[1] = hc_byte_perm (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm ( 0, w3[3], selector); c1[1] = hc_byte_perm (w3[3], w3[2], selector); c1[0] = hc_byte_perm (w3[2], w3[1], selector); c0[3] = hc_byte_perm (w3[1], w3[0], selector); c0[2] = hc_byte_perm (w3[0], w2[3], selector); c0[1] = hc_byte_perm (w2[3], w2[2], selector); c0[0] = hc_byte_perm (w2[2], w2[1], selector); w3[3] = hc_byte_perm (w2[1], w2[0], selector); w3[2] = hc_byte_perm (w2[0], w1[3], selector); w3[1] = hc_byte_perm (w1[3], w1[2], selector); w3[0] = hc_byte_perm (w1[2], w1[1], selector); w2[3] = hc_byte_perm (w1[1], w1[0], selector); w2[2] = hc_byte_perm (w1[0], w0[3], selector); w2[1] = hc_byte_perm (w0[3], w0[2], selector); w2[0] = hc_byte_perm (w0[2], w0[1], selector); w1[3] = hc_byte_perm (w0[1], w0[0], selector); w1[2] = hc_byte_perm (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = 
hc_byte_perm ( 0, w3[3], selector); c1[2] = hc_byte_perm (w3[3], w3[2], selector); c1[1] = hc_byte_perm (w3[2], w3[1], selector); c1[0] = hc_byte_perm (w3[1], w3[0], selector); c0[3] = hc_byte_perm (w3[0], w2[3], selector); c0[2] = hc_byte_perm (w2[3], w2[2], selector); c0[1] = hc_byte_perm (w2[2], w2[1], selector); c0[0] = hc_byte_perm (w2[1], w2[0], selector); w3[3] = hc_byte_perm (w2[0], w1[3], selector); w3[2] = hc_byte_perm (w1[3], w1[2], selector); w3[1] = hc_byte_perm (w1[2], w1[1], selector); w3[0] = hc_byte_perm (w1[1], w1[0], selector); w2[3] = hc_byte_perm (w1[0], w0[3], selector); w2[2] = hc_byte_perm (w0[3], w0[2], selector); w2[1] = hc_byte_perm (w0[2], w0[1], selector); w2[0] = hc_byte_perm (w0[1], w0[0], selector); w1[3] = hc_byte_perm (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm ( 0, w3[3], selector); c1[3] = hc_byte_perm (w3[3], w3[2], selector); c1[2] = hc_byte_perm (w3[2], w3[1], selector); c1[1] = hc_byte_perm (w3[1], w3[0], selector); c1[0] = hc_byte_perm (w3[0], w2[3], selector); c0[3] = hc_byte_perm (w2[3], w2[2], selector); c0[2] = hc_byte_perm (w2[2], w2[1], selector); c0[1] = hc_byte_perm (w2[1], w2[0], selector); c0[0] = hc_byte_perm (w2[0], w1[3], selector); w3[3] = hc_byte_perm (w1[3], w1[2], selector); w3[2] = hc_byte_perm (w1[2], w1[1], selector); w3[1] = hc_byte_perm (w1[1], w1[0], selector); w3[0] = hc_byte_perm (w1[0], w0[3], selector); w2[3] = hc_byte_perm (w0[3], w0[2], selector); w2[2] = hc_byte_perm (w0[2], w0[1], selector); w2[1] = hc_byte_perm (w0[1], w0[0], selector); w2[0] = hc_byte_perm (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm ( 0, w3[3], selector); c2[0] = hc_byte_perm (w3[3], w3[2], selector); c1[3] = hc_byte_perm (w3[2], w3[1], selector); c1[2] = hc_byte_perm (w3[1], w3[0], selector); c1[1] = hc_byte_perm (w3[0], w2[3], 
selector); c1[0] = hc_byte_perm (w2[3], w2[2], selector); c0[3] = hc_byte_perm (w2[2], w2[1], selector); c0[2] = hc_byte_perm (w2[1], w2[0], selector); c0[1] = hc_byte_perm (w2[0], w1[3], selector); c0[0] = hc_byte_perm (w1[3], w1[2], selector); w3[3] = hc_byte_perm (w1[2], w1[1], selector); w3[2] = hc_byte_perm (w1[1], w1[0], selector); w3[1] = hc_byte_perm (w1[0], w0[3], selector); w3[0] = hc_byte_perm (w0[3], w0[2], selector); w2[3] = hc_byte_perm (w0[2], w0[1], selector); w2[2] = hc_byte_perm (w0[1], w0[0], selector); w2[1] = hc_byte_perm (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm ( 0, w3[3], selector); c2[1] = hc_byte_perm (w3[3], w3[2], selector); c2[0] = hc_byte_perm (w3[2], w3[1], selector); c1[3] = hc_byte_perm (w3[1], w3[0], selector); c1[2] = hc_byte_perm (w3[0], w2[3], selector); c1[1] = hc_byte_perm (w2[3], w2[2], selector); c1[0] = hc_byte_perm (w2[2], w2[1], selector); c0[3] = hc_byte_perm (w2[1], w2[0], selector); c0[2] = hc_byte_perm (w2[0], w1[3], selector); c0[1] = hc_byte_perm (w1[3], w1[2], selector); c0[0] = hc_byte_perm (w1[2], w1[1], selector); w3[3] = hc_byte_perm (w1[1], w1[0], selector); w3[2] = hc_byte_perm (w1[0], w0[3], selector); w3[1] = hc_byte_perm (w0[3], w0[2], selector); w3[0] = hc_byte_perm (w0[2], w0[1], selector); w2[3] = hc_byte_perm (w0[1], w0[0], selector); w2[2] = hc_byte_perm (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm ( 0, w3[3], selector); c2[2] = hc_byte_perm (w3[3], w3[2], selector); c2[1] = hc_byte_perm (w3[2], w3[1], selector); c2[0] = hc_byte_perm (w3[1], w3[0], selector); c1[3] = hc_byte_perm (w3[0], w2[3], selector); c1[2] = hc_byte_perm (w2[3], w2[2], selector); c1[1] = hc_byte_perm (w2[2], w2[1], selector); c1[0] = hc_byte_perm (w2[1], w2[0], selector); c0[3] = hc_byte_perm 
(w2[0], w1[3], selector); c0[2] = hc_byte_perm (w1[3], w1[2], selector); c0[1] = hc_byte_perm (w1[2], w1[1], selector); c0[0] = hc_byte_perm (w1[1], w1[0], selector); w3[3] = hc_byte_perm (w1[0], w0[3], selector); w3[2] = hc_byte_perm (w0[3], w0[2], selector); w3[1] = hc_byte_perm (w0[2], w0[1], selector); w3[0] = hc_byte_perm (w0[1], w0[0], selector); w2[3] = hc_byte_perm (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm ( 0, w3[3], selector); c2[3] = hc_byte_perm (w3[3], w3[2], selector); c2[2] = hc_byte_perm (w3[2], w3[1], selector); c2[1] = hc_byte_perm (w3[1], w3[0], selector); c2[0] = hc_byte_perm (w3[0], w2[3], selector); c1[3] = hc_byte_perm (w2[3], w2[2], selector); c1[2] = hc_byte_perm (w2[2], w2[1], selector); c1[1] = hc_byte_perm (w2[1], w2[0], selector); c1[0] = hc_byte_perm (w2[0], w1[3], selector); c0[3] = hc_byte_perm (w1[3], w1[2], selector); c0[2] = hc_byte_perm (w1[2], w1[1], selector); c0[1] = hc_byte_perm (w1[1], w1[0], selector); c0[0] = hc_byte_perm (w1[0], w0[3], selector); w3[3] = hc_byte_perm (w0[3], w0[2], selector); w3[2] = hc_byte_perm (w0[2], w0[1], selector); w3[1] = hc_byte_perm (w0[1], w0[0], selector); w3[0] = hc_byte_perm (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm ( 0, w3[3], selector); c3[0] = hc_byte_perm (w3[3], w3[2], selector); c2[3] = hc_byte_perm (w3[2], w3[1], selector); c2[2] = hc_byte_perm (w3[1], w3[0], selector); c2[1] = hc_byte_perm (w3[0], w2[3], selector); c2[0] = hc_byte_perm (w2[3], w2[2], selector); c1[3] = hc_byte_perm (w2[2], w2[1], selector); c1[2] = hc_byte_perm (w2[1], w2[0], selector); c1[1] = hc_byte_perm (w2[0], w1[3], selector); c1[0] = hc_byte_perm (w1[3], w1[2], selector); c0[3] = hc_byte_perm (w1[2], w1[1], selector); c0[2] = 
hc_byte_perm (w1[1], w1[0], selector); c0[1] = hc_byte_perm (w1[0], w0[3], selector); c0[0] = hc_byte_perm (w0[3], w0[2], selector); w3[3] = hc_byte_perm (w0[2], w0[1], selector); w3[2] = hc_byte_perm (w0[1], w0[0], selector); w3[1] = hc_byte_perm (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm ( 0, w3[3], selector); c3[1] = hc_byte_perm (w3[3], w3[2], selector); c3[0] = hc_byte_perm (w3[2], w3[1], selector); c2[3] = hc_byte_perm (w3[1], w3[0], selector); c2[2] = hc_byte_perm (w3[0], w2[3], selector); c2[1] = hc_byte_perm (w2[3], w2[2], selector); c2[0] = hc_byte_perm (w2[2], w2[1], selector); c1[3] = hc_byte_perm (w2[1], w2[0], selector); c1[2] = hc_byte_perm (w2[0], w1[3], selector); c1[1] = hc_byte_perm (w1[3], w1[2], selector); c1[0] = hc_byte_perm (w1[2], w1[1], selector); c0[3] = hc_byte_perm (w1[1], w1[0], selector); c0[2] = hc_byte_perm (w1[0], w0[3], selector); c0[1] = hc_byte_perm (w0[3], w0[2], selector); c0[0] = hc_byte_perm (w0[2], w0[1], selector); w3[3] = hc_byte_perm (w0[1], w0[0], selector); w3[2] = hc_byte_perm (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm ( 0, w3[3], selector); c3[2] = hc_byte_perm (w3[3], w3[2], selector); c3[1] = hc_byte_perm (w3[2], w3[1], selector); c3[0] = hc_byte_perm (w3[1], w3[0], selector); c2[3] = hc_byte_perm (w3[0], w2[3], selector); c2[2] = hc_byte_perm (w2[3], w2[2], selector); c2[1] = hc_byte_perm (w2[2], w2[1], selector); c2[0] = hc_byte_perm (w2[1], w2[0], selector); c1[3] = hc_byte_perm (w2[0], w1[3], selector); c1[2] = hc_byte_perm (w1[3], w1[2], selector); c1[1] = hc_byte_perm (w1[2], w1[1], selector); c1[0] = hc_byte_perm (w1[1], w1[0], selector); c0[3] = hc_byte_perm (w1[0], w0[3], selector); 
c0[2] = hc_byte_perm (w0[3], w0[2], selector); c0[1] = hc_byte_perm (w0[2], w0[1], selector); c0[0] = hc_byte_perm (w0[1], w0[0], selector); w3[3] = hc_byte_perm (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w7[3] = hc_bytealign (w7[2], w7[3], offset); w7[2] = hc_bytealign (w7[1], w7[2], offset); w7[1] = hc_bytealign (w7[0], w7[1], offset); w7[0] = hc_bytealign (w6[3], w7[0], offset); w6[3] = hc_bytealign (w6[2], w6[3], offset); w6[2] = hc_bytealign (w6[1], w6[2], offset); w6[1] = hc_bytealign (w6[0], w6[1], offset); w6[0] = hc_bytealign (w5[3], w6[0], offset); w5[3] = hc_bytealign (w5[2], w5[3], offset); w5[2] = hc_bytealign (w5[1], w5[2], offset); w5[1] = hc_bytealign (w5[0], w5[1], offset); w5[0] = hc_bytealign (w4[3], w5[0], offset); w4[3] = hc_bytealign (w4[2], w4[3], offset); w4[2] = hc_bytealign (w4[1], w4[2], offset); w4[1] = hc_bytealign (w4[0], w4[1], offset); w4[0] = hc_bytealign (w3[3], w4[0], offset); w3[3] = hc_bytealign (w3[2], w3[3], offset); w3[2] = hc_bytealign (w3[1], w3[2], offset); w3[1] = hc_bytealign (w3[0], w3[1], offset); w3[0] = hc_bytealign (w2[3], w3[0], offset); w2[3] = hc_bytealign (w2[2], w2[3], offset); w2[2] = hc_bytealign (w2[1], w2[2], offset); w2[1] = hc_bytealign (w2[0], w2[1], offset); w2[0] = hc_bytealign (w1[3], w2[0], offset); w1[3] = hc_bytealign (w1[2], w1[3], offset); w1[2] = hc_bytealign (w1[1], w1[2], offset); w1[1] = hc_bytealign (w1[0], w1[1], offset); w1[0] = hc_bytealign (w0[3], 
w1[0], offset); w0[3] = hc_bytealign (w0[2], w0[3], offset); w0[2] = hc_bytealign (w0[1], w0[2], offset); w0[1] = hc_bytealign (w0[0], w0[1], offset); w0[0] = hc_bytealign ( 0, w0[0], offset); break; case 1: w7[3] = hc_bytealign (w7[1], w7[2], offset); w7[2] = hc_bytealign (w7[0], w7[1], offset); w7[1] = hc_bytealign (w6[3], w7[0], offset); w7[0] = hc_bytealign (w6[2], w6[3], offset); w6[3] = hc_bytealign (w6[1], w6[2], offset); w6[2] = hc_bytealign (w6[0], w6[1], offset); w6[1] = hc_bytealign (w5[3], w6[0], offset); w6[0] = hc_bytealign (w5[2], w5[3], offset); w5[3] = hc_bytealign (w5[1], w5[2], offset); w5[2] = hc_bytealign (w5[0], w5[1], offset); w5[1] = hc_bytealign (w4[3], w5[0], offset); w5[0] = hc_bytealign (w4[2], w4[3], offset); w4[3] = hc_bytealign (w4[1], w4[2], offset); w4[2] = hc_bytealign (w4[0], w4[1], offset); w4[1] = hc_bytealign (w3[3], w4[0], offset); w4[0] = hc_bytealign (w3[2], w3[3], offset); w3[3] = hc_bytealign (w3[1], w3[2], offset); w3[2] = hc_bytealign (w3[0], w3[1], offset); w3[1] = hc_bytealign (w2[3], w3[0], offset); w3[0] = hc_bytealign (w2[2], w2[3], offset); w2[3] = hc_bytealign (w2[1], w2[2], offset); w2[2] = hc_bytealign (w2[0], w2[1], offset); w2[1] = hc_bytealign (w1[3], w2[0], offset); w2[0] = hc_bytealign (w1[2], w1[3], offset); w1[3] = hc_bytealign (w1[1], w1[2], offset); w1[2] = hc_bytealign (w1[0], w1[1], offset); w1[1] = hc_bytealign (w0[3], w1[0], offset); w1[0] = hc_bytealign (w0[2], w0[3], offset); w0[3] = hc_bytealign (w0[1], w0[2], offset); w0[2] = hc_bytealign (w0[0], w0[1], offset); w0[1] = hc_bytealign ( 0, w0[0], offset); w0[0] = 0; break; case 2: w7[3] = hc_bytealign (w7[0], w7[1], offset); w7[2] = hc_bytealign (w6[3], w7[0], offset); w7[1] = hc_bytealign (w6[2], w6[3], offset); w7[0] = hc_bytealign (w6[1], w6[2], offset); w6[3] = hc_bytealign (w6[0], w6[1], offset); w6[2] = hc_bytealign (w5[3], w6[0], offset); w6[1] = hc_bytealign (w5[2], w5[3], offset); w6[0] = hc_bytealign (w5[1], w5[2], offset); w5[3] = 
hc_bytealign (w5[0], w5[1], offset); w5[2] = hc_bytealign (w4[3], w5[0], offset); w5[1] = hc_bytealign (w4[2], w4[3], offset); w5[0] = hc_bytealign (w4[1], w4[2], offset); w4[3] = hc_bytealign (w4[0], w4[1], offset); w4[2] = hc_bytealign (w3[3], w4[0], offset); w4[1] = hc_bytealign (w3[2], w3[3], offset); w4[0] = hc_bytealign (w3[1], w3[2], offset); w3[3] = hc_bytealign (w3[0], w3[1], offset); w3[2] = hc_bytealign (w2[3], w3[0], offset); w3[1] = hc_bytealign (w2[2], w2[3], offset); w3[0] = hc_bytealign (w2[1], w2[2], offset); w2[3] = hc_bytealign (w2[0], w2[1], offset); w2[2] = hc_bytealign (w1[3], w2[0], offset); w2[1] = hc_bytealign (w1[2], w1[3], offset); w2[0] = hc_bytealign (w1[1], w1[2], offset); w1[3] = hc_bytealign (w1[0], w1[1], offset); w1[2] = hc_bytealign (w0[3], w1[0], offset); w1[1] = hc_bytealign (w0[2], w0[3], offset); w1[0] = hc_bytealign (w0[1], w0[2], offset); w0[3] = hc_bytealign (w0[0], w0[1], offset); w0[2] = hc_bytealign ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_bytealign (w6[3], w7[0], offset); w7[2] = hc_bytealign (w6[2], w6[3], offset); w7[1] = hc_bytealign (w6[1], w6[2], offset); w7[0] = hc_bytealign (w6[0], w6[1], offset); w6[3] = hc_bytealign (w5[3], w6[0], offset); w6[2] = hc_bytealign (w5[2], w5[3], offset); w6[1] = hc_bytealign (w5[1], w5[2], offset); w6[0] = hc_bytealign (w5[0], w5[1], offset); w5[3] = hc_bytealign (w4[3], w5[0], offset); w5[2] = hc_bytealign (w4[2], w4[3], offset); w5[1] = hc_bytealign (w4[1], w4[2], offset); w5[0] = hc_bytealign (w4[0], w4[1], offset); w4[3] = hc_bytealign (w3[3], w4[0], offset); w4[2] = hc_bytealign (w3[2], w3[3], offset); w4[1] = hc_bytealign (w3[1], w3[2], offset); w4[0] = hc_bytealign (w3[0], w3[1], offset); w3[3] = hc_bytealign (w2[3], w3[0], offset); w3[2] = hc_bytealign (w2[2], w2[3], offset); w3[1] = hc_bytealign (w2[1], w2[2], offset); w3[0] = hc_bytealign (w2[0], w2[1], offset); w2[3] = hc_bytealign (w1[3], w2[0], offset); w2[2] = hc_bytealign (w1[2], w1[3], 
offset); w2[1] = hc_bytealign (w1[1], w1[2], offset); w2[0] = hc_bytealign (w1[0], w1[1], offset); w1[3] = hc_bytealign (w0[3], w1[0], offset); w1[2] = hc_bytealign (w0[2], w0[3], offset); w1[1] = hc_bytealign (w0[1], w0[2], offset); w1[0] = hc_bytealign (w0[0], w0[1], offset); w0[3] = hc_bytealign ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_bytealign (w6[2], w6[3], offset); w7[2] = hc_bytealign (w6[1], w6[2], offset); w7[1] = hc_bytealign (w6[0], w6[1], offset); w7[0] = hc_bytealign (w5[3], w6[0], offset); w6[3] = hc_bytealign (w5[2], w5[3], offset); w6[2] = hc_bytealign (w5[1], w5[2], offset); w6[1] = hc_bytealign (w5[0], w5[1], offset); w6[0] = hc_bytealign (w4[3], w5[0], offset); w5[3] = hc_bytealign (w4[2], w4[3], offset); w5[2] = hc_bytealign (w4[1], w4[2], offset); w5[1] = hc_bytealign (w4[0], w4[1], offset); w5[0] = hc_bytealign (w3[3], w4[0], offset); w4[3] = hc_bytealign (w3[2], w3[3], offset); w4[2] = hc_bytealign (w3[1], w3[2], offset); w4[1] = hc_bytealign (w3[0], w3[1], offset); w4[0] = hc_bytealign (w2[3], w3[0], offset); w3[3] = hc_bytealign (w2[2], w2[3], offset); w3[2] = hc_bytealign (w2[1], w2[2], offset); w3[1] = hc_bytealign (w2[0], w2[1], offset); w3[0] = hc_bytealign (w1[3], w2[0], offset); w2[3] = hc_bytealign (w1[2], w1[3], offset); w2[2] = hc_bytealign (w1[1], w1[2], offset); w2[1] = hc_bytealign (w1[0], w1[1], offset); w2[0] = hc_bytealign (w0[3], w1[0], offset); w1[3] = hc_bytealign (w0[2], w0[3], offset); w1[2] = hc_bytealign (w0[1], w0[2], offset); w1[1] = hc_bytealign (w0[0], w0[1], offset); w1[0] = hc_bytealign ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_bytealign (w6[1], w6[2], offset); w7[2] = hc_bytealign (w6[0], w6[1], offset); w7[1] = hc_bytealign (w5[3], w6[0], offset); w7[0] = hc_bytealign (w5[2], w5[3], offset); w6[3] = hc_bytealign (w5[1], w5[2], offset); w6[2] = hc_bytealign (w5[0], w5[1], offset); w6[1] = hc_bytealign (w4[3], w5[0], offset); 
w6[0] = hc_bytealign (w4[2], w4[3], offset); w5[3] = hc_bytealign (w4[1], w4[2], offset); w5[2] = hc_bytealign (w4[0], w4[1], offset); w5[1] = hc_bytealign (w3[3], w4[0], offset); w5[0] = hc_bytealign (w3[2], w3[3], offset); w4[3] = hc_bytealign (w3[1], w3[2], offset); w4[2] = hc_bytealign (w3[0], w3[1], offset); w4[1] = hc_bytealign (w2[3], w3[0], offset); w4[0] = hc_bytealign (w2[2], w2[3], offset); w3[3] = hc_bytealign (w2[1], w2[2], offset); w3[2] = hc_bytealign (w2[0], w2[1], offset); w3[1] = hc_bytealign (w1[3], w2[0], offset); w3[0] = hc_bytealign (w1[2], w1[3], offset); w2[3] = hc_bytealign (w1[1], w1[2], offset); w2[2] = hc_bytealign (w1[0], w1[1], offset); w2[1] = hc_bytealign (w0[3], w1[0], offset); w2[0] = hc_bytealign (w0[2], w0[3], offset); w1[3] = hc_bytealign (w0[1], w0[2], offset); w1[2] = hc_bytealign (w0[0], w0[1], offset); w1[1] = hc_bytealign ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_bytealign (w6[0], w6[1], offset); w7[2] = hc_bytealign (w5[3], w6[0], offset); w7[1] = hc_bytealign (w5[2], w5[3], offset); w7[0] = hc_bytealign (w5[1], w5[2], offset); w6[3] = hc_bytealign (w5[0], w5[1], offset); w6[2] = hc_bytealign (w4[3], w5[0], offset); w6[1] = hc_bytealign (w4[2], w4[3], offset); w6[0] = hc_bytealign (w4[1], w4[2], offset); w5[3] = hc_bytealign (w4[0], w4[1], offset); w5[2] = hc_bytealign (w3[3], w4[0], offset); w5[1] = hc_bytealign (w3[2], w3[3], offset); w5[0] = hc_bytealign (w3[1], w3[2], offset); w4[3] = hc_bytealign (w3[0], w3[1], offset); w4[2] = hc_bytealign (w2[3], w3[0], offset); w4[1] = hc_bytealign (w2[2], w2[3], offset); w4[0] = hc_bytealign (w2[1], w2[2], offset); w3[3] = hc_bytealign (w2[0], w2[1], offset); w3[2] = hc_bytealign (w1[3], w2[0], offset); w3[1] = hc_bytealign (w1[2], w1[3], offset); w3[0] = hc_bytealign (w1[1], w1[2], offset); w2[3] = hc_bytealign (w1[0], w1[1], offset); w2[2] = hc_bytealign (w0[3], w1[0], offset); w2[1] = hc_bytealign (w0[2], w0[3], 
offset); w2[0] = hc_bytealign (w0[1], w0[2], offset); w1[3] = hc_bytealign (w0[0], w0[1], offset); w1[2] = hc_bytealign ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_bytealign (w5[3], w6[0], offset); w7[2] = hc_bytealign (w5[2], w5[3], offset); w7[1] = hc_bytealign (w5[1], w5[2], offset); w7[0] = hc_bytealign (w5[0], w5[1], offset); w6[3] = hc_bytealign (w4[3], w5[0], offset); w6[2] = hc_bytealign (w4[2], w4[3], offset); w6[1] = hc_bytealign (w4[1], w4[2], offset); w6[0] = hc_bytealign (w4[0], w4[1], offset); w5[3] = hc_bytealign (w3[3], w4[0], offset); w5[2] = hc_bytealign (w3[2], w3[3], offset); w5[1] = hc_bytealign (w3[1], w3[2], offset); w5[0] = hc_bytealign (w3[0], w3[1], offset); w4[3] = hc_bytealign (w2[3], w3[0], offset); w4[2] = hc_bytealign (w2[2], w2[3], offset); w4[1] = hc_bytealign (w2[1], w2[2], offset); w4[0] = hc_bytealign (w2[0], w2[1], offset); w3[3] = hc_bytealign (w1[3], w2[0], offset); w3[2] = hc_bytealign (w1[2], w1[3], offset); w3[1] = hc_bytealign (w1[1], w1[2], offset); w3[0] = hc_bytealign (w1[0], w1[1], offset); w2[3] = hc_bytealign (w0[3], w1[0], offset); w2[2] = hc_bytealign (w0[2], w0[3], offset); w2[1] = hc_bytealign (w0[1], w0[2], offset); w2[0] = hc_bytealign (w0[0], w0[1], offset); w1[3] = hc_bytealign ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_bytealign (w5[2], w5[3], offset); w7[2] = hc_bytealign (w5[1], w5[2], offset); w7[1] = hc_bytealign (w5[0], w5[1], offset); w7[0] = hc_bytealign (w4[3], w5[0], offset); w6[3] = hc_bytealign (w4[2], w4[3], offset); w6[2] = hc_bytealign (w4[1], w4[2], offset); w6[1] = hc_bytealign (w4[0], w4[1], offset); w6[0] = hc_bytealign (w3[3], w4[0], offset); w5[3] = hc_bytealign (w3[2], w3[3], offset); w5[2] = hc_bytealign (w3[1], w3[2], offset); w5[1] = hc_bytealign (w3[0], w3[1], offset); w5[0] = hc_bytealign (w2[3], w3[0], offset); w4[3] = hc_bytealign 
(w2[2], w2[3], offset); w4[2] = hc_bytealign (w2[1], w2[2], offset); w4[1] = hc_bytealign (w2[0], w2[1], offset); w4[0] = hc_bytealign (w1[3], w2[0], offset); w3[3] = hc_bytealign (w1[2], w1[3], offset); w3[2] = hc_bytealign (w1[1], w1[2], offset); w3[1] = hc_bytealign (w1[0], w1[1], offset); w3[0] = hc_bytealign (w0[3], w1[0], offset); w2[3] = hc_bytealign (w0[2], w0[3], offset); w2[2] = hc_bytealign (w0[1], w0[2], offset); w2[1] = hc_bytealign (w0[0], w0[1], offset); w2[0] = hc_bytealign ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_bytealign (w5[1], w5[2], offset); w7[2] = hc_bytealign (w5[0], w5[1], offset); w7[1] = hc_bytealign (w4[3], w5[0], offset); w7[0] = hc_bytealign (w4[2], w4[3], offset); w6[3] = hc_bytealign (w4[1], w4[2], offset); w6[2] = hc_bytealign (w4[0], w4[1], offset); w6[1] = hc_bytealign (w3[3], w4[0], offset); w6[0] = hc_bytealign (w3[2], w3[3], offset); w5[3] = hc_bytealign (w3[1], w3[2], offset); w5[2] = hc_bytealign (w3[0], w3[1], offset); w5[1] = hc_bytealign (w2[3], w3[0], offset); w5[0] = hc_bytealign (w2[2], w2[3], offset); w4[3] = hc_bytealign (w2[1], w2[2], offset); w4[2] = hc_bytealign (w2[0], w2[1], offset); w4[1] = hc_bytealign (w1[3], w2[0], offset); w4[0] = hc_bytealign (w1[2], w1[3], offset); w3[3] = hc_bytealign (w1[1], w1[2], offset); w3[2] = hc_bytealign (w1[0], w1[1], offset); w3[1] = hc_bytealign (w0[3], w1[0], offset); w3[0] = hc_bytealign (w0[2], w0[3], offset); w2[3] = hc_bytealign (w0[1], w0[2], offset); w2[2] = hc_bytealign (w0[0], w0[1], offset); w2[1] = hc_bytealign ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_bytealign (w5[0], w5[1], offset); w7[2] = hc_bytealign (w4[3], w5[0], offset); w7[1] = hc_bytealign (w4[2], w4[3], offset); w7[0] = hc_bytealign (w4[1], w4[2], offset); w6[3] = hc_bytealign (w4[0], w4[1], offset); w6[2] = 
hc_bytealign (w3[3], w4[0], offset); w6[1] = hc_bytealign (w3[2], w3[3], offset); w6[0] = hc_bytealign (w3[1], w3[2], offset); w5[3] = hc_bytealign (w3[0], w3[1], offset); w5[2] = hc_bytealign (w2[3], w3[0], offset); w5[1] = hc_bytealign (w2[2], w2[3], offset); w5[0] = hc_bytealign (w2[1], w2[2], offset); w4[3] = hc_bytealign (w2[0], w2[1], offset); w4[2] = hc_bytealign (w1[3], w2[0], offset); w4[1] = hc_bytealign (w1[2], w1[3], offset); w4[0] = hc_bytealign (w1[1], w1[2], offset); w3[3] = hc_bytealign (w1[0], w1[1], offset); w3[2] = hc_bytealign (w0[3], w1[0], offset); w3[1] = hc_bytealign (w0[2], w0[3], offset); w3[0] = hc_bytealign (w0[1], w0[2], offset); w2[3] = hc_bytealign (w0[0], w0[1], offset); w2[2] = hc_bytealign ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_bytealign (w4[3], w5[0], offset); w7[2] = hc_bytealign (w4[2], w4[3], offset); w7[1] = hc_bytealign (w4[1], w4[2], offset); w7[0] = hc_bytealign (w4[0], w4[1], offset); w6[3] = hc_bytealign (w3[3], w4[0], offset); w6[2] = hc_bytealign (w3[2], w3[3], offset); w6[1] = hc_bytealign (w3[1], w3[2], offset); w6[0] = hc_bytealign (w3[0], w3[1], offset); w5[3] = hc_bytealign (w2[3], w3[0], offset); w5[2] = hc_bytealign (w2[2], w2[3], offset); w5[1] = hc_bytealign (w2[1], w2[2], offset); w5[0] = hc_bytealign (w2[0], w2[1], offset); w4[3] = hc_bytealign (w1[3], w2[0], offset); w4[2] = hc_bytealign (w1[2], w1[3], offset); w4[1] = hc_bytealign (w1[1], w1[2], offset); w4[0] = hc_bytealign (w1[0], w1[1], offset); w3[3] = hc_bytealign (w0[3], w1[0], offset); w3[2] = hc_bytealign (w0[2], w0[3], offset); w3[1] = hc_bytealign (w0[1], w0[2], offset); w3[0] = hc_bytealign (w0[0], w0[1], offset); w2[3] = hc_bytealign ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_bytealign (w4[2], w4[3], 
offset); w7[2] = hc_bytealign (w4[1], w4[2], offset); w7[1] = hc_bytealign (w4[0], w4[1], offset); w7[0] = hc_bytealign (w3[3], w4[0], offset); w6[3] = hc_bytealign (w3[2], w3[3], offset); w6[2] = hc_bytealign (w3[1], w3[2], offset); w6[1] = hc_bytealign (w3[0], w3[1], offset); w6[0] = hc_bytealign (w2[3], w3[0], offset); w5[3] = hc_bytealign (w2[2], w2[3], offset); w5[2] = hc_bytealign (w2[1], w2[2], offset); w5[1] = hc_bytealign (w2[0], w2[1], offset); w5[0] = hc_bytealign (w1[3], w2[0], offset); w4[3] = hc_bytealign (w1[2], w1[3], offset); w4[2] = hc_bytealign (w1[1], w1[2], offset); w4[1] = hc_bytealign (w1[0], w1[1], offset); w4[0] = hc_bytealign (w0[3], w1[0], offset); w3[3] = hc_bytealign (w0[2], w0[3], offset); w3[2] = hc_bytealign (w0[1], w0[2], offset); w3[1] = hc_bytealign (w0[0], w0[1], offset); w3[0] = hc_bytealign ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_bytealign (w4[1], w4[2], offset); w7[2] = hc_bytealign (w4[0], w4[1], offset); w7[1] = hc_bytealign (w3[3], w4[0], offset); w7[0] = hc_bytealign (w3[2], w3[3], offset); w6[3] = hc_bytealign (w3[1], w3[2], offset); w6[2] = hc_bytealign (w3[0], w3[1], offset); w6[1] = hc_bytealign (w2[3], w3[0], offset); w6[0] = hc_bytealign (w2[2], w2[3], offset); w5[3] = hc_bytealign (w2[1], w2[2], offset); w5[2] = hc_bytealign (w2[0], w2[1], offset); w5[1] = hc_bytealign (w1[3], w2[0], offset); w5[0] = hc_bytealign (w1[2], w1[3], offset); w4[3] = hc_bytealign (w1[1], w1[2], offset); w4[2] = hc_bytealign (w1[0], w1[1], offset); w4[1] = hc_bytealign (w0[3], w1[0], offset); w4[0] = hc_bytealign (w0[2], w0[3], offset); w3[3] = hc_bytealign (w0[1], w0[2], offset); w3[2] = hc_bytealign (w0[0], w0[1], offset); w3[1] = hc_bytealign ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] 
= 0; break; case 14: w7[3] = hc_bytealign (w4[0], w4[1], offset); w7[2] = hc_bytealign (w3[3], w4[0], offset); w7[1] = hc_bytealign (w3[2], w3[3], offset); w7[0] = hc_bytealign (w3[1], w3[2], offset); w6[3] = hc_bytealign (w3[0], w3[1], offset); w6[2] = hc_bytealign (w2[3], w3[0], offset); w6[1] = hc_bytealign (w2[2], w2[3], offset); w6[0] = hc_bytealign (w2[1], w2[2], offset); w5[3] = hc_bytealign (w2[0], w2[1], offset); w5[2] = hc_bytealign (w1[3], w2[0], offset); w5[1] = hc_bytealign (w1[2], w1[3], offset); w5[0] = hc_bytealign (w1[1], w1[2], offset); w4[3] = hc_bytealign (w1[0], w1[1], offset); w4[2] = hc_bytealign (w0[3], w1[0], offset); w4[1] = hc_bytealign (w0[2], w0[3], offset); w4[0] = hc_bytealign (w0[1], w0[2], offset); w3[3] = hc_bytealign (w0[0], w0[1], offset); w3[2] = hc_bytealign ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_bytealign (w3[3], w4[0], offset); w7[2] = hc_bytealign (w3[2], w3[3], offset); w7[1] = hc_bytealign (w3[1], w3[2], offset); w7[0] = hc_bytealign (w3[0], w3[1], offset); w6[3] = hc_bytealign (w2[3], w3[0], offset); w6[2] = hc_bytealign (w2[2], w2[3], offset); w6[1] = hc_bytealign (w2[1], w2[2], offset); w6[0] = hc_bytealign (w2[0], w2[1], offset); w5[3] = hc_bytealign (w1[3], w2[0], offset); w5[2] = hc_bytealign (w1[2], w1[3], offset); w5[1] = hc_bytealign (w1[1], w1[2], offset); w5[0] = hc_bytealign (w1[0], w1[1], offset); w4[3] = hc_bytealign (w0[3], w1[0], offset); w4[2] = hc_bytealign (w0[2], w0[3], offset); w4[1] = hc_bytealign (w0[1], w0[2], offset); w4[0] = hc_bytealign (w0[0], w0[1], offset); w3[3] = hc_bytealign ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_bytealign (w3[2], w3[3], offset); w7[2] = 
hc_bytealign (w3[1], w3[2], offset); w7[1] = hc_bytealign (w3[0], w3[1], offset); w7[0] = hc_bytealign (w2[3], w3[0], offset); w6[3] = hc_bytealign (w2[2], w2[3], offset); w6[2] = hc_bytealign (w2[1], w2[2], offset); w6[1] = hc_bytealign (w2[0], w2[1], offset); w6[0] = hc_bytealign (w1[3], w2[0], offset); w5[3] = hc_bytealign (w1[2], w1[3], offset); w5[2] = hc_bytealign (w1[1], w1[2], offset); w5[1] = hc_bytealign (w1[0], w1[1], offset); w5[0] = hc_bytealign (w0[3], w1[0], offset); w4[3] = hc_bytealign (w0[2], w0[3], offset); w4[2] = hc_bytealign (w0[1], w0[2], offset); w4[1] = hc_bytealign (w0[0], w0[1], offset); w4[0] = hc_bytealign ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_bytealign (w3[1], w3[2], offset); w7[2] = hc_bytealign (w3[0], w3[1], offset); w7[1] = hc_bytealign (w2[3], w3[0], offset); w7[0] = hc_bytealign (w2[2], w2[3], offset); w6[3] = hc_bytealign (w2[1], w2[2], offset); w6[2] = hc_bytealign (w2[0], w2[1], offset); w6[1] = hc_bytealign (w1[3], w2[0], offset); w6[0] = hc_bytealign (w1[2], w1[3], offset); w5[3] = hc_bytealign (w1[1], w1[2], offset); w5[2] = hc_bytealign (w1[0], w1[1], offset); w5[1] = hc_bytealign (w0[3], w1[0], offset); w5[0] = hc_bytealign (w0[2], w0[3], offset); w4[3] = hc_bytealign (w0[1], w0[2], offset); w4[2] = hc_bytealign (w0[0], w0[1], offset); w4[1] = hc_bytealign ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_bytealign (w3[0], w3[1], offset); w7[2] = hc_bytealign (w2[3], w3[0], offset); w7[1] = hc_bytealign (w2[2], w2[3], offset); w7[0] = hc_bytealign (w2[1], w2[2], offset); w6[3] = hc_bytealign (w2[0], w2[1], offset); w6[2] = hc_bytealign (w1[3], w2[0], 
offset); w6[1] = hc_bytealign (w1[2], w1[3], offset); w6[0] = hc_bytealign (w1[1], w1[2], offset); w5[3] = hc_bytealign (w1[0], w1[1], offset); w5[2] = hc_bytealign (w0[3], w1[0], offset); w5[1] = hc_bytealign (w0[2], w0[3], offset); w5[0] = hc_bytealign (w0[1], w0[2], offset); w4[3] = hc_bytealign (w0[0], w0[1], offset); w4[2] = hc_bytealign ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_bytealign (w2[3], w3[0], offset); w7[2] = hc_bytealign (w2[2], w2[3], offset); w7[1] = hc_bytealign (w2[1], w2[2], offset); w7[0] = hc_bytealign (w2[0], w2[1], offset); w6[3] = hc_bytealign (w1[3], w2[0], offset); w6[2] = hc_bytealign (w1[2], w1[3], offset); w6[1] = hc_bytealign (w1[1], w1[2], offset); w6[0] = hc_bytealign (w1[0], w1[1], offset); w5[3] = hc_bytealign (w0[3], w1[0], offset); w5[2] = hc_bytealign (w0[2], w0[3], offset); w5[1] = hc_bytealign (w0[1], w0[2], offset); w5[0] = hc_bytealign (w0[0], w0[1], offset); w4[3] = hc_bytealign ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_bytealign (w2[2], w2[3], offset); w7[2] = hc_bytealign (w2[1], w2[2], offset); w7[1] = hc_bytealign (w2[0], w2[1], offset); w7[0] = hc_bytealign (w1[3], w2[0], offset); w6[3] = hc_bytealign (w1[2], w1[3], offset); w6[2] = hc_bytealign (w1[1], w1[2], offset); w6[1] = hc_bytealign (w1[0], w1[1], offset); w6[0] = hc_bytealign (w0[3], w1[0], offset); w5[3] = hc_bytealign (w0[2], w0[3], offset); w5[2] = hc_bytealign (w0[1], w0[2], offset); w5[1] = hc_bytealign (w0[0], w0[1], offset); w5[0] = hc_bytealign ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 
0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_bytealign (w2[1], w2[2], offset); w7[2] = hc_bytealign (w2[0], w2[1], offset); w7[1] = hc_bytealign (w1[3], w2[0], offset); w7[0] = hc_bytealign (w1[2], w1[3], offset); w6[3] = hc_bytealign (w1[1], w1[2], offset); w6[2] = hc_bytealign (w1[0], w1[1], offset); w6[1] = hc_bytealign (w0[3], w1[0], offset); w6[0] = hc_bytealign (w0[2], w0[3], offset); w5[3] = hc_bytealign (w0[1], w0[2], offset); w5[2] = hc_bytealign (w0[0], w0[1], offset); w5[1] = hc_bytealign ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_bytealign (w2[0], w2[1], offset); w7[2] = hc_bytealign (w1[3], w2[0], offset); w7[1] = hc_bytealign (w1[2], w1[3], offset); w7[0] = hc_bytealign (w1[1], w1[2], offset); w6[3] = hc_bytealign (w1[0], w1[1], offset); w6[2] = hc_bytealign (w0[3], w1[0], offset); w6[1] = hc_bytealign (w0[2], w0[3], offset); w6[0] = hc_bytealign (w0[1], w0[2], offset); w5[3] = hc_bytealign (w0[0], w0[1], offset); w5[2] = hc_bytealign ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_bytealign (w1[3], w2[0], offset); w7[2] = hc_bytealign (w1[2], w1[3], offset); w7[1] = hc_bytealign (w1[1], w1[2], offset); w7[0] = hc_bytealign (w1[0], w1[1], offset); w6[3] = hc_bytealign (w0[3], w1[0], offset); w6[2] = hc_bytealign (w0[2], w0[3], offset); w6[1] = hc_bytealign (w0[1], w0[2], offset); w6[0] = hc_bytealign (w0[0], w0[1], offset); w5[3] = hc_bytealign ( 0, w0[0], 
offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_bytealign (w1[2], w1[3], offset); w7[2] = hc_bytealign (w1[1], w1[2], offset); w7[1] = hc_bytealign (w1[0], w1[1], offset); w7[0] = hc_bytealign (w0[3], w1[0], offset); w6[3] = hc_bytealign (w0[2], w0[3], offset); w6[2] = hc_bytealign (w0[1], w0[2], offset); w6[1] = hc_bytealign (w0[0], w0[1], offset); w6[0] = hc_bytealign ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_bytealign (w1[1], w1[2], offset); w7[2] = hc_bytealign (w1[0], w1[1], offset); w7[1] = hc_bytealign (w0[3], w1[0], offset); w7[0] = hc_bytealign (w0[2], w0[3], offset); w6[3] = hc_bytealign (w0[1], w0[2], offset); w6[2] = hc_bytealign (w0[0], w0[1], offset); w6[1] = hc_bytealign ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_bytealign (w1[0], w1[1], offset); w7[2] = hc_bytealign (w0[3], w1[0], offset); w7[1] = hc_bytealign (w0[2], w0[3], offset); w7[0] = hc_bytealign (w0[1], w0[2], offset); w6[3] = hc_bytealign (w0[0], w0[1], offset); w6[2] = hc_bytealign ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] 
= 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_bytealign (w0[3], w1[0], offset); w7[2] = hc_bytealign (w0[2], w0[3], offset); w7[1] = hc_bytealign (w0[1], w0[2], offset); w7[0] = hc_bytealign (w0[0], w0[1], offset); w6[3] = hc_bytealign ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_bytealign (w0[2], w0[3], offset); w7[2] = hc_bytealign (w0[1], w0[2], offset); w7[1] = hc_bytealign (w0[0], w0[1], offset); w7[0] = hc_bytealign ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: w7[3] = hc_bytealign (w0[1], w0[2], offset); w7[2] = hc_bytealign (w0[0], w0[1], offset); w7[1] = hc_bytealign ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_bytealign (w0[0], w0[1], offset); w7[2] = hc_bytealign ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; 
w0[0] = 0; break; case 31: w7[3] = hc_bytealign ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: w7[3] = hc_byte_perm (w7[2], w7[3], selector); w7[2] = hc_byte_perm (w7[1], w7[2], selector); w7[1] = hc_byte_perm (w7[0], w7[1], selector); w7[0] = hc_byte_perm (w6[3], w7[0], selector); w6[3] = hc_byte_perm (w6[2], w6[3], selector); w6[2] = hc_byte_perm (w6[1], w6[2], selector); w6[1] = hc_byte_perm (w6[0], w6[1], selector); w6[0] = hc_byte_perm (w5[3], w6[0], selector); w5[3] = hc_byte_perm (w5[2], w5[3], selector); w5[2] = hc_byte_perm (w5[1], w5[2], selector); w5[1] = hc_byte_perm (w5[0], w5[1], selector); w5[0] = hc_byte_perm (w4[3], w5[0], selector); w4[3] = hc_byte_perm (w4[2], w4[3], selector); w4[2] = hc_byte_perm (w4[1], w4[2], selector); w4[1] = hc_byte_perm (w4[0], w4[1], selector); w4[0] = hc_byte_perm (w3[3], w4[0], selector); w3[3] = hc_byte_perm (w3[2], w3[3], selector); w3[2] = hc_byte_perm (w3[1], w3[2], selector); w3[1] = hc_byte_perm (w3[0], w3[1], selector); w3[0] = hc_byte_perm (w2[3], w3[0], selector); w2[3] = hc_byte_perm (w2[2], w2[3], selector); w2[2] = hc_byte_perm (w2[1], w2[2], selector); w2[1] = hc_byte_perm (w2[0], w2[1], selector); w2[0] = hc_byte_perm (w1[3], w2[0], selector); w1[3] = hc_byte_perm (w1[2], 
w1[3], selector); w1[2] = hc_byte_perm (w1[1], w1[2], selector); w1[1] = hc_byte_perm (w1[0], w1[1], selector); w1[0] = hc_byte_perm (w0[3], w1[0], selector); w0[3] = hc_byte_perm (w0[2], w0[3], selector); w0[2] = hc_byte_perm (w0[1], w0[2], selector); w0[1] = hc_byte_perm (w0[0], w0[1], selector); w0[0] = hc_byte_perm ( 0, w0[0], selector); break; case 1: w7[3] = hc_byte_perm (w7[1], w7[2], selector); w7[2] = hc_byte_perm (w7[0], w7[1], selector); w7[1] = hc_byte_perm (w6[3], w7[0], selector); w7[0] = hc_byte_perm (w6[2], w6[3], selector); w6[3] = hc_byte_perm (w6[1], w6[2], selector); w6[2] = hc_byte_perm (w6[0], w6[1], selector); w6[1] = hc_byte_perm (w5[3], w6[0], selector); w6[0] = hc_byte_perm (w5[2], w5[3], selector); w5[3] = hc_byte_perm (w5[1], w5[2], selector); w5[2] = hc_byte_perm (w5[0], w5[1], selector); w5[1] = hc_byte_perm (w4[3], w5[0], selector); w5[0] = hc_byte_perm (w4[2], w4[3], selector); w4[3] = hc_byte_perm (w4[1], w4[2], selector); w4[2] = hc_byte_perm (w4[0], w4[1], selector); w4[1] = hc_byte_perm (w3[3], w4[0], selector); w4[0] = hc_byte_perm (w3[2], w3[3], selector); w3[3] = hc_byte_perm (w3[1], w3[2], selector); w3[2] = hc_byte_perm (w3[0], w3[1], selector); w3[1] = hc_byte_perm (w2[3], w3[0], selector); w3[0] = hc_byte_perm (w2[2], w2[3], selector); w2[3] = hc_byte_perm (w2[1], w2[2], selector); w2[2] = hc_byte_perm (w2[0], w2[1], selector); w2[1] = hc_byte_perm (w1[3], w2[0], selector); w2[0] = hc_byte_perm (w1[2], w1[3], selector); w1[3] = hc_byte_perm (w1[1], w1[2], selector); w1[2] = hc_byte_perm (w1[0], w1[1], selector); w1[1] = hc_byte_perm (w0[3], w1[0], selector); w1[0] = hc_byte_perm (w0[2], w0[3], selector); w0[3] = hc_byte_perm (w0[1], w0[2], selector); w0[2] = hc_byte_perm (w0[0], w0[1], selector); w0[1] = hc_byte_perm ( 0, w0[0], selector); w0[0] = 0; break; case 2: w7[3] = hc_byte_perm (w7[0], w7[1], selector); w7[2] = hc_byte_perm (w6[3], w7[0], selector); w7[1] = hc_byte_perm (w6[2], w6[3], selector); w7[0] = 
hc_byte_perm (w6[1], w6[2], selector); w6[3] = hc_byte_perm (w6[0], w6[1], selector); w6[2] = hc_byte_perm (w5[3], w6[0], selector); w6[1] = hc_byte_perm (w5[2], w5[3], selector); w6[0] = hc_byte_perm (w5[1], w5[2], selector); w5[3] = hc_byte_perm (w5[0], w5[1], selector); w5[2] = hc_byte_perm (w4[3], w5[0], selector); w5[1] = hc_byte_perm (w4[2], w4[3], selector); w5[0] = hc_byte_perm (w4[1], w4[2], selector); w4[3] = hc_byte_perm (w4[0], w4[1], selector); w4[2] = hc_byte_perm (w3[3], w4[0], selector); w4[1] = hc_byte_perm (w3[2], w3[3], selector); w4[0] = hc_byte_perm (w3[1], w3[2], selector); w3[3] = hc_byte_perm (w3[0], w3[1], selector); w3[2] = hc_byte_perm (w2[3], w3[0], selector); w3[1] = hc_byte_perm (w2[2], w2[3], selector); w3[0] = hc_byte_perm (w2[1], w2[2], selector); w2[3] = hc_byte_perm (w2[0], w2[1], selector); w2[2] = hc_byte_perm (w1[3], w2[0], selector); w2[1] = hc_byte_perm (w1[2], w1[3], selector); w2[0] = hc_byte_perm (w1[1], w1[2], selector); w1[3] = hc_byte_perm (w1[0], w1[1], selector); w1[2] = hc_byte_perm (w0[3], w1[0], selector); w1[1] = hc_byte_perm (w0[2], w0[3], selector); w1[0] = hc_byte_perm (w0[1], w0[2], selector); w0[3] = hc_byte_perm (w0[0], w0[1], selector); w0[2] = hc_byte_perm ( 0, w0[0], selector); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_byte_perm (w6[3], w7[0], selector); w7[2] = hc_byte_perm (w6[2], w6[3], selector); w7[1] = hc_byte_perm (w6[1], w6[2], selector); w7[0] = hc_byte_perm (w6[0], w6[1], selector); w6[3] = hc_byte_perm (w5[3], w6[0], selector); w6[2] = hc_byte_perm (w5[2], w5[3], selector); w6[1] = hc_byte_perm (w5[1], w5[2], selector); w6[0] = hc_byte_perm (w5[0], w5[1], selector); w5[3] = hc_byte_perm (w4[3], w5[0], selector); w5[2] = hc_byte_perm (w4[2], w4[3], selector); w5[1] = hc_byte_perm (w4[1], w4[2], selector); w5[0] = hc_byte_perm (w4[0], w4[1], selector); w4[3] = hc_byte_perm (w3[3], w4[0], selector); w4[2] = hc_byte_perm (w3[2], w3[3], selector); w4[1] = hc_byte_perm (w3[1], w3[2], selector); 
w4[0] = hc_byte_perm (w3[0], w3[1], selector); w3[3] = hc_byte_perm (w2[3], w3[0], selector); w3[2] = hc_byte_perm (w2[2], w2[3], selector); w3[1] = hc_byte_perm (w2[1], w2[2], selector); w3[0] = hc_byte_perm (w2[0], w2[1], selector); w2[3] = hc_byte_perm (w1[3], w2[0], selector); w2[2] = hc_byte_perm (w1[2], w1[3], selector); w2[1] = hc_byte_perm (w1[1], w1[2], selector); w2[0] = hc_byte_perm (w1[0], w1[1], selector); w1[3] = hc_byte_perm (w0[3], w1[0], selector); w1[2] = hc_byte_perm (w0[2], w0[3], selector); w1[1] = hc_byte_perm (w0[1], w0[2], selector); w1[0] = hc_byte_perm (w0[0], w0[1], selector); w0[3] = hc_byte_perm ( 0, w0[0], selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_byte_perm (w6[2], w6[3], selector); w7[2] = hc_byte_perm (w6[1], w6[2], selector); w7[1] = hc_byte_perm (w6[0], w6[1], selector); w7[0] = hc_byte_perm (w5[3], w6[0], selector); w6[3] = hc_byte_perm (w5[2], w5[3], selector); w6[2] = hc_byte_perm (w5[1], w5[2], selector); w6[1] = hc_byte_perm (w5[0], w5[1], selector); w6[0] = hc_byte_perm (w4[3], w5[0], selector); w5[3] = hc_byte_perm (w4[2], w4[3], selector); w5[2] = hc_byte_perm (w4[1], w4[2], selector); w5[1] = hc_byte_perm (w4[0], w4[1], selector); w5[0] = hc_byte_perm (w3[3], w4[0], selector); w4[3] = hc_byte_perm (w3[2], w3[3], selector); w4[2] = hc_byte_perm (w3[1], w3[2], selector); w4[1] = hc_byte_perm (w3[0], w3[1], selector); w4[0] = hc_byte_perm (w2[3], w3[0], selector); w3[3] = hc_byte_perm (w2[2], w2[3], selector); w3[2] = hc_byte_perm (w2[1], w2[2], selector); w3[1] = hc_byte_perm (w2[0], w2[1], selector); w3[0] = hc_byte_perm (w1[3], w2[0], selector); w2[3] = hc_byte_perm (w1[2], w1[3], selector); w2[2] = hc_byte_perm (w1[1], w1[2], selector); w2[1] = hc_byte_perm (w1[0], w1[1], selector); w2[0] = hc_byte_perm (w0[3], w1[0], selector); w1[3] = hc_byte_perm (w0[2], w0[3], selector); w1[2] = hc_byte_perm (w0[1], w0[2], selector); w1[1] = hc_byte_perm (w0[0], w0[1], selector); w1[0] = hc_byte_perm ( 0, 
w0[0], selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_byte_perm (w6[1], w6[2], selector); w7[2] = hc_byte_perm (w6[0], w6[1], selector); w7[1] = hc_byte_perm (w5[3], w6[0], selector); w7[0] = hc_byte_perm (w5[2], w5[3], selector); w6[3] = hc_byte_perm (w5[1], w5[2], selector); w6[2] = hc_byte_perm (w5[0], w5[1], selector); w6[1] = hc_byte_perm (w4[3], w5[0], selector); w6[0] = hc_byte_perm (w4[2], w4[3], selector); w5[3] = hc_byte_perm (w4[1], w4[2], selector); w5[2] = hc_byte_perm (w4[0], w4[1], selector); w5[1] = hc_byte_perm (w3[3], w4[0], selector); w5[0] = hc_byte_perm (w3[2], w3[3], selector); w4[3] = hc_byte_perm (w3[1], w3[2], selector); w4[2] = hc_byte_perm (w3[0], w3[1], selector); w4[1] = hc_byte_perm (w2[3], w3[0], selector); w4[0] = hc_byte_perm (w2[2], w2[3], selector); w3[3] = hc_byte_perm (w2[1], w2[2], selector); w3[2] = hc_byte_perm (w2[0], w2[1], selector); w3[1] = hc_byte_perm (w1[3], w2[0], selector); w3[0] = hc_byte_perm (w1[2], w1[3], selector); w2[3] = hc_byte_perm (w1[1], w1[2], selector); w2[2] = hc_byte_perm (w1[0], w1[1], selector); w2[1] = hc_byte_perm (w0[3], w1[0], selector); w2[0] = hc_byte_perm (w0[2], w0[3], selector); w1[3] = hc_byte_perm (w0[1], w0[2], selector); w1[2] = hc_byte_perm (w0[0], w0[1], selector); w1[1] = hc_byte_perm ( 0, w0[0], selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_byte_perm (w6[0], w6[1], selector); w7[2] = hc_byte_perm (w5[3], w6[0], selector); w7[1] = hc_byte_perm (w5[2], w5[3], selector); w7[0] = hc_byte_perm (w5[1], w5[2], selector); w6[3] = hc_byte_perm (w5[0], w5[1], selector); w6[2] = hc_byte_perm (w4[3], w5[0], selector); w6[1] = hc_byte_perm (w4[2], w4[3], selector); w6[0] = hc_byte_perm (w4[1], w4[2], selector); w5[3] = hc_byte_perm (w4[0], w4[1], selector); w5[2] = hc_byte_perm (w3[3], w4[0], selector); w5[1] = hc_byte_perm (w3[2], w3[3], selector); w5[0] = hc_byte_perm (w3[1], w3[2], selector); w4[3] = hc_byte_perm 
(w3[0], w3[1], selector); w4[2] = hc_byte_perm (w2[3], w3[0], selector); w4[1] = hc_byte_perm (w2[2], w2[3], selector); w4[0] = hc_byte_perm (w2[1], w2[2], selector); w3[3] = hc_byte_perm (w2[0], w2[1], selector); w3[2] = hc_byte_perm (w1[3], w2[0], selector); w3[1] = hc_byte_perm (w1[2], w1[3], selector); w3[0] = hc_byte_perm (w1[1], w1[2], selector); w2[3] = hc_byte_perm (w1[0], w1[1], selector); w2[2] = hc_byte_perm (w0[3], w1[0], selector); w2[1] = hc_byte_perm (w0[2], w0[3], selector); w2[0] = hc_byte_perm (w0[1], w0[2], selector); w1[3] = hc_byte_perm (w0[0], w0[1], selector); w1[2] = hc_byte_perm ( 0, w0[0], selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_byte_perm (w5[3], w6[0], selector); w7[2] = hc_byte_perm (w5[2], w5[3], selector); w7[1] = hc_byte_perm (w5[1], w5[2], selector); w7[0] = hc_byte_perm (w5[0], w5[1], selector); w6[3] = hc_byte_perm (w4[3], w5[0], selector); w6[2] = hc_byte_perm (w4[2], w4[3], selector); w6[1] = hc_byte_perm (w4[1], w4[2], selector); w6[0] = hc_byte_perm (w4[0], w4[1], selector); w5[3] = hc_byte_perm (w3[3], w4[0], selector); w5[2] = hc_byte_perm (w3[2], w3[3], selector); w5[1] = hc_byte_perm (w3[1], w3[2], selector); w5[0] = hc_byte_perm (w3[0], w3[1], selector); w4[3] = hc_byte_perm (w2[3], w3[0], selector); w4[2] = hc_byte_perm (w2[2], w2[3], selector); w4[1] = hc_byte_perm (w2[1], w2[2], selector); w4[0] = hc_byte_perm (w2[0], w2[1], selector); w3[3] = hc_byte_perm (w1[3], w2[0], selector); w3[2] = hc_byte_perm (w1[2], w1[3], selector); w3[1] = hc_byte_perm (w1[1], w1[2], selector); w3[0] = hc_byte_perm (w1[0], w1[1], selector); w2[3] = hc_byte_perm (w0[3], w1[0], selector); w2[2] = hc_byte_perm (w0[2], w0[3], selector); w2[1] = hc_byte_perm (w0[1], w0[2], selector); w2[0] = hc_byte_perm (w0[0], w0[1], selector); w1[3] = hc_byte_perm ( 0, w0[0], selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_byte_perm 
(w5[2], w5[3], selector); w7[2] = hc_byte_perm (w5[1], w5[2], selector); w7[1] = hc_byte_perm (w5[0], w5[1], selector); w7[0] = hc_byte_perm (w4[3], w5[0], selector); w6[3] = hc_byte_perm (w4[2], w4[3], selector); w6[2] = hc_byte_perm (w4[1], w4[2], selector); w6[1] = hc_byte_perm (w4[0], w4[1], selector); w6[0] = hc_byte_perm (w3[3], w4[0], selector); w5[3] = hc_byte_perm (w3[2], w3[3], selector); w5[2] = hc_byte_perm (w3[1], w3[2], selector); w5[1] = hc_byte_perm (w3[0], w3[1], selector); w5[0] = hc_byte_perm (w2[3], w3[0], selector); w4[3] = hc_byte_perm (w2[2], w2[3], selector); w4[2] = hc_byte_perm (w2[1], w2[2], selector); w4[1] = hc_byte_perm (w2[0], w2[1], selector); w4[0] = hc_byte_perm (w1[3], w2[0], selector); w3[3] = hc_byte_perm (w1[2], w1[3], selector); w3[2] = hc_byte_perm (w1[1], w1[2], selector); w3[1] = hc_byte_perm (w1[0], w1[1], selector); w3[0] = hc_byte_perm (w0[3], w1[0], selector); w2[3] = hc_byte_perm (w0[2], w0[3], selector); w2[2] = hc_byte_perm (w0[1], w0[2], selector); w2[1] = hc_byte_perm (w0[0], w0[1], selector); w2[0] = hc_byte_perm ( 0, w0[0], selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_byte_perm (w5[1], w5[2], selector); w7[2] = hc_byte_perm (w5[0], w5[1], selector); w7[1] = hc_byte_perm (w4[3], w5[0], selector); w7[0] = hc_byte_perm (w4[2], w4[3], selector); w6[3] = hc_byte_perm (w4[1], w4[2], selector); w6[2] = hc_byte_perm (w4[0], w4[1], selector); w6[1] = hc_byte_perm (w3[3], w4[0], selector); w6[0] = hc_byte_perm (w3[2], w3[3], selector); w5[3] = hc_byte_perm (w3[1], w3[2], selector); w5[2] = hc_byte_perm (w3[0], w3[1], selector); w5[1] = hc_byte_perm (w2[3], w3[0], selector); w5[0] = hc_byte_perm (w2[2], w2[3], selector); w4[3] = hc_byte_perm (w2[1], w2[2], selector); w4[2] = hc_byte_perm (w2[0], w2[1], selector); w4[1] = hc_byte_perm (w1[3], w2[0], selector); w4[0] = hc_byte_perm (w1[2], w1[3], selector); w3[3] = hc_byte_perm (w1[1], w1[2], 
selector); w3[2] = hc_byte_perm (w1[0], w1[1], selector); w3[1] = hc_byte_perm (w0[3], w1[0], selector); w3[0] = hc_byte_perm (w0[2], w0[3], selector); w2[3] = hc_byte_perm (w0[1], w0[2], selector); w2[2] = hc_byte_perm (w0[0], w0[1], selector); w2[1] = hc_byte_perm ( 0, w0[0], selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_byte_perm (w5[0], w5[1], selector); w7[2] = hc_byte_perm (w4[3], w5[0], selector); w7[1] = hc_byte_perm (w4[2], w4[3], selector); w7[0] = hc_byte_perm (w4[1], w4[2], selector); w6[3] = hc_byte_perm (w4[0], w4[1], selector); w6[2] = hc_byte_perm (w3[3], w4[0], selector); w6[1] = hc_byte_perm (w3[2], w3[3], selector); w6[0] = hc_byte_perm (w3[1], w3[2], selector); w5[3] = hc_byte_perm (w3[0], w3[1], selector); w5[2] = hc_byte_perm (w2[3], w3[0], selector); w5[1] = hc_byte_perm (w2[2], w2[3], selector); w5[0] = hc_byte_perm (w2[1], w2[2], selector); w4[3] = hc_byte_perm (w2[0], w2[1], selector); w4[2] = hc_byte_perm (w1[3], w2[0], selector); w4[1] = hc_byte_perm (w1[2], w1[3], selector); w4[0] = hc_byte_perm (w1[1], w1[2], selector); w3[3] = hc_byte_perm (w1[0], w1[1], selector); w3[2] = hc_byte_perm (w0[3], w1[0], selector); w3[1] = hc_byte_perm (w0[2], w0[3], selector); w3[0] = hc_byte_perm (w0[1], w0[2], selector); w2[3] = hc_byte_perm (w0[0], w0[1], selector); w2[2] = hc_byte_perm ( 0, w0[0], selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_byte_perm (w4[3], w5[0], selector); w7[2] = hc_byte_perm (w4[2], w4[3], selector); w7[1] = hc_byte_perm (w4[1], w4[2], selector); w7[0] = hc_byte_perm (w4[0], w4[1], selector); w6[3] = hc_byte_perm (w3[3], w4[0], selector); w6[2] = hc_byte_perm (w3[2], w3[3], selector); w6[1] = hc_byte_perm (w3[1], w3[2], selector); w6[0] = hc_byte_perm (w3[0], w3[1], selector); w5[3] = hc_byte_perm (w2[3], w3[0], selector); w5[2] = 
hc_byte_perm (w2[2], w2[3], selector); w5[1] = hc_byte_perm (w2[1], w2[2], selector); w5[0] = hc_byte_perm (w2[0], w2[1], selector); w4[3] = hc_byte_perm (w1[3], w2[0], selector); w4[2] = hc_byte_perm (w1[2], w1[3], selector); w4[1] = hc_byte_perm (w1[1], w1[2], selector); w4[0] = hc_byte_perm (w1[0], w1[1], selector); w3[3] = hc_byte_perm (w0[3], w1[0], selector); w3[2] = hc_byte_perm (w0[2], w0[3], selector); w3[1] = hc_byte_perm (w0[1], w0[2], selector); w3[0] = hc_byte_perm (w0[0], w0[1], selector); w2[3] = hc_byte_perm ( 0, w0[0], selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_byte_perm (w4[2], w4[3], selector); w7[2] = hc_byte_perm (w4[1], w4[2], selector); w7[1] = hc_byte_perm (w4[0], w4[1], selector); w7[0] = hc_byte_perm (w3[3], w4[0], selector); w6[3] = hc_byte_perm (w3[2], w3[3], selector); w6[2] = hc_byte_perm (w3[1], w3[2], selector); w6[1] = hc_byte_perm (w3[0], w3[1], selector); w6[0] = hc_byte_perm (w2[3], w3[0], selector); w5[3] = hc_byte_perm (w2[2], w2[3], selector); w5[2] = hc_byte_perm (w2[1], w2[2], selector); w5[1] = hc_byte_perm (w2[0], w2[1], selector); w5[0] = hc_byte_perm (w1[3], w2[0], selector); w4[3] = hc_byte_perm (w1[2], w1[3], selector); w4[2] = hc_byte_perm (w1[1], w1[2], selector); w4[1] = hc_byte_perm (w1[0], w1[1], selector); w4[0] = hc_byte_perm (w0[3], w1[0], selector); w3[3] = hc_byte_perm (w0[2], w0[3], selector); w3[2] = hc_byte_perm (w0[1], w0[2], selector); w3[1] = hc_byte_perm (w0[0], w0[1], selector); w3[0] = hc_byte_perm ( 0, w0[0], selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_byte_perm (w4[1], w4[2], selector); w7[2] = hc_byte_perm (w4[0], w4[1], selector); w7[1] = hc_byte_perm (w3[3], w4[0], selector); w7[0] = hc_byte_perm (w3[2], w3[3], selector); w6[3] = hc_byte_perm (w3[1], w3[2], 
selector); w6[2] = hc_byte_perm (w3[0], w3[1], selector); w6[1] = hc_byte_perm (w2[3], w3[0], selector); w6[0] = hc_byte_perm (w2[2], w2[3], selector); w5[3] = hc_byte_perm (w2[1], w2[2], selector); w5[2] = hc_byte_perm (w2[0], w2[1], selector); w5[1] = hc_byte_perm (w1[3], w2[0], selector); w5[0] = hc_byte_perm (w1[2], w1[3], selector); w4[3] = hc_byte_perm (w1[1], w1[2], selector); w4[2] = hc_byte_perm (w1[0], w1[1], selector); w4[1] = hc_byte_perm (w0[3], w1[0], selector); w4[0] = hc_byte_perm (w0[2], w0[3], selector); w3[3] = hc_byte_perm (w0[1], w0[2], selector); w3[2] = hc_byte_perm (w0[0], w0[1], selector); w3[1] = hc_byte_perm ( 0, w0[0], selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_byte_perm (w4[0], w4[1], selector); w7[2] = hc_byte_perm (w3[3], w4[0], selector); w7[1] = hc_byte_perm (w3[2], w3[3], selector); w7[0] = hc_byte_perm (w3[1], w3[2], selector); w6[3] = hc_byte_perm (w3[0], w3[1], selector); w6[2] = hc_byte_perm (w2[3], w3[0], selector); w6[1] = hc_byte_perm (w2[2], w2[3], selector); w6[0] = hc_byte_perm (w2[1], w2[2], selector); w5[3] = hc_byte_perm (w2[0], w2[1], selector); w5[2] = hc_byte_perm (w1[3], w2[0], selector); w5[1] = hc_byte_perm (w1[2], w1[3], selector); w5[0] = hc_byte_perm (w1[1], w1[2], selector); w4[3] = hc_byte_perm (w1[0], w1[1], selector); w4[2] = hc_byte_perm (w0[3], w1[0], selector); w4[1] = hc_byte_perm (w0[2], w0[3], selector); w4[0] = hc_byte_perm (w0[1], w0[2], selector); w3[3] = hc_byte_perm (w0[0], w0[1], selector); w3[2] = hc_byte_perm ( 0, w0[0], selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_byte_perm (w3[3], w4[0], selector); w7[2] = hc_byte_perm (w3[2], w3[3], selector); w7[1] = hc_byte_perm (w3[1], w3[2], selector); w7[0] = hc_byte_perm 
(w3[0], w3[1], selector); w6[3] = hc_byte_perm (w2[3], w3[0], selector); w6[2] = hc_byte_perm (w2[2], w2[3], selector); w6[1] = hc_byte_perm (w2[1], w2[2], selector); w6[0] = hc_byte_perm (w2[0], w2[1], selector); w5[3] = hc_byte_perm (w1[3], w2[0], selector); w5[2] = hc_byte_perm (w1[2], w1[3], selector); w5[1] = hc_byte_perm (w1[1], w1[2], selector); w5[0] = hc_byte_perm (w1[0], w1[1], selector); w4[3] = hc_byte_perm (w0[3], w1[0], selector); w4[2] = hc_byte_perm (w0[2], w0[3], selector); w4[1] = hc_byte_perm (w0[1], w0[2], selector); w4[0] = hc_byte_perm (w0[0], w0[1], selector); w3[3] = hc_byte_perm ( 0, w0[0], selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign (w7[3], 0, offset); w7[3] = hc_bytealign (w7[2], w7[3], offset); w7[2] = hc_bytealign (w7[1], w7[2], offset); w7[1] = hc_bytealign (w7[0], w7[1], offset); w7[0] = hc_bytealign (w6[3], w7[0], offset); w6[3] = hc_bytealign (w6[2], w6[3], offset); w6[2] = hc_bytealign (w6[1], w6[2], offset); w6[1] = hc_bytealign (w6[0], w6[1], offset); w6[0] = hc_bytealign (w5[3], w6[0], offset); w5[3] = hc_bytealign (w5[2], w5[3], offset); w5[2] = hc_bytealign (w5[1], w5[2], offset); w5[1] = hc_bytealign (w5[0], w5[1], offset); w5[0] = hc_bytealign (w4[3], w5[0], offset); w4[3] = hc_bytealign (w4[2], w4[3], 
offset); w4[2] = hc_bytealign (w4[1], w4[2], offset); w4[1] = hc_bytealign (w4[0], w4[1], offset); w4[0] = hc_bytealign (w3[3], w4[0], offset); w3[3] = hc_bytealign (w3[2], w3[3], offset); w3[2] = hc_bytealign (w3[1], w3[2], offset); w3[1] = hc_bytealign (w3[0], w3[1], offset); w3[0] = hc_bytealign (w2[3], w3[0], offset); w2[3] = hc_bytealign (w2[2], w2[3], offset); w2[2] = hc_bytealign (w2[1], w2[2], offset); w2[1] = hc_bytealign (w2[0], w2[1], offset); w2[0] = hc_bytealign (w1[3], w2[0], offset); w1[3] = hc_bytealign (w1[2], w1[3], offset); w1[2] = hc_bytealign (w1[1], w1[2], offset); w1[1] = hc_bytealign (w1[0], w1[1], offset); w1[0] = hc_bytealign (w0[3], w1[0], offset); w0[3] = hc_bytealign (w0[2], w0[3], offset); w0[2] = hc_bytealign (w0[1], w0[2], offset); w0[1] = hc_bytealign (w0[0], w0[1], offset); w0[0] = hc_bytealign ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign (w7[3], 0, offset); c0[0] = hc_bytealign (w7[2], w7[3], offset); w7[3] = hc_bytealign (w7[1], w7[2], offset); w7[2] = hc_bytealign (w7[0], w7[1], offset); w7[1] = hc_bytealign (w6[3], w7[0], offset); w7[0] = hc_bytealign (w6[2], w6[3], offset); w6[3] = hc_bytealign (w6[1], w6[2], offset); w6[2] = hc_bytealign (w6[0], w6[1], offset); w6[1] = hc_bytealign (w5[3], w6[0], offset); w6[0] = hc_bytealign (w5[2], w5[3], offset); w5[3] = hc_bytealign (w5[1], w5[2], offset); w5[2] = hc_bytealign (w5[0], w5[1], offset); w5[1] = hc_bytealign (w4[3], w5[0], offset); w5[0] = hc_bytealign (w4[2], w4[3], offset); w4[3] = hc_bytealign (w4[1], w4[2], offset); w4[2] = hc_bytealign (w4[0], w4[1], offset); w4[1] = hc_bytealign (w3[3], w4[0], offset); w4[0] = hc_bytealign (w3[2], w3[3], offset); w3[3] = hc_bytealign (w3[1], w3[2], offset); w3[2] = hc_bytealign (w3[0], w3[1], offset); w3[1] = hc_bytealign (w2[3], w3[0], offset); w3[0] = hc_bytealign (w2[2], w2[3], offset); w2[3] = hc_bytealign (w2[1], w2[2], offset); w2[2] = hc_bytealign (w2[0], w2[1], offset); w2[1] = hc_bytealign (w1[3], w2[0], offset); 
w2[0] = hc_bytealign (w1[2], w1[3], offset); w1[3] = hc_bytealign (w1[1], w1[2], offset); w1[2] = hc_bytealign (w1[0], w1[1], offset); w1[1] = hc_bytealign (w0[3], w1[0], offset); w1[0] = hc_bytealign (w0[2], w0[3], offset); w0[3] = hc_bytealign (w0[1], w0[2], offset); w0[2] = hc_bytealign (w0[0], w0[1], offset); w0[1] = hc_bytealign ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign (w7[3], 0, offset); c0[1] = hc_bytealign (w7[2], w7[3], offset); c0[0] = hc_bytealign (w7[1], w7[2], offset); w7[3] = hc_bytealign (w7[0], w7[1], offset); w7[2] = hc_bytealign (w6[3], w7[0], offset); w7[1] = hc_bytealign (w6[2], w6[3], offset); w7[0] = hc_bytealign (w6[1], w6[2], offset); w6[3] = hc_bytealign (w6[0], w6[1], offset); w6[2] = hc_bytealign (w5[3], w6[0], offset); w6[1] = hc_bytealign (w5[2], w5[3], offset); w6[0] = hc_bytealign (w5[1], w5[2], offset); w5[3] = hc_bytealign (w5[0], w5[1], offset); w5[2] = hc_bytealign (w4[3], w5[0], offset); w5[1] = hc_bytealign (w4[2], w4[3], offset); w5[0] = hc_bytealign (w4[1], w4[2], offset); w4[3] = hc_bytealign (w4[0], w4[1], offset); w4[2] = hc_bytealign (w3[3], w4[0], offset); w4[1] = hc_bytealign (w3[2], w3[3], offset); w4[0] = hc_bytealign (w3[1], w3[2], offset); w3[3] = hc_bytealign (w3[0], w3[1], offset); w3[2] = hc_bytealign (w2[3], w3[0], offset); w3[1] = hc_bytealign (w2[2], w2[3], offset); w3[0] = hc_bytealign (w2[1], w2[2], offset); w2[3] = hc_bytealign (w2[0], w2[1], offset); w2[2] = hc_bytealign (w1[3], w2[0], offset); w2[1] = hc_bytealign (w1[2], w1[3], offset); w2[0] = hc_bytealign (w1[1], w1[2], offset); w1[3] = hc_bytealign (w1[0], w1[1], offset); w1[2] = hc_bytealign (w0[3], w1[0], offset); w1[1] = hc_bytealign (w0[2], w0[3], offset); w1[0] = hc_bytealign (w0[1], w0[2], offset); w0[3] = hc_bytealign (w0[0], w0[1], offset); w0[2] = hc_bytealign ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign (w7[3], 0, offset); c0[2] = hc_bytealign (w7[2], w7[3], offset); c0[1] = 
hc_bytealign (w7[1], w7[2], offset); c0[0] = hc_bytealign (w7[0], w7[1], offset); w7[3] = hc_bytealign (w6[3], w7[0], offset); w7[2] = hc_bytealign (w6[2], w6[3], offset); w7[1] = hc_bytealign (w6[1], w6[2], offset); w7[0] = hc_bytealign (w6[0], w6[1], offset); w6[3] = hc_bytealign (w5[3], w6[0], offset); w6[2] = hc_bytealign (w5[2], w5[3], offset); w6[1] = hc_bytealign (w5[1], w5[2], offset); w6[0] = hc_bytealign (w5[0], w5[1], offset); w5[3] = hc_bytealign (w4[3], w5[0], offset); w5[2] = hc_bytealign (w4[2], w4[3], offset); w5[1] = hc_bytealign (w4[1], w4[2], offset); w5[0] = hc_bytealign (w4[0], w4[1], offset); w4[3] = hc_bytealign (w3[3], w4[0], offset); w4[2] = hc_bytealign (w3[2], w3[3], offset); w4[1] = hc_bytealign (w3[1], w3[2], offset); w4[0] = hc_bytealign (w3[0], w3[1], offset); w3[3] = hc_bytealign (w2[3], w3[0], offset); w3[2] = hc_bytealign (w2[2], w2[3], offset); w3[1] = hc_bytealign (w2[1], w2[2], offset); w3[0] = hc_bytealign (w2[0], w2[1], offset); w2[3] = hc_bytealign (w1[3], w2[0], offset); w2[2] = hc_bytealign (w1[2], w1[3], offset); w2[1] = hc_bytealign (w1[1], w1[2], offset); w2[0] = hc_bytealign (w1[0], w1[1], offset); w1[3] = hc_bytealign (w0[3], w1[0], offset); w1[2] = hc_bytealign (w0[2], w0[3], offset); w1[1] = hc_bytealign (w0[1], w0[2], offset); w1[0] = hc_bytealign (w0[0], w0[1], offset); w0[3] = hc_bytealign ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign (w7[3], 0, offset); c0[3] = hc_bytealign (w7[2], w7[3], offset); c0[2] = hc_bytealign (w7[1], w7[2], offset); c0[1] = hc_bytealign (w7[0], w7[1], offset); c0[0] = hc_bytealign (w6[3], w7[0], offset); w7[3] = hc_bytealign (w6[2], w6[3], offset); w7[2] = hc_bytealign (w6[1], w6[2], offset); w7[1] = hc_bytealign (w6[0], w6[1], offset); w7[0] = hc_bytealign (w5[3], w6[0], offset); w6[3] = hc_bytealign (w5[2], w5[3], offset); w6[2] = hc_bytealign (w5[1], w5[2], offset); w6[1] = hc_bytealign (w5[0], w5[1], offset); w6[0] = hc_bytealign (w4[3], 
w5[0], offset); w5[3] = hc_bytealign (w4[2], w4[3], offset); w5[2] = hc_bytealign (w4[1], w4[2], offset); w5[1] = hc_bytealign (w4[0], w4[1], offset); w5[0] = hc_bytealign (w3[3], w4[0], offset); w4[3] = hc_bytealign (w3[2], w3[3], offset); w4[2] = hc_bytealign (w3[1], w3[2], offset); w4[1] = hc_bytealign (w3[0], w3[1], offset); w4[0] = hc_bytealign (w2[3], w3[0], offset); w3[3] = hc_bytealign (w2[2], w2[3], offset); w3[2] = hc_bytealign (w2[1], w2[2], offset); w3[1] = hc_bytealign (w2[0], w2[1], offset); w3[0] = hc_bytealign (w1[3], w2[0], offset); w2[3] = hc_bytealign (w1[2], w1[3], offset); w2[2] = hc_bytealign (w1[1], w1[2], offset); w2[1] = hc_bytealign (w1[0], w1[1], offset); w2[0] = hc_bytealign (w0[3], w1[0], offset); w1[3] = hc_bytealign (w0[2], w0[3], offset); w1[2] = hc_bytealign (w0[1], w0[2], offset); w1[1] = hc_bytealign (w0[0], w0[1], offset); w1[0] = hc_bytealign ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign (w7[3], 0, offset); c1[0] = hc_bytealign (w7[2], w7[3], offset); c0[3] = hc_bytealign (w7[1], w7[2], offset); c0[2] = hc_bytealign (w7[0], w7[1], offset); c0[1] = hc_bytealign (w6[3], w7[0], offset); c0[0] = hc_bytealign (w6[2], w6[3], offset); w7[3] = hc_bytealign (w6[1], w6[2], offset); w7[2] = hc_bytealign (w6[0], w6[1], offset); w7[1] = hc_bytealign (w5[3], w6[0], offset); w7[0] = hc_bytealign (w5[2], w5[3], offset); w6[3] = hc_bytealign (w5[1], w5[2], offset); w6[2] = hc_bytealign (w5[0], w5[1], offset); w6[1] = hc_bytealign (w4[3], w5[0], offset); w6[0] = hc_bytealign (w4[2], w4[3], offset); w5[3] = hc_bytealign (w4[1], w4[2], offset); w5[2] = hc_bytealign (w4[0], w4[1], offset); w5[1] = hc_bytealign (w3[3], w4[0], offset); w5[0] = hc_bytealign (w3[2], w3[3], offset); w4[3] = hc_bytealign (w3[1], w3[2], offset); w4[2] = hc_bytealign (w3[0], w3[1], offset); w4[1] = hc_bytealign (w2[3], w3[0], offset); w4[0] = hc_bytealign (w2[2], w2[3], offset); w3[3] = hc_bytealign (w2[1], w2[2], 
offset); w3[2] = hc_bytealign (w2[0], w2[1], offset); w3[1] = hc_bytealign (w1[3], w2[0], offset); w3[0] = hc_bytealign (w1[2], w1[3], offset); w2[3] = hc_bytealign (w1[1], w1[2], offset); w2[2] = hc_bytealign (w1[0], w1[1], offset); w2[1] = hc_bytealign (w0[3], w1[0], offset); w2[0] = hc_bytealign (w0[2], w0[3], offset); w1[3] = hc_bytealign (w0[1], w0[2], offset); w1[2] = hc_bytealign (w0[0], w0[1], offset); w1[1] = hc_bytealign ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign (w7[3], 0, offset); c1[1] = hc_bytealign (w7[2], w7[3], offset); c1[0] = hc_bytealign (w7[1], w7[2], offset); c0[3] = hc_bytealign (w7[0], w7[1], offset); c0[2] = hc_bytealign (w6[3], w7[0], offset); c0[1] = hc_bytealign (w6[2], w6[3], offset); c0[0] = hc_bytealign (w6[1], w6[2], offset); w7[3] = hc_bytealign (w6[0], w6[1], offset); w7[2] = hc_bytealign (w5[3], w6[0], offset); w7[1] = hc_bytealign (w5[2], w5[3], offset); w7[0] = hc_bytealign (w5[1], w5[2], offset); w6[3] = hc_bytealign (w5[0], w5[1], offset); w6[2] = hc_bytealign (w4[3], w5[0], offset); w6[1] = hc_bytealign (w4[2], w4[3], offset); w6[0] = hc_bytealign (w4[1], w4[2], offset); w5[3] = hc_bytealign (w4[0], w4[1], offset); w5[2] = hc_bytealign (w3[3], w4[0], offset); w5[1] = hc_bytealign (w3[2], w3[3], offset); w5[0] = hc_bytealign (w3[1], w3[2], offset); w4[3] = hc_bytealign (w3[0], w3[1], offset); w4[2] = hc_bytealign (w2[3], w3[0], offset); w4[1] = hc_bytealign (w2[2], w2[3], offset); w4[0] = hc_bytealign (w2[1], w2[2], offset); w3[3] = hc_bytealign (w2[0], w2[1], offset); w3[2] = hc_bytealign (w1[3], w2[0], offset); w3[1] = hc_bytealign (w1[2], w1[3], offset); w3[0] = hc_bytealign (w1[1], w1[2], offset); w2[3] = hc_bytealign (w1[0], w1[1], offset); w2[2] = hc_bytealign (w0[3], w1[0], offset); w2[1] = hc_bytealign (w0[2], w0[3], offset); w2[0] = hc_bytealign (w0[1], w0[2], offset); w1[3] = hc_bytealign (w0[0], w0[1], offset); w1[2] = hc_bytealign ( 0, w0[0], 
offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign (w7[3], 0, offset); c1[2] = hc_bytealign (w7[2], w7[3], offset); c1[1] = hc_bytealign (w7[1], w7[2], offset); c1[0] = hc_bytealign (w7[0], w7[1], offset); c0[3] = hc_bytealign (w6[3], w7[0], offset); c0[2] = hc_bytealign (w6[2], w6[3], offset); c0[1] = hc_bytealign (w6[1], w6[2], offset); c0[0] = hc_bytealign (w6[0], w6[1], offset); w7[3] = hc_bytealign (w5[3], w6[0], offset); w7[2] = hc_bytealign (w5[2], w5[3], offset); w7[1] = hc_bytealign (w5[1], w5[2], offset); w7[0] = hc_bytealign (w5[0], w5[1], offset); w6[3] = hc_bytealign (w4[3], w5[0], offset); w6[2] = hc_bytealign (w4[2], w4[3], offset); w6[1] = hc_bytealign (w4[1], w4[2], offset); w6[0] = hc_bytealign (w4[0], w4[1], offset); w5[3] = hc_bytealign (w3[3], w4[0], offset); w5[2] = hc_bytealign (w3[2], w3[3], offset); w5[1] = hc_bytealign (w3[1], w3[2], offset); w5[0] = hc_bytealign (w3[0], w3[1], offset); w4[3] = hc_bytealign (w2[3], w3[0], offset); w4[2] = hc_bytealign (w2[2], w2[3], offset); w4[1] = hc_bytealign (w2[1], w2[2], offset); w4[0] = hc_bytealign (w2[0], w2[1], offset); w3[3] = hc_bytealign (w1[3], w2[0], offset); w3[2] = hc_bytealign (w1[2], w1[3], offset); w3[1] = hc_bytealign (w1[1], w1[2], offset); w3[0] = hc_bytealign (w1[0], w1[1], offset); w2[3] = hc_bytealign (w0[3], w1[0], offset); w2[2] = hc_bytealign (w0[2], w0[3], offset); w2[1] = hc_bytealign (w0[1], w0[2], offset); w2[0] = hc_bytealign (w0[0], w0[1], offset); w1[3] = hc_bytealign ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign (w7[3], 0, offset); c1[3] = hc_bytealign (w7[2], w7[3], offset); c1[2] = hc_bytealign (w7[1], w7[2], offset); c1[1] = hc_bytealign (w7[0], w7[1], offset); c1[0] = hc_bytealign (w6[3], w7[0], offset); c0[3] = hc_bytealign (w6[2], w6[3], offset); c0[2] = hc_bytealign (w6[1], w6[2], offset); c0[1] = hc_bytealign (w6[0], 
w6[1], offset); c0[0] = hc_bytealign (w5[3], w6[0], offset); w7[3] = hc_bytealign (w5[2], w5[3], offset); w7[2] = hc_bytealign (w5[1], w5[2], offset); w7[1] = hc_bytealign (w5[0], w5[1], offset); w7[0] = hc_bytealign (w4[3], w5[0], offset); w6[3] = hc_bytealign (w4[2], w4[3], offset); w6[2] = hc_bytealign (w4[1], w4[2], offset); w6[1] = hc_bytealign (w4[0], w4[1], offset); w6[0] = hc_bytealign (w3[3], w4[0], offset); w5[3] = hc_bytealign (w3[2], w3[3], offset); w5[2] = hc_bytealign (w3[1], w3[2], offset); w5[1] = hc_bytealign (w3[0], w3[1], offset); w5[0] = hc_bytealign (w2[3], w3[0], offset); w4[3] = hc_bytealign (w2[2], w2[3], offset); w4[2] = hc_bytealign (w2[1], w2[2], offset); w4[1] = hc_bytealign (w2[0], w2[1], offset); w4[0] = hc_bytealign (w1[3], w2[0], offset); w3[3] = hc_bytealign (w1[2], w1[3], offset); w3[2] = hc_bytealign (w1[1], w1[2], offset); w3[1] = hc_bytealign (w1[0], w1[1], offset); w3[0] = hc_bytealign (w0[3], w1[0], offset); w2[3] = hc_bytealign (w0[2], w0[3], offset); w2[2] = hc_bytealign (w0[1], w0[2], offset); w2[1] = hc_bytealign (w0[0], w0[1], offset); w2[0] = hc_bytealign ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign (w7[3], 0, offset); c2[0] = hc_bytealign (w7[2], w7[3], offset); c1[3] = hc_bytealign (w7[1], w7[2], offset); c1[2] = hc_bytealign (w7[0], w7[1], offset); c1[1] = hc_bytealign (w6[3], w7[0], offset); c1[0] = hc_bytealign (w6[2], w6[3], offset); c0[3] = hc_bytealign (w6[1], w6[2], offset); c0[2] = hc_bytealign (w6[0], w6[1], offset); c0[1] = hc_bytealign (w5[3], w6[0], offset); c0[0] = hc_bytealign (w5[2], w5[3], offset); w7[3] = hc_bytealign (w5[1], w5[2], offset); w7[2] = hc_bytealign (w5[0], w5[1], offset); w7[1] = hc_bytealign (w4[3], w5[0], offset); w7[0] = hc_bytealign (w4[2], w4[3], offset); w6[3] = hc_bytealign (w4[1], w4[2], offset); w6[2] = hc_bytealign (w4[0], w4[1], offset); w6[1] = hc_bytealign (w3[3], w4[0], 
offset); w6[0] = hc_bytealign (w3[2], w3[3], offset); w5[3] = hc_bytealign (w3[1], w3[2], offset); w5[2] = hc_bytealign (w3[0], w3[1], offset); w5[1] = hc_bytealign (w2[3], w3[0], offset); w5[0] = hc_bytealign (w2[2], w2[3], offset); w4[3] = hc_bytealign (w2[1], w2[2], offset); w4[2] = hc_bytealign (w2[0], w2[1], offset); w4[1] = hc_bytealign (w1[3], w2[0], offset); w4[0] = hc_bytealign (w1[2], w1[3], offset); w3[3] = hc_bytealign (w1[1], w1[2], offset); w3[2] = hc_bytealign (w1[0], w1[1], offset); w3[1] = hc_bytealign (w0[3], w1[0], offset); w3[0] = hc_bytealign (w0[2], w0[3], offset); w2[3] = hc_bytealign (w0[1], w0[2], offset); w2[2] = hc_bytealign (w0[0], w0[1], offset); w2[1] = hc_bytealign ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign (w7[3], 0, offset); c2[1] = hc_bytealign (w7[2], w7[3], offset); c2[0] = hc_bytealign (w7[1], w7[2], offset); c1[3] = hc_bytealign (w7[0], w7[1], offset); c1[2] = hc_bytealign (w6[3], w7[0], offset); c1[1] = hc_bytealign (w6[2], w6[3], offset); c1[0] = hc_bytealign (w6[1], w6[2], offset); c0[3] = hc_bytealign (w6[0], w6[1], offset); c0[2] = hc_bytealign (w5[3], w6[0], offset); c0[1] = hc_bytealign (w5[2], w5[3], offset); c0[0] = hc_bytealign (w5[1], w5[2], offset); w7[3] = hc_bytealign (w5[0], w5[1], offset); w7[2] = hc_bytealign (w4[3], w5[0], offset); w7[1] = hc_bytealign (w4[2], w4[3], offset); w7[0] = hc_bytealign (w4[1], w4[2], offset); w6[3] = hc_bytealign (w4[0], w4[1], offset); w6[2] = hc_bytealign (w3[3], w4[0], offset); w6[1] = hc_bytealign (w3[2], w3[3], offset); w6[0] = hc_bytealign (w3[1], w3[2], offset); w5[3] = hc_bytealign (w3[0], w3[1], offset); w5[2] = hc_bytealign (w2[3], w3[0], offset); w5[1] = hc_bytealign (w2[2], w2[3], offset); w5[0] = hc_bytealign (w2[1], w2[2], offset); w4[3] = hc_bytealign (w2[0], w2[1], offset); w4[2] = hc_bytealign (w1[3], w2[0], offset); w4[1] = hc_bytealign (w1[2], w1[3], 
offset); w4[0] = hc_bytealign (w1[1], w1[2], offset); w3[3] = hc_bytealign (w1[0], w1[1], offset); w3[2] = hc_bytealign (w0[3], w1[0], offset); w3[1] = hc_bytealign (w0[2], w0[3], offset); w3[0] = hc_bytealign (w0[1], w0[2], offset); w2[3] = hc_bytealign (w0[0], w0[1], offset); w2[2] = hc_bytealign ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign (w7[3], 0, offset); c2[2] = hc_bytealign (w7[2], w7[3], offset); c2[1] = hc_bytealign (w7[1], w7[2], offset); c2[0] = hc_bytealign (w7[0], w7[1], offset); c1[3] = hc_bytealign (w6[3], w7[0], offset); c1[2] = hc_bytealign (w6[2], w6[3], offset); c1[1] = hc_bytealign (w6[1], w6[2], offset); c1[0] = hc_bytealign (w6[0], w6[1], offset); c0[3] = hc_bytealign (w5[3], w6[0], offset); c0[2] = hc_bytealign (w5[2], w5[3], offset); c0[1] = hc_bytealign (w5[1], w5[2], offset); c0[0] = hc_bytealign (w5[0], w5[1], offset); w7[3] = hc_bytealign (w4[3], w5[0], offset); w7[2] = hc_bytealign (w4[2], w4[3], offset); w7[1] = hc_bytealign (w4[1], w4[2], offset); w7[0] = hc_bytealign (w4[0], w4[1], offset); w6[3] = hc_bytealign (w3[3], w4[0], offset); w6[2] = hc_bytealign (w3[2], w3[3], offset); w6[1] = hc_bytealign (w3[1], w3[2], offset); w6[0] = hc_bytealign (w3[0], w3[1], offset); w5[3] = hc_bytealign (w2[3], w3[0], offset); w5[2] = hc_bytealign (w2[2], w2[3], offset); w5[1] = hc_bytealign (w2[1], w2[2], offset); w5[0] = hc_bytealign (w2[0], w2[1], offset); w4[3] = hc_bytealign (w1[3], w2[0], offset); w4[2] = hc_bytealign (w1[2], w1[3], offset); w4[1] = hc_bytealign (w1[1], w1[2], offset); w4[0] = hc_bytealign (w1[0], w1[1], offset); w3[3] = hc_bytealign (w0[3], w1[0], offset); w3[2] = hc_bytealign (w0[2], w0[3], offset); w3[1] = hc_bytealign (w0[1], w0[2], offset); w3[0] = hc_bytealign (w0[0], w0[1], offset); w2[3] = hc_bytealign ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 
0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign (w7[3], 0, offset); c2[3] = hc_bytealign (w7[2], w7[3], offset); c2[2] = hc_bytealign (w7[1], w7[2], offset); c2[1] = hc_bytealign (w7[0], w7[1], offset); c2[0] = hc_bytealign (w6[3], w7[0], offset); c1[3] = hc_bytealign (w6[2], w6[3], offset); c1[2] = hc_bytealign (w6[1], w6[2], offset); c1[1] = hc_bytealign (w6[0], w6[1], offset); c1[0] = hc_bytealign (w5[3], w6[0], offset); c0[3] = hc_bytealign (w5[2], w5[3], offset); c0[2] = hc_bytealign (w5[1], w5[2], offset); c0[1] = hc_bytealign (w5[0], w5[1], offset); c0[0] = hc_bytealign (w4[3], w5[0], offset); w7[3] = hc_bytealign (w4[2], w4[3], offset); w7[2] = hc_bytealign (w4[1], w4[2], offset); w7[1] = hc_bytealign (w4[0], w4[1], offset); w7[0] = hc_bytealign (w3[3], w4[0], offset); w6[3] = hc_bytealign (w3[2], w3[3], offset); w6[2] = hc_bytealign (w3[1], w3[2], offset); w6[1] = hc_bytealign (w3[0], w3[1], offset); w6[0] = hc_bytealign (w2[3], w3[0], offset); w5[3] = hc_bytealign (w2[2], w2[3], offset); w5[2] = hc_bytealign (w2[1], w2[2], offset); w5[1] = hc_bytealign (w2[0], w2[1], offset); w5[0] = hc_bytealign (w1[3], w2[0], offset); w4[3] = hc_bytealign (w1[2], w1[3], offset); w4[2] = hc_bytealign (w1[1], w1[2], offset); w4[1] = hc_bytealign (w1[0], w1[1], offset); w4[0] = hc_bytealign (w0[3], w1[0], offset); w3[3] = hc_bytealign (w0[2], w0[3], offset); w3[2] = hc_bytealign (w0[1], w0[2], offset); w3[1] = hc_bytealign (w0[0], w0[1], offset); w3[0] = hc_bytealign ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign (w7[3], 0, offset); c3[0] = hc_bytealign (w7[2], w7[3], offset); c2[3] = hc_bytealign (w7[1], w7[2], offset); c2[2] = hc_bytealign (w7[0], w7[1], offset); c2[1] = hc_bytealign (w6[3], w7[0], offset); c2[0] = hc_bytealign (w6[2], w6[3], offset); c1[3] = hc_bytealign (w6[1], w6[2], offset); 
c1[2] = hc_bytealign (w6[0], w6[1], offset); c1[1] = hc_bytealign (w5[3], w6[0], offset); c1[0] = hc_bytealign (w5[2], w5[3], offset); c0[3] = hc_bytealign (w5[1], w5[2], offset); c0[2] = hc_bytealign (w5[0], w5[1], offset); c0[1] = hc_bytealign (w4[3], w5[0], offset); c0[0] = hc_bytealign (w4[2], w4[3], offset); w7[3] = hc_bytealign (w4[1], w4[2], offset); w7[2] = hc_bytealign (w4[0], w4[1], offset); w7[1] = hc_bytealign (w3[3], w4[0], offset); w7[0] = hc_bytealign (w3[2], w3[3], offset); w6[3] = hc_bytealign (w3[1], w3[2], offset); w6[2] = hc_bytealign (w3[0], w3[1], offset); w6[1] = hc_bytealign (w2[3], w3[0], offset); w6[0] = hc_bytealign (w2[2], w2[3], offset); w5[3] = hc_bytealign (w2[1], w2[2], offset); w5[2] = hc_bytealign (w2[0], w2[1], offset); w5[1] = hc_bytealign (w1[3], w2[0], offset); w5[0] = hc_bytealign (w1[2], w1[3], offset); w4[3] = hc_bytealign (w1[1], w1[2], offset); w4[2] = hc_bytealign (w1[0], w1[1], offset); w4[1] = hc_bytealign (w0[3], w1[0], offset); w4[0] = hc_bytealign (w0[2], w0[3], offset); w3[3] = hc_bytealign (w0[1], w0[2], offset); w3[2] = hc_bytealign (w0[0], w0[1], offset); w3[1] = hc_bytealign ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign (w7[3], 0, offset); c3[1] = hc_bytealign (w7[2], w7[3], offset); c3[0] = hc_bytealign (w7[1], w7[2], offset); c2[3] = hc_bytealign (w7[0], w7[1], offset); c2[2] = hc_bytealign (w6[3], w7[0], offset); c2[1] = hc_bytealign (w6[2], w6[3], offset); c2[0] = hc_bytealign (w6[1], w6[2], offset); c1[3] = hc_bytealign (w6[0], w6[1], offset); c1[2] = hc_bytealign (w5[3], w6[0], offset); c1[1] = hc_bytealign (w5[2], w5[3], offset); c1[0] = hc_bytealign (w5[1], w5[2], offset); c0[3] = hc_bytealign (w5[0], w5[1], offset); c0[2] = hc_bytealign (w4[3], w5[0], offset); c0[1] = hc_bytealign (w4[2], w4[3], offset); c0[0] = hc_bytealign (w4[1], w4[2], offset); 
w7[3] = hc_bytealign (w4[0], w4[1], offset); w7[2] = hc_bytealign (w3[3], w4[0], offset); w7[1] = hc_bytealign (w3[2], w3[3], offset); w7[0] = hc_bytealign (w3[1], w3[2], offset); w6[3] = hc_bytealign (w3[0], w3[1], offset); w6[2] = hc_bytealign (w2[3], w3[0], offset); w6[1] = hc_bytealign (w2[2], w2[3], offset); w6[0] = hc_bytealign (w2[1], w2[2], offset); w5[3] = hc_bytealign (w2[0], w2[1], offset); w5[2] = hc_bytealign (w1[3], w2[0], offset); w5[1] = hc_bytealign (w1[2], w1[3], offset); w5[0] = hc_bytealign (w1[1], w1[2], offset); w4[3] = hc_bytealign (w1[0], w1[1], offset); w4[2] = hc_bytealign (w0[3], w1[0], offset); w4[1] = hc_bytealign (w0[2], w0[3], offset); w4[0] = hc_bytealign (w0[1], w0[2], offset); w3[3] = hc_bytealign (w0[0], w0[1], offset); w3[2] = hc_bytealign ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign (w7[3], 0, offset); c3[2] = hc_bytealign (w7[2], w7[3], offset); c3[1] = hc_bytealign (w7[1], w7[2], offset); c3[0] = hc_bytealign (w7[0], w7[1], offset); c2[3] = hc_bytealign (w6[3], w7[0], offset); c2[2] = hc_bytealign (w6[2], w6[3], offset); c2[1] = hc_bytealign (w6[1], w6[2], offset); c2[0] = hc_bytealign (w6[0], w6[1], offset); c1[3] = hc_bytealign (w5[3], w6[0], offset); c1[2] = hc_bytealign (w5[2], w5[3], offset); c1[1] = hc_bytealign (w5[1], w5[2], offset); c1[0] = hc_bytealign (w5[0], w5[1], offset); c0[3] = hc_bytealign (w4[3], w5[0], offset); c0[2] = hc_bytealign (w4[2], w4[3], offset); c0[1] = hc_bytealign (w4[1], w4[2], offset); c0[0] = hc_bytealign (w4[0], w4[1], offset); w7[3] = hc_bytealign (w3[3], w4[0], offset); w7[2] = hc_bytealign (w3[2], w3[3], offset); w7[1] = hc_bytealign (w3[1], w3[2], offset); w7[0] = hc_bytealign (w3[0], w3[1], offset); w6[3] = hc_bytealign (w2[3], w3[0], offset); w6[2] = hc_bytealign (w2[2], w2[3], offset); w6[1] = hc_bytealign (w2[1], w2[2], 
offset); w6[0] = hc_bytealign (w2[0], w2[1], offset); w5[3] = hc_bytealign (w1[3], w2[0], offset); w5[2] = hc_bytealign (w1[2], w1[3], offset); w5[1] = hc_bytealign (w1[1], w1[2], offset); w5[0] = hc_bytealign (w1[0], w1[1], offset); w4[3] = hc_bytealign (w0[3], w1[0], offset); w4[2] = hc_bytealign (w0[2], w0[3], offset); w4[1] = hc_bytealign (w0[1], w0[2], offset); w4[0] = hc_bytealign (w0[0], w0[1], offset); w3[3] = hc_bytealign ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_bytealign (w7[3], 0, offset); c3[3] = hc_bytealign (w7[2], w7[3], offset); c3[2] = hc_bytealign (w7[1], w7[2], offset); c3[1] = hc_bytealign (w7[0], w7[1], offset); c3[0] = hc_bytealign (w6[3], w7[0], offset); c2[3] = hc_bytealign (w6[2], w6[3], offset); c2[2] = hc_bytealign (w6[1], w6[2], offset); c2[1] = hc_bytealign (w6[0], w6[1], offset); c2[0] = hc_bytealign (w5[3], w6[0], offset); c1[3] = hc_bytealign (w5[2], w5[3], offset); c1[2] = hc_bytealign (w5[1], w5[2], offset); c1[1] = hc_bytealign (w5[0], w5[1], offset); c1[0] = hc_bytealign (w4[3], w5[0], offset); c0[3] = hc_bytealign (w4[2], w4[3], offset); c0[2] = hc_bytealign (w4[1], w4[2], offset); c0[1] = hc_bytealign (w4[0], w4[1], offset); c0[0] = hc_bytealign (w3[3], w4[0], offset); w7[3] = hc_bytealign (w3[2], w3[3], offset); w7[2] = hc_bytealign (w3[1], w3[2], offset); w7[1] = hc_bytealign (w3[0], w3[1], offset); w7[0] = hc_bytealign (w2[3], w3[0], offset); w6[3] = hc_bytealign (w2[2], w2[3], offset); w6[2] = hc_bytealign (w2[1], w2[2], offset); w6[1] = hc_bytealign (w2[0], w2[1], offset); w6[0] = hc_bytealign (w1[3], w2[0], offset); w5[3] = hc_bytealign (w1[2], w1[3], offset); w5[2] = hc_bytealign (w1[1], w1[2], offset); w5[1] = hc_bytealign (w1[0], w1[1], offset); w5[0] = hc_bytealign (w0[3], w1[0], offset); w4[3] = hc_bytealign (w0[2], w0[3], offset); w4[2] = 
hc_bytealign (w0[1], w0[2], offset); w4[1] = hc_bytealign (w0[0], w0[1], offset); w4[0] = hc_bytealign ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_bytealign (w7[3], 0, offset); c4[0] = hc_bytealign (w7[2], w7[3], offset); c3[3] = hc_bytealign (w7[1], w7[2], offset); c3[2] = hc_bytealign (w7[0], w7[1], offset); c3[1] = hc_bytealign (w6[3], w7[0], offset); c3[0] = hc_bytealign (w6[2], w6[3], offset); c2[3] = hc_bytealign (w6[1], w6[2], offset); c2[2] = hc_bytealign (w6[0], w6[1], offset); c2[1] = hc_bytealign (w5[3], w6[0], offset); c2[0] = hc_bytealign (w5[2], w5[3], offset); c1[3] = hc_bytealign (w5[1], w5[2], offset); c1[2] = hc_bytealign (w5[0], w5[1], offset); c1[1] = hc_bytealign (w4[3], w5[0], offset); c1[0] = hc_bytealign (w4[2], w4[3], offset); c0[3] = hc_bytealign (w4[1], w4[2], offset); c0[2] = hc_bytealign (w4[0], w4[1], offset); c0[1] = hc_bytealign (w3[3], w4[0], offset); c0[0] = hc_bytealign (w3[2], w3[3], offset); w7[3] = hc_bytealign (w3[1], w3[2], offset); w7[2] = hc_bytealign (w3[0], w3[1], offset); w7[1] = hc_bytealign (w2[3], w3[0], offset); w7[0] = hc_bytealign (w2[2], w2[3], offset); w6[3] = hc_bytealign (w2[1], w2[2], offset); w6[2] = hc_bytealign (w2[0], w2[1], offset); w6[1] = hc_bytealign (w1[3], w2[0], offset); w6[0] = hc_bytealign (w1[2], w1[3], offset); w5[3] = hc_bytealign (w1[1], w1[2], offset); w5[2] = hc_bytealign (w1[0], w1[1], offset); w5[1] = hc_bytealign (w0[3], w1[0], offset); w5[0] = hc_bytealign (w0[2], w0[3], offset); w4[3] = hc_bytealign (w0[1], w0[2], offset); w4[2] = hc_bytealign (w0[0], w0[1], offset); w4[1] = hc_bytealign ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: 
c4[2] = hc_bytealign (w7[3], 0, offset); c4[1] = hc_bytealign (w7[2], w7[3], offset); c4[0] = hc_bytealign (w7[1], w7[2], offset); c3[3] = hc_bytealign (w7[0], w7[1], offset); c3[2] = hc_bytealign (w6[3], w7[0], offset); c3[1] = hc_bytealign (w6[2], w6[3], offset); c3[0] = hc_bytealign (w6[1], w6[2], offset); c2[3] = hc_bytealign (w6[0], w6[1], offset); c2[2] = hc_bytealign (w5[3], w6[0], offset); c2[1] = hc_bytealign (w5[2], w5[3], offset); c2[0] = hc_bytealign (w5[1], w5[2], offset); c1[3] = hc_bytealign (w5[0], w5[1], offset); c1[2] = hc_bytealign (w4[3], w5[0], offset); c1[1] = hc_bytealign (w4[2], w4[3], offset); c1[0] = hc_bytealign (w4[1], w4[2], offset); c0[3] = hc_bytealign (w4[0], w4[1], offset); c0[2] = hc_bytealign (w3[3], w4[0], offset); c0[1] = hc_bytealign (w3[2], w3[3], offset); c0[0] = hc_bytealign (w3[1], w3[2], offset); w7[3] = hc_bytealign (w3[0], w3[1], offset); w7[2] = hc_bytealign (w2[3], w3[0], offset); w7[1] = hc_bytealign (w2[2], w2[3], offset); w7[0] = hc_bytealign (w2[1], w2[2], offset); w6[3] = hc_bytealign (w2[0], w2[1], offset); w6[2] = hc_bytealign (w1[3], w2[0], offset); w6[1] = hc_bytealign (w1[2], w1[3], offset); w6[0] = hc_bytealign (w1[1], w1[2], offset); w5[3] = hc_bytealign (w1[0], w1[1], offset); w5[2] = hc_bytealign (w0[3], w1[0], offset); w5[1] = hc_bytealign (w0[2], w0[3], offset); w5[0] = hc_bytealign (w0[1], w0[2], offset); w4[3] = hc_bytealign (w0[0], w0[1], offset); w4[2] = hc_bytealign ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_bytealign (w7[3], 0, offset); c4[2] = hc_bytealign (w7[2], w7[3], offset); c4[1] = hc_bytealign (w7[1], w7[2], offset); c4[0] = hc_bytealign (w7[0], w7[1], offset); c3[3] = hc_bytealign (w6[3], w7[0], offset); c3[2] = hc_bytealign (w6[2], w6[3], offset); c3[1] = hc_bytealign (w6[1], w6[2], 
offset); c3[0] = hc_bytealign (w6[0], w6[1], offset); c2[3] = hc_bytealign (w5[3], w6[0], offset); c2[2] = hc_bytealign (w5[2], w5[3], offset); c2[1] = hc_bytealign (w5[1], w5[2], offset); c2[0] = hc_bytealign (w5[0], w5[1], offset); c1[3] = hc_bytealign (w4[3], w5[0], offset); c1[2] = hc_bytealign (w4[2], w4[3], offset); c1[1] = hc_bytealign (w4[1], w4[2], offset); c1[0] = hc_bytealign (w4[0], w4[1], offset); c0[3] = hc_bytealign (w3[3], w4[0], offset); c0[2] = hc_bytealign (w3[2], w3[3], offset); c0[1] = hc_bytealign (w3[1], w3[2], offset); c0[0] = hc_bytealign (w3[0], w3[1], offset); w7[3] = hc_bytealign (w2[3], w3[0], offset); w7[2] = hc_bytealign (w2[2], w2[3], offset); w7[1] = hc_bytealign (w2[1], w2[2], offset); w7[0] = hc_bytealign (w2[0], w2[1], offset); w6[3] = hc_bytealign (w1[3], w2[0], offset); w6[2] = hc_bytealign (w1[2], w1[3], offset); w6[1] = hc_bytealign (w1[1], w1[2], offset); w6[0] = hc_bytealign (w1[0], w1[1], offset); w5[3] = hc_bytealign (w0[3], w1[0], offset); w5[2] = hc_bytealign (w0[2], w0[3], offset); w5[1] = hc_bytealign (w0[1], w0[2], offset); w5[0] = hc_bytealign (w0[0], w0[1], offset); w4[3] = hc_bytealign ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_bytealign (w7[3], 0, offset); c4[3] = hc_bytealign (w7[2], w7[3], offset); c4[2] = hc_bytealign (w7[1], w7[2], offset); c4[1] = hc_bytealign (w7[0], w7[1], offset); c4[0] = hc_bytealign (w6[3], w7[0], offset); c3[3] = hc_bytealign (w6[2], w6[3], offset); c3[2] = hc_bytealign (w6[1], w6[2], offset); c3[1] = hc_bytealign (w6[0], w6[1], offset); c3[0] = hc_bytealign (w5[3], w6[0], offset); c2[3] = hc_bytealign (w5[2], w5[3], offset); c2[2] = hc_bytealign (w5[1], w5[2], offset); c2[1] = hc_bytealign (w5[0], w5[1], offset); c2[0] = hc_bytealign (w4[3], w5[0], offset); c1[3] = 
hc_bytealign (w4[2], w4[3], offset); c1[2] = hc_bytealign (w4[1], w4[2], offset); c1[1] = hc_bytealign (w4[0], w4[1], offset); c1[0] = hc_bytealign (w3[3], w4[0], offset); c0[3] = hc_bytealign (w3[2], w3[3], offset); c0[2] = hc_bytealign (w3[1], w3[2], offset); c0[1] = hc_bytealign (w3[0], w3[1], offset); c0[0] = hc_bytealign (w2[3], w3[0], offset); w7[3] = hc_bytealign (w2[2], w2[3], offset); w7[2] = hc_bytealign (w2[1], w2[2], offset); w7[1] = hc_bytealign (w2[0], w2[1], offset); w7[0] = hc_bytealign (w1[3], w2[0], offset); w6[3] = hc_bytealign (w1[2], w1[3], offset); w6[2] = hc_bytealign (w1[1], w1[2], offset); w6[1] = hc_bytealign (w1[0], w1[1], offset); w6[0] = hc_bytealign (w0[3], w1[0], offset); w5[3] = hc_bytealign (w0[2], w0[3], offset); w5[2] = hc_bytealign (w0[1], w0[2], offset); w5[1] = hc_bytealign (w0[0], w0[1], offset); w5[0] = hc_bytealign ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_bytealign (w7[3], 0, offset); c5[0] = hc_bytealign (w7[2], w7[3], offset); c4[3] = hc_bytealign (w7[1], w7[2], offset); c4[2] = hc_bytealign (w7[0], w7[1], offset); c4[1] = hc_bytealign (w6[3], w7[0], offset); c4[0] = hc_bytealign (w6[2], w6[3], offset); c3[3] = hc_bytealign (w6[1], w6[2], offset); c3[2] = hc_bytealign (w6[0], w6[1], offset); c3[1] = hc_bytealign (w5[3], w6[0], offset); c3[0] = hc_bytealign (w5[2], w5[3], offset); c2[3] = hc_bytealign (w5[1], w5[2], offset); c2[2] = hc_bytealign (w5[0], w5[1], offset); c2[1] = hc_bytealign (w4[3], w5[0], offset); c2[0] = hc_bytealign (w4[2], w4[3], offset); c1[3] = hc_bytealign (w4[1], w4[2], offset); c1[2] = hc_bytealign (w4[0], w4[1], offset); c1[1] = hc_bytealign (w3[3], w4[0], offset); c1[0] = hc_bytealign (w3[2], w3[3], offset); c0[3] = hc_bytealign (w3[1], w3[2], offset); c0[2] = hc_bytealign 
(w3[0], w3[1], offset); c0[1] = hc_bytealign (w2[3], w3[0], offset); c0[0] = hc_bytealign (w2[2], w2[3], offset); w7[3] = hc_bytealign (w2[1], w2[2], offset); w7[2] = hc_bytealign (w2[0], w2[1], offset); w7[1] = hc_bytealign (w1[3], w2[0], offset); w7[0] = hc_bytealign (w1[2], w1[3], offset); w6[3] = hc_bytealign (w1[1], w1[2], offset); w6[2] = hc_bytealign (w1[0], w1[1], offset); w6[1] = hc_bytealign (w0[3], w1[0], offset); w6[0] = hc_bytealign (w0[2], w0[3], offset); w5[3] = hc_bytealign (w0[1], w0[2], offset); w5[2] = hc_bytealign (w0[0], w0[1], offset); w5[1] = hc_bytealign ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_bytealign (w7[3], 0, offset); c5[1] = hc_bytealign (w7[2], w7[3], offset); c5[0] = hc_bytealign (w7[1], w7[2], offset); c4[3] = hc_bytealign (w7[0], w7[1], offset); c4[2] = hc_bytealign (w6[3], w7[0], offset); c4[1] = hc_bytealign (w6[2], w6[3], offset); c4[0] = hc_bytealign (w6[1], w6[2], offset); c3[3] = hc_bytealign (w6[0], w6[1], offset); c3[2] = hc_bytealign (w5[3], w6[0], offset); c3[1] = hc_bytealign (w5[2], w5[3], offset); c3[0] = hc_bytealign (w5[1], w5[2], offset); c2[3] = hc_bytealign (w5[0], w5[1], offset); c2[2] = hc_bytealign (w4[3], w5[0], offset); c2[1] = hc_bytealign (w4[2], w4[3], offset); c2[0] = hc_bytealign (w4[1], w4[2], offset); c1[3] = hc_bytealign (w4[0], w4[1], offset); c1[2] = hc_bytealign (w3[3], w4[0], offset); c1[1] = hc_bytealign (w3[2], w3[3], offset); c1[0] = hc_bytealign (w3[1], w3[2], offset); c0[3] = hc_bytealign (w3[0], w3[1], offset); c0[2] = hc_bytealign (w2[3], w3[0], offset); c0[1] = hc_bytealign (w2[2], w2[3], offset); c0[0] = hc_bytealign (w2[1], w2[2], offset); w7[3] = hc_bytealign (w2[0], w2[1], offset); w7[2] = hc_bytealign (w1[3], w2[0], offset); w7[1] = hc_bytealign 
(w1[2], w1[3], offset); w7[0] = hc_bytealign (w1[1], w1[2], offset); w6[3] = hc_bytealign (w1[0], w1[1], offset); w6[2] = hc_bytealign (w0[3], w1[0], offset); w6[1] = hc_bytealign (w0[2], w0[3], offset); w6[0] = hc_bytealign (w0[1], w0[2], offset); w5[3] = hc_bytealign (w0[0], w0[1], offset); w5[2] = hc_bytealign ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_bytealign (w7[3], 0, offset); c5[2] = hc_bytealign (w7[2], w7[3], offset); c5[1] = hc_bytealign (w7[1], w7[2], offset); c5[0] = hc_bytealign (w7[0], w7[1], offset); c4[3] = hc_bytealign (w6[3], w7[0], offset); c4[2] = hc_bytealign (w6[2], w6[3], offset); c4[1] = hc_bytealign (w6[1], w6[2], offset); c4[0] = hc_bytealign (w6[0], w6[1], offset); c3[3] = hc_bytealign (w5[3], w6[0], offset); c3[2] = hc_bytealign (w5[2], w5[3], offset); c3[1] = hc_bytealign (w5[1], w5[2], offset); c3[0] = hc_bytealign (w5[0], w5[1], offset); c2[3] = hc_bytealign (w4[3], w5[0], offset); c2[2] = hc_bytealign (w4[2], w4[3], offset); c2[1] = hc_bytealign (w4[1], w4[2], offset); c2[0] = hc_bytealign (w4[0], w4[1], offset); c1[3] = hc_bytealign (w3[3], w4[0], offset); c1[2] = hc_bytealign (w3[2], w3[3], offset); c1[1] = hc_bytealign (w3[1], w3[2], offset); c1[0] = hc_bytealign (w3[0], w3[1], offset); c0[3] = hc_bytealign (w2[3], w3[0], offset); c0[2] = hc_bytealign (w2[2], w2[3], offset); c0[1] = hc_bytealign (w2[1], w2[2], offset); c0[0] = hc_bytealign (w2[0], w2[1], offset); w7[3] = hc_bytealign (w1[3], w2[0], offset); w7[2] = hc_bytealign (w1[2], w1[3], offset); w7[1] = hc_bytealign (w1[1], w1[2], offset); w7[0] = hc_bytealign (w1[0], w1[1], offset); w6[3] = hc_bytealign (w0[3], w1[0], offset); w6[2] = hc_bytealign (w0[2], w0[3], offset); w6[1] = hc_bytealign (w0[1], w0[2], offset); w6[0] = 
hc_bytealign (w0[0], w0[1], offset); w5[3] = hc_bytealign ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_bytealign (w7[3], 0, offset); c5[3] = hc_bytealign (w7[2], w7[3], offset); c5[2] = hc_bytealign (w7[1], w7[2], offset); c5[1] = hc_bytealign (w7[0], w7[1], offset); c5[0] = hc_bytealign (w6[3], w7[0], offset); c4[3] = hc_bytealign (w6[2], w6[3], offset); c4[2] = hc_bytealign (w6[1], w6[2], offset); c4[1] = hc_bytealign (w6[0], w6[1], offset); c4[0] = hc_bytealign (w5[3], w6[0], offset); c3[3] = hc_bytealign (w5[2], w5[3], offset); c3[2] = hc_bytealign (w5[1], w5[2], offset); c3[1] = hc_bytealign (w5[0], w5[1], offset); c3[0] = hc_bytealign (w4[3], w5[0], offset); c2[3] = hc_bytealign (w4[2], w4[3], offset); c2[2] = hc_bytealign (w4[1], w4[2], offset); c2[1] = hc_bytealign (w4[0], w4[1], offset); c2[0] = hc_bytealign (w3[3], w4[0], offset); c1[3] = hc_bytealign (w3[2], w3[3], offset); c1[2] = hc_bytealign (w3[1], w3[2], offset); c1[1] = hc_bytealign (w3[0], w3[1], offset); c1[0] = hc_bytealign (w2[3], w3[0], offset); c0[3] = hc_bytealign (w2[2], w2[3], offset); c0[2] = hc_bytealign (w2[1], w2[2], offset); c0[1] = hc_bytealign (w2[0], w2[1], offset); c0[0] = hc_bytealign (w1[3], w2[0], offset); w7[3] = hc_bytealign (w1[2], w1[3], offset); w7[2] = hc_bytealign (w1[1], w1[2], offset); w7[1] = hc_bytealign (w1[0], w1[1], offset); w7[0] = hc_bytealign (w0[3], w1[0], offset); w6[3] = hc_bytealign (w0[2], w0[3], offset); w6[2] = hc_bytealign (w0[1], w0[2], offset); w6[1] = hc_bytealign (w0[0], w0[1], offset); w6[0] = hc_bytealign ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 
0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_bytealign (w7[3], 0, offset); c6[0] = hc_bytealign (w7[2], w7[3], offset); c5[3] = hc_bytealign (w7[1], w7[2], offset); c5[2] = hc_bytealign (w7[0], w7[1], offset); c5[1] = hc_bytealign (w6[3], w7[0], offset); c5[0] = hc_bytealign (w6[2], w6[3], offset); c4[3] = hc_bytealign (w6[1], w6[2], offset); c4[2] = hc_bytealign (w6[0], w6[1], offset); c4[1] = hc_bytealign (w5[3], w6[0], offset); c4[0] = hc_bytealign (w5[2], w5[3], offset); c3[3] = hc_bytealign (w5[1], w5[2], offset); c3[2] = hc_bytealign (w5[0], w5[1], offset); c3[1] = hc_bytealign (w4[3], w5[0], offset); c3[0] = hc_bytealign (w4[2], w4[3], offset); c2[3] = hc_bytealign (w4[1], w4[2], offset); c2[2] = hc_bytealign (w4[0], w4[1], offset); c2[1] = hc_bytealign (w3[3], w4[0], offset); c2[0] = hc_bytealign (w3[2], w3[3], offset); c1[3] = hc_bytealign (w3[1], w3[2], offset); c1[2] = hc_bytealign (w3[0], w3[1], offset); c1[1] = hc_bytealign (w2[3], w3[0], offset); c1[0] = hc_bytealign (w2[2], w2[3], offset); c0[3] = hc_bytealign (w2[1], w2[2], offset); c0[2] = hc_bytealign (w2[0], w2[1], offset); c0[1] = hc_bytealign (w1[3], w2[0], offset); c0[0] = hc_bytealign (w1[2], w1[3], offset); w7[3] = hc_bytealign (w1[1], w1[2], offset); w7[2] = hc_bytealign (w1[0], w1[1], offset); w7[1] = hc_bytealign (w0[3], w1[0], offset); w7[0] = hc_bytealign (w0[2], w0[3], offset); w6[3] = hc_bytealign (w0[1], w0[2], offset); w6[2] = hc_bytealign (w0[0], w0[1], offset); w6[1] = hc_bytealign ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_bytealign (w7[3], 0, offset); c6[1] = hc_bytealign (w7[2], w7[3], offset); c6[0] = hc_bytealign (w7[1], w7[2], 
offset); c5[3] = hc_bytealign (w7[0], w7[1], offset); c5[2] = hc_bytealign (w6[3], w7[0], offset); c5[1] = hc_bytealign (w6[2], w6[3], offset); c5[0] = hc_bytealign (w6[1], w6[2], offset); c4[3] = hc_bytealign (w6[0], w6[1], offset); c4[2] = hc_bytealign (w5[3], w6[0], offset); c4[1] = hc_bytealign (w5[2], w5[3], offset); c4[0] = hc_bytealign (w5[1], w5[2], offset); c3[3] = hc_bytealign (w5[0], w5[1], offset); c3[2] = hc_bytealign (w4[3], w5[0], offset); c3[1] = hc_bytealign (w4[2], w4[3], offset); c3[0] = hc_bytealign (w4[1], w4[2], offset); c2[3] = hc_bytealign (w4[0], w4[1], offset); c2[2] = hc_bytealign (w3[3], w4[0], offset); c2[1] = hc_bytealign (w3[2], w3[3], offset); c2[0] = hc_bytealign (w3[1], w3[2], offset); c1[3] = hc_bytealign (w3[0], w3[1], offset); c1[2] = hc_bytealign (w2[3], w3[0], offset); c1[1] = hc_bytealign (w2[2], w2[3], offset); c1[0] = hc_bytealign (w2[1], w2[2], offset); c0[3] = hc_bytealign (w2[0], w2[1], offset); c0[2] = hc_bytealign (w1[3], w2[0], offset); c0[1] = hc_bytealign (w1[2], w1[3], offset); c0[0] = hc_bytealign (w1[1], w1[2], offset); w7[3] = hc_bytealign (w1[0], w1[1], offset); w7[2] = hc_bytealign (w0[3], w1[0], offset); w7[1] = hc_bytealign (w0[2], w0[3], offset); w7[0] = hc_bytealign (w0[1], w0[2], offset); w6[3] = hc_bytealign (w0[0], w0[1], offset); w6[2] = hc_bytealign ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_bytealign (w7[3], 0, offset); c6[2] = hc_bytealign (w7[2], w7[3], offset); c6[1] = hc_bytealign (w7[1], w7[2], offset); c6[0] = hc_bytealign (w7[0], w7[1], offset); c5[3] = hc_bytealign (w6[3], w7[0], offset); c5[2] = hc_bytealign (w6[2], w6[3], offset); c5[1] = hc_bytealign (w6[1], w6[2], offset); c5[0] = hc_bytealign (w6[0], 
w6[1], offset); c4[3] = hc_bytealign (w5[3], w6[0], offset); c4[2] = hc_bytealign (w5[2], w5[3], offset); c4[1] = hc_bytealign (w5[1], w5[2], offset); c4[0] = hc_bytealign (w5[0], w5[1], offset); c3[3] = hc_bytealign (w4[3], w5[0], offset); c3[2] = hc_bytealign (w4[2], w4[3], offset); c3[1] = hc_bytealign (w4[1], w4[2], offset); c3[0] = hc_bytealign (w4[0], w4[1], offset); c2[3] = hc_bytealign (w3[3], w4[0], offset); c2[2] = hc_bytealign (w3[2], w3[3], offset); c2[1] = hc_bytealign (w3[1], w3[2], offset); c2[0] = hc_bytealign (w3[0], w3[1], offset); c1[3] = hc_bytealign (w2[3], w3[0], offset); c1[2] = hc_bytealign (w2[2], w2[3], offset); c1[1] = hc_bytealign (w2[1], w2[2], offset); c1[0] = hc_bytealign (w2[0], w2[1], offset); c0[3] = hc_bytealign (w1[3], w2[0], offset); c0[2] = hc_bytealign (w1[2], w1[3], offset); c0[1] = hc_bytealign (w1[1], w1[2], offset); c0[0] = hc_bytealign (w1[0], w1[1], offset); w7[3] = hc_bytealign (w0[3], w1[0], offset); w7[2] = hc_bytealign (w0[2], w0[3], offset); w7[1] = hc_bytealign (w0[1], w0[2], offset); w7[0] = hc_bytealign (w0[0], w0[1], offset); w6[3] = hc_bytealign ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_bytealign (w7[3], 0, offset); c6[3] = hc_bytealign (w7[2], w7[3], offset); c6[2] = hc_bytealign (w7[1], w7[2], offset); c6[1] = hc_bytealign (w7[0], w7[1], offset); c6[0] = hc_bytealign (w6[3], w7[0], offset); c5[3] = hc_bytealign (w6[2], w6[3], offset); c5[2] = hc_bytealign (w6[1], w6[2], offset); c5[1] = hc_bytealign (w6[0], w6[1], offset); c5[0] = hc_bytealign (w5[3], w6[0], offset); c4[3] = hc_bytealign (w5[2], w5[3], offset); c4[2] = hc_bytealign (w5[1], w5[2], offset); c4[1] = hc_bytealign (w5[0], w5[1], offset); c4[0] = 
hc_bytealign (w4[3], w5[0], offset); c3[3] = hc_bytealign (w4[2], w4[3], offset); c3[2] = hc_bytealign (w4[1], w4[2], offset); c3[1] = hc_bytealign (w4[0], w4[1], offset); c3[0] = hc_bytealign (w3[3], w4[0], offset); c2[3] = hc_bytealign (w3[2], w3[3], offset); c2[2] = hc_bytealign (w3[1], w3[2], offset); c2[1] = hc_bytealign (w3[0], w3[1], offset); c2[0] = hc_bytealign (w2[3], w3[0], offset); c1[3] = hc_bytealign (w2[2], w2[3], offset); c1[2] = hc_bytealign (w2[1], w2[2], offset); c1[1] = hc_bytealign (w2[0], w2[1], offset); c1[0] = hc_bytealign (w1[3], w2[0], offset); c0[3] = hc_bytealign (w1[2], w1[3], offset); c0[2] = hc_bytealign (w1[1], w1[2], offset); c0[1] = hc_bytealign (w1[0], w1[1], offset); c0[0] = hc_bytealign (w0[3], w1[0], offset); w7[3] = hc_bytealign (w0[2], w0[3], offset); w7[2] = hc_bytealign (w0[1], w0[2], offset); w7[1] = hc_bytealign (w0[0], w0[1], offset); w7[0] = hc_bytealign ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_bytealign (w7[3], 0, offset); c7[0] = hc_bytealign (w7[2], w7[3], offset); c6[3] = hc_bytealign (w7[1], w7[2], offset); c6[2] = hc_bytealign (w7[0], w7[1], offset); c6[1] = hc_bytealign (w6[3], w7[0], offset); c6[0] = hc_bytealign (w6[2], w6[3], offset); c5[3] = hc_bytealign (w6[1], w6[2], offset); c5[2] = hc_bytealign (w6[0], w6[1], offset); c5[1] = hc_bytealign (w5[3], w6[0], offset); c5[0] = hc_bytealign (w5[2], w5[3], offset); c4[3] = hc_bytealign (w5[1], w5[2], offset); c4[2] = hc_bytealign (w5[0], w5[1], offset); c4[1] = hc_bytealign (w4[3], w5[0], offset); c4[0] = hc_bytealign (w4[2], w4[3], offset); c3[3] = hc_bytealign (w4[1], w4[2], offset); c3[2] = hc_bytealign (w4[0], w4[1], offset); c3[1] = hc_bytealign 
(w3[3], w4[0], offset); c3[0] = hc_bytealign (w3[2], w3[3], offset); c2[3] = hc_bytealign (w3[1], w3[2], offset); c2[2] = hc_bytealign (w3[0], w3[1], offset); c2[1] = hc_bytealign (w2[3], w3[0], offset); c2[0] = hc_bytealign (w2[2], w2[3], offset); c1[3] = hc_bytealign (w2[1], w2[2], offset); c1[2] = hc_bytealign (w2[0], w2[1], offset); c1[1] = hc_bytealign (w1[3], w2[0], offset); c1[0] = hc_bytealign (w1[2], w1[3], offset); c0[3] = hc_bytealign (w1[1], w1[2], offset); c0[2] = hc_bytealign (w1[0], w1[1], offset); c0[1] = hc_bytealign (w0[3], w1[0], offset); c0[0] = hc_bytealign (w0[2], w0[3], offset); w7[3] = hc_bytealign (w0[1], w0[2], offset); w7[2] = hc_bytealign (w0[0], w0[1], offset); w7[1] = hc_bytealign ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_bytealign (w7[3], 0, offset); c7[1] = hc_bytealign (w7[2], w7[3], offset); c7[0] = hc_bytealign (w7[1], w7[2], offset); c6[3] = hc_bytealign (w7[0], w7[1], offset); c6[2] = hc_bytealign (w6[3], w7[0], offset); c6[1] = hc_bytealign (w6[2], w6[3], offset); c6[0] = hc_bytealign (w6[1], w6[2], offset); c5[3] = hc_bytealign (w6[0], w6[1], offset); c5[2] = hc_bytealign (w5[3], w6[0], offset); c5[1] = hc_bytealign (w5[2], w5[3], offset); c5[0] = hc_bytealign (w5[1], w5[2], offset); c4[3] = hc_bytealign (w5[0], w5[1], offset); c4[2] = hc_bytealign (w4[3], w5[0], offset); c4[1] = hc_bytealign (w4[2], w4[3], offset); c4[0] = hc_bytealign (w4[1], w4[2], offset); c3[3] = hc_bytealign (w4[0], w4[1], offset); c3[2] = hc_bytealign (w3[3], w4[0], offset); c3[1] = hc_bytealign (w3[2], w3[3], offset); c3[0] = hc_bytealign (w3[1], w3[2], offset); c2[3] = hc_bytealign (w3[0], w3[1], offset); c2[2] = hc_bytealign 
(w2[3], w3[0], offset); c2[1] = hc_bytealign (w2[2], w2[3], offset); c2[0] = hc_bytealign (w2[1], w2[2], offset); c1[3] = hc_bytealign (w2[0], w2[1], offset); c1[2] = hc_bytealign (w1[3], w2[0], offset); c1[1] = hc_bytealign (w1[2], w1[3], offset); c1[0] = hc_bytealign (w1[1], w1[2], offset); c0[3] = hc_bytealign (w1[0], w1[1], offset); c0[2] = hc_bytealign (w0[3], w1[0], offset); c0[1] = hc_bytealign (w0[2], w0[3], offset); c0[0] = hc_bytealign (w0[1], w0[2], offset); w7[3] = hc_bytealign (w0[0], w0[1], offset); w7[2] = hc_bytealign ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_bytealign (w7[3], 0, offset); c7[2] = hc_bytealign (w7[2], w7[3], offset); c7[1] = hc_bytealign (w7[1], w7[2], offset); c7[0] = hc_bytealign (w7[0], w7[1], offset); c6[3] = hc_bytealign (w6[3], w7[0], offset); c6[2] = hc_bytealign (w6[2], w6[3], offset); c6[1] = hc_bytealign (w6[1], w6[2], offset); c6[0] = hc_bytealign (w6[0], w6[1], offset); c5[3] = hc_bytealign (w5[3], w6[0], offset); c5[2] = hc_bytealign (w5[2], w5[3], offset); c5[1] = hc_bytealign (w5[1], w5[2], offset); c5[0] = hc_bytealign (w5[0], w5[1], offset); c4[3] = hc_bytealign (w4[3], w5[0], offset); c4[2] = hc_bytealign (w4[2], w4[3], offset); c4[1] = hc_bytealign (w4[1], w4[2], offset); c4[0] = hc_bytealign (w4[0], w4[1], offset); c3[3] = hc_bytealign (w3[3], w4[0], offset); c3[2] = hc_bytealign (w3[2], w3[3], offset); c3[1] = hc_bytealign (w3[1], w3[2], offset); c3[0] = hc_bytealign (w3[0], w3[1], offset); c2[3] = hc_bytealign (w2[3], w3[0], offset); c2[2] = hc_bytealign (w2[2], w2[3], offset); c2[1] = hc_bytealign (w2[1], w2[2], offset); c2[0] = hc_bytealign (w2[0], w2[1], offset); c1[3] = 
hc_bytealign (w1[3], w2[0], offset); c1[2] = hc_bytealign (w1[2], w1[3], offset); c1[1] = hc_bytealign (w1[1], w1[2], offset); c1[0] = hc_bytealign (w1[0], w1[1], offset); c0[3] = hc_bytealign (w0[3], w1[0], offset); c0[2] = hc_bytealign (w0[2], w0[3], offset); c0[1] = hc_bytealign (w0[1], w0[2], offset); c0[0] = hc_bytealign (w0[0], w0[1], offset); w7[3] = hc_bytealign ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm (w7[3], 0, selector); w7[3] = hc_byte_perm (w7[2], w7[3], selector); w7[2] = hc_byte_perm (w7[1], w7[2], selector); w7[1] = hc_byte_perm (w7[0], w7[1], selector); w7[0] = hc_byte_perm (w6[3], w7[0], selector); w6[3] = hc_byte_perm (w6[2], w6[3], selector); w6[2] = hc_byte_perm (w6[1], w6[2], selector); w6[1] = hc_byte_perm (w6[0], w6[1], selector); w6[0] = hc_byte_perm (w5[3], w6[0], selector); w5[3] = hc_byte_perm (w5[2], w5[3], selector); w5[2] = hc_byte_perm (w5[1], w5[2], selector); w5[1] = hc_byte_perm (w5[0], w5[1], selector); w5[0] = hc_byte_perm (w4[3], w5[0], selector); w4[3] = hc_byte_perm (w4[2], w4[3], selector); w4[2] = hc_byte_perm (w4[1], w4[2], selector); w4[1] = hc_byte_perm (w4[0], w4[1], selector); w4[0] = hc_byte_perm (w3[3], w4[0], selector); w3[3] = hc_byte_perm (w3[2], w3[3], 
selector); w3[2] = hc_byte_perm (w3[1], w3[2], selector); w3[1] = hc_byte_perm (w3[0], w3[1], selector); w3[0] = hc_byte_perm (w2[3], w3[0], selector); w2[3] = hc_byte_perm (w2[2], w2[3], selector); w2[2] = hc_byte_perm (w2[1], w2[2], selector); w2[1] = hc_byte_perm (w2[0], w2[1], selector); w2[0] = hc_byte_perm (w1[3], w2[0], selector); w1[3] = hc_byte_perm (w1[2], w1[3], selector); w1[2] = hc_byte_perm (w1[1], w1[2], selector); w1[1] = hc_byte_perm (w1[0], w1[1], selector); w1[0] = hc_byte_perm (w0[3], w1[0], selector); w0[3] = hc_byte_perm (w0[2], w0[3], selector); w0[2] = hc_byte_perm (w0[1], w0[2], selector); w0[1] = hc_byte_perm (w0[0], w0[1], selector); w0[0] = hc_byte_perm ( 0, w0[0], selector); break; case 1: c0[1] = hc_byte_perm (w7[3], 0, selector); c0[0] = hc_byte_perm (w7[2], w7[3], selector); w7[3] = hc_byte_perm (w7[1], w7[2], selector); w7[2] = hc_byte_perm (w7[0], w7[1], selector); w7[1] = hc_byte_perm (w6[3], w7[0], selector); w7[0] = hc_byte_perm (w6[2], w6[3], selector); w6[3] = hc_byte_perm (w6[1], w6[2], selector); w6[2] = hc_byte_perm (w6[0], w6[1], selector); w6[1] = hc_byte_perm (w5[3], w6[0], selector); w6[0] = hc_byte_perm (w5[2], w5[3], selector); w5[3] = hc_byte_perm (w5[1], w5[2], selector); w5[2] = hc_byte_perm (w5[0], w5[1], selector); w5[1] = hc_byte_perm (w4[3], w5[0], selector); w5[0] = hc_byte_perm (w4[2], w4[3], selector); w4[3] = hc_byte_perm (w4[1], w4[2], selector); w4[2] = hc_byte_perm (w4[0], w4[1], selector); w4[1] = hc_byte_perm (w3[3], w4[0], selector); w4[0] = hc_byte_perm (w3[2], w3[3], selector); w3[3] = hc_byte_perm (w3[1], w3[2], selector); w3[2] = hc_byte_perm (w3[0], w3[1], selector); w3[1] = hc_byte_perm (w2[3], w3[0], selector); w3[0] = hc_byte_perm (w2[2], w2[3], selector); w2[3] = hc_byte_perm (w2[1], w2[2], selector); w2[2] = hc_byte_perm (w2[0], w2[1], selector); w2[1] = hc_byte_perm (w1[3], w2[0], selector); w2[0] = hc_byte_perm (w1[2], w1[3], selector); w1[3] = hc_byte_perm (w1[1], w1[2], selector); w1[2] 
= hc_byte_perm (w1[0], w1[1], selector); w1[1] = hc_byte_perm (w0[3], w1[0], selector); w1[0] = hc_byte_perm (w0[2], w0[3], selector); w0[3] = hc_byte_perm (w0[1], w0[2], selector); w0[2] = hc_byte_perm (w0[0], w0[1], selector); w0[1] = hc_byte_perm ( 0, w0[0], selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm (w7[3], 0, selector); c0[1] = hc_byte_perm (w7[2], w7[3], selector); c0[0] = hc_byte_perm (w7[1], w7[2], selector); w7[3] = hc_byte_perm (w7[0], w7[1], selector); w7[2] = hc_byte_perm (w6[3], w7[0], selector); w7[1] = hc_byte_perm (w6[2], w6[3], selector); w7[0] = hc_byte_perm (w6[1], w6[2], selector); w6[3] = hc_byte_perm (w6[0], w6[1], selector); w6[2] = hc_byte_perm (w5[3], w6[0], selector); w6[1] = hc_byte_perm (w5[2], w5[3], selector); w6[0] = hc_byte_perm (w5[1], w5[2], selector); w5[3] = hc_byte_perm (w5[0], w5[1], selector); w5[2] = hc_byte_perm (w4[3], w5[0], selector); w5[1] = hc_byte_perm (w4[2], w4[3], selector); w5[0] = hc_byte_perm (w4[1], w4[2], selector); w4[3] = hc_byte_perm (w4[0], w4[1], selector); w4[2] = hc_byte_perm (w3[3], w4[0], selector); w4[1] = hc_byte_perm (w3[2], w3[3], selector); w4[0] = hc_byte_perm (w3[1], w3[2], selector); w3[3] = hc_byte_perm (w3[0], w3[1], selector); w3[2] = hc_byte_perm (w2[3], w3[0], selector); w3[1] = hc_byte_perm (w2[2], w2[3], selector); w3[0] = hc_byte_perm (w2[1], w2[2], selector); w2[3] = hc_byte_perm (w2[0], w2[1], selector); w2[2] = hc_byte_perm (w1[3], w2[0], selector); w2[1] = hc_byte_perm (w1[2], w1[3], selector); w2[0] = hc_byte_perm (w1[1], w1[2], selector); w1[3] = hc_byte_perm (w1[0], w1[1], selector); w1[2] = hc_byte_perm (w0[3], w1[0], selector); w1[1] = hc_byte_perm (w0[2], w0[3], selector); w1[0] = hc_byte_perm (w0[1], w0[2], selector); w0[3] = hc_byte_perm (w0[0], w0[1], selector); w0[2] = hc_byte_perm ( 0, w0[0], selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm (w7[3], 0, selector); c0[2] = hc_byte_perm (w7[2], w7[3], selector); c0[1] = hc_byte_perm (w7[1], 
w7[2], selector); c0[0] = hc_byte_perm (w7[0], w7[1], selector); w7[3] = hc_byte_perm (w6[3], w7[0], selector); w7[2] = hc_byte_perm (w6[2], w6[3], selector); w7[1] = hc_byte_perm (w6[1], w6[2], selector); w7[0] = hc_byte_perm (w6[0], w6[1], selector); w6[3] = hc_byte_perm (w5[3], w6[0], selector); w6[2] = hc_byte_perm (w5[2], w5[3], selector); w6[1] = hc_byte_perm (w5[1], w5[2], selector); w6[0] = hc_byte_perm (w5[0], w5[1], selector); w5[3] = hc_byte_perm (w4[3], w5[0], selector); w5[2] = hc_byte_perm (w4[2], w4[3], selector); w5[1] = hc_byte_perm (w4[1], w4[2], selector); w5[0] = hc_byte_perm (w4[0], w4[1], selector); w4[3] = hc_byte_perm (w3[3], w4[0], selector); w4[2] = hc_byte_perm (w3[2], w3[3], selector); w4[1] = hc_byte_perm (w3[1], w3[2], selector); w4[0] = hc_byte_perm (w3[0], w3[1], selector); w3[3] = hc_byte_perm (w2[3], w3[0], selector); w3[2] = hc_byte_perm (w2[2], w2[3], selector); w3[1] = hc_byte_perm (w2[1], w2[2], selector); w3[0] = hc_byte_perm (w2[0], w2[1], selector); w2[3] = hc_byte_perm (w1[3], w2[0], selector); w2[2] = hc_byte_perm (w1[2], w1[3], selector); w2[1] = hc_byte_perm (w1[1], w1[2], selector); w2[0] = hc_byte_perm (w1[0], w1[1], selector); w1[3] = hc_byte_perm (w0[3], w1[0], selector); w1[2] = hc_byte_perm (w0[2], w0[3], selector); w1[1] = hc_byte_perm (w0[1], w0[2], selector); w1[0] = hc_byte_perm (w0[0], w0[1], selector); w0[3] = hc_byte_perm ( 0, w0[0], selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm (w7[3], 0, selector); c0[3] = hc_byte_perm (w7[2], w7[3], selector); c0[2] = hc_byte_perm (w7[1], w7[2], selector); c0[1] = hc_byte_perm (w7[0], w7[1], selector); c0[0] = hc_byte_perm (w6[3], w7[0], selector); w7[3] = hc_byte_perm (w6[2], w6[3], selector); w7[2] = hc_byte_perm (w6[1], w6[2], selector); w7[1] = hc_byte_perm (w6[0], w6[1], selector); w7[0] = hc_byte_perm (w5[3], w6[0], selector); w6[3] = hc_byte_perm (w5[2], w5[3], selector); w6[2] = hc_byte_perm (w5[1], w5[2], selector); w6[1] = 
hc_byte_perm (w5[0], w5[1], selector); w6[0] = hc_byte_perm (w4[3], w5[0], selector); w5[3] = hc_byte_perm (w4[2], w4[3], selector); w5[2] = hc_byte_perm (w4[1], w4[2], selector); w5[1] = hc_byte_perm (w4[0], w4[1], selector); w5[0] = hc_byte_perm (w3[3], w4[0], selector); w4[3] = hc_byte_perm (w3[2], w3[3], selector); w4[2] = hc_byte_perm (w3[1], w3[2], selector); w4[1] = hc_byte_perm (w3[0], w3[1], selector); w4[0] = hc_byte_perm (w2[3], w3[0], selector); w3[3] = hc_byte_perm (w2[2], w2[3], selector); w3[2] = hc_byte_perm (w2[1], w2[2], selector); w3[1] = hc_byte_perm (w2[0], w2[1], selector); w3[0] = hc_byte_perm (w1[3], w2[0], selector); w2[3] = hc_byte_perm (w1[2], w1[3], selector); w2[2] = hc_byte_perm (w1[1], w1[2], selector); w2[1] = hc_byte_perm (w1[0], w1[1], selector); w2[0] = hc_byte_perm (w0[3], w1[0], selector); w1[3] = hc_byte_perm (w0[2], w0[3], selector); w1[2] = hc_byte_perm (w0[1], w0[2], selector); w1[1] = hc_byte_perm (w0[0], w0[1], selector); w1[0] = hc_byte_perm ( 0, w0[0], selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm (w7[3], 0, selector); c1[0] = hc_byte_perm (w7[2], w7[3], selector); c0[3] = hc_byte_perm (w7[1], w7[2], selector); c0[2] = hc_byte_perm (w7[0], w7[1], selector); c0[1] = hc_byte_perm (w6[3], w7[0], selector); c0[0] = hc_byte_perm (w6[2], w6[3], selector); w7[3] = hc_byte_perm (w6[1], w6[2], selector); w7[2] = hc_byte_perm (w6[0], w6[1], selector); w7[1] = hc_byte_perm (w5[3], w6[0], selector); w7[0] = hc_byte_perm (w5[2], w5[3], selector); w6[3] = hc_byte_perm (w5[1], w5[2], selector); w6[2] = hc_byte_perm (w5[0], w5[1], selector); w6[1] = hc_byte_perm (w4[3], w5[0], selector); w6[0] = hc_byte_perm (w4[2], w4[3], selector); w5[3] = hc_byte_perm (w4[1], w4[2], selector); w5[2] = hc_byte_perm (w4[0], w4[1], selector); w5[1] = hc_byte_perm (w3[3], w4[0], selector); w5[0] = hc_byte_perm (w3[2], w3[3], selector); w4[3] = hc_byte_perm (w3[1], w3[2], selector); w4[2] = hc_byte_perm (w3[0], 
w3[1], selector); w4[1] = hc_byte_perm (w2[3], w3[0], selector); w4[0] = hc_byte_perm (w2[2], w2[3], selector); w3[3] = hc_byte_perm (w2[1], w2[2], selector); w3[2] = hc_byte_perm (w2[0], w2[1], selector); w3[1] = hc_byte_perm (w1[3], w2[0], selector); w3[0] = hc_byte_perm (w1[2], w1[3], selector); w2[3] = hc_byte_perm (w1[1], w1[2], selector); w2[2] = hc_byte_perm (w1[0], w1[1], selector); w2[1] = hc_byte_perm (w0[3], w1[0], selector); w2[0] = hc_byte_perm (w0[2], w0[3], selector); w1[3] = hc_byte_perm (w0[1], w0[2], selector); w1[2] = hc_byte_perm (w0[0], w0[1], selector); w1[1] = hc_byte_perm ( 0, w0[0], selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm (w7[3], 0, selector); c1[1] = hc_byte_perm (w7[2], w7[3], selector); c1[0] = hc_byte_perm (w7[1], w7[2], selector); c0[3] = hc_byte_perm (w7[0], w7[1], selector); c0[2] = hc_byte_perm (w6[3], w7[0], selector); c0[1] = hc_byte_perm (w6[2], w6[3], selector); c0[0] = hc_byte_perm (w6[1], w6[2], selector); w7[3] = hc_byte_perm (w6[0], w6[1], selector); w7[2] = hc_byte_perm (w5[3], w6[0], selector); w7[1] = hc_byte_perm (w5[2], w5[3], selector); w7[0] = hc_byte_perm (w5[1], w5[2], selector); w6[3] = hc_byte_perm (w5[0], w5[1], selector); w6[2] = hc_byte_perm (w4[3], w5[0], selector); w6[1] = hc_byte_perm (w4[2], w4[3], selector); w6[0] = hc_byte_perm (w4[1], w4[2], selector); w5[3] = hc_byte_perm (w4[0], w4[1], selector); w5[2] = hc_byte_perm (w3[3], w4[0], selector); w5[1] = hc_byte_perm (w3[2], w3[3], selector); w5[0] = hc_byte_perm (w3[1], w3[2], selector); w4[3] = hc_byte_perm (w3[0], w3[1], selector); w4[2] = hc_byte_perm (w2[3], w3[0], selector); w4[1] = hc_byte_perm (w2[2], w2[3], selector); w4[0] = hc_byte_perm (w2[1], w2[2], selector); w3[3] = hc_byte_perm (w2[0], w2[1], selector); w3[2] = hc_byte_perm (w1[3], w2[0], selector); w3[1] = hc_byte_perm (w1[2], w1[3], selector); w3[0] = hc_byte_perm (w1[1], w1[2], selector); w2[3] = hc_byte_perm (w1[0], w1[1], 
selector); w2[2] = hc_byte_perm (w0[3], w1[0], selector); w2[1] = hc_byte_perm (w0[2], w0[3], selector); w2[0] = hc_byte_perm (w0[1], w0[2], selector); w1[3] = hc_byte_perm (w0[0], w0[1], selector); w1[2] = hc_byte_perm ( 0, w0[0], selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_byte_perm (w7[3], 0, selector); c1[2] = hc_byte_perm (w7[2], w7[3], selector); c1[1] = hc_byte_perm (w7[1], w7[2], selector); c1[0] = hc_byte_perm (w7[0], w7[1], selector); c0[3] = hc_byte_perm (w6[3], w7[0], selector); c0[2] = hc_byte_perm (w6[2], w6[3], selector); c0[1] = hc_byte_perm (w6[1], w6[2], selector); c0[0] = hc_byte_perm (w6[0], w6[1], selector); w7[3] = hc_byte_perm (w5[3], w6[0], selector); w7[2] = hc_byte_perm (w5[2], w5[3], selector); w7[1] = hc_byte_perm (w5[1], w5[2], selector); w7[0] = hc_byte_perm (w5[0], w5[1], selector); w6[3] = hc_byte_perm (w4[3], w5[0], selector); w6[2] = hc_byte_perm (w4[2], w4[3], selector); w6[1] = hc_byte_perm (w4[1], w4[2], selector); w6[0] = hc_byte_perm (w4[0], w4[1], selector); w5[3] = hc_byte_perm (w3[3], w4[0], selector); w5[2] = hc_byte_perm (w3[2], w3[3], selector); w5[1] = hc_byte_perm (w3[1], w3[2], selector); w5[0] = hc_byte_perm (w3[0], w3[1], selector); w4[3] = hc_byte_perm (w2[3], w3[0], selector); w4[2] = hc_byte_perm (w2[2], w2[3], selector); w4[1] = hc_byte_perm (w2[1], w2[2], selector); w4[0] = hc_byte_perm (w2[0], w2[1], selector); w3[3] = hc_byte_perm (w1[3], w2[0], selector); w3[2] = hc_byte_perm (w1[2], w1[3], selector); w3[1] = hc_byte_perm (w1[1], w1[2], selector); w3[0] = hc_byte_perm (w1[0], w1[1], selector); w2[3] = hc_byte_perm (w0[3], w1[0], selector); w2[2] = hc_byte_perm (w0[2], w0[3], selector); w2[1] = hc_byte_perm (w0[1], w0[2], selector); w2[0] = hc_byte_perm (w0[0], w0[1], selector); w1[3] = hc_byte_perm ( 0, w0[0], selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm (w7[3], 0, 
selector); c1[3] = hc_byte_perm (w7[2], w7[3], selector); c1[2] = hc_byte_perm (w7[1], w7[2], selector); c1[1] = hc_byte_perm (w7[0], w7[1], selector); c1[0] = hc_byte_perm (w6[3], w7[0], selector); c0[3] = hc_byte_perm (w6[2], w6[3], selector); c0[2] = hc_byte_perm (w6[1], w6[2], selector); c0[1] = hc_byte_perm (w6[0], w6[1], selector); c0[0] = hc_byte_perm (w5[3], w6[0], selector); w7[3] = hc_byte_perm (w5[2], w5[3], selector); w7[2] = hc_byte_perm (w5[1], w5[2], selector); w7[1] = hc_byte_perm (w5[0], w5[1], selector); w7[0] = hc_byte_perm (w4[3], w5[0], selector); w6[3] = hc_byte_perm (w4[2], w4[3], selector); w6[2] = hc_byte_perm (w4[1], w4[2], selector); w6[1] = hc_byte_perm (w4[0], w4[1], selector); w6[0] = hc_byte_perm (w3[3], w4[0], selector); w5[3] = hc_byte_perm (w3[2], w3[3], selector); w5[2] = hc_byte_perm (w3[1], w3[2], selector); w5[1] = hc_byte_perm (w3[0], w3[1], selector); w5[0] = hc_byte_perm (w2[3], w3[0], selector); w4[3] = hc_byte_perm (w2[2], w2[3], selector); w4[2] = hc_byte_perm (w2[1], w2[2], selector); w4[1] = hc_byte_perm (w2[0], w2[1], selector); w4[0] = hc_byte_perm (w1[3], w2[0], selector); w3[3] = hc_byte_perm (w1[2], w1[3], selector); w3[2] = hc_byte_perm (w1[1], w1[2], selector); w3[1] = hc_byte_perm (w1[0], w1[1], selector); w3[0] = hc_byte_perm (w0[3], w1[0], selector); w2[3] = hc_byte_perm (w0[2], w0[3], selector); w2[2] = hc_byte_perm (w0[1], w0[2], selector); w2[1] = hc_byte_perm (w0[0], w0[1], selector); w2[0] = hc_byte_perm ( 0, w0[0], selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm (w7[3], 0, selector); c2[0] = hc_byte_perm (w7[2], w7[3], selector); c1[3] = hc_byte_perm (w7[1], w7[2], selector); c1[2] = hc_byte_perm (w7[0], w7[1], selector); c1[1] = hc_byte_perm (w6[3], w7[0], selector); c1[0] = hc_byte_perm (w6[2], w6[3], selector); c0[3] = hc_byte_perm (w6[1], w6[2], selector); c0[2] = hc_byte_perm (w6[0], w6[1], selector); c0[1] = 
hc_byte_perm (w5[3], w6[0], selector); c0[0] = hc_byte_perm (w5[2], w5[3], selector); w7[3] = hc_byte_perm (w5[1], w5[2], selector); w7[2] = hc_byte_perm (w5[0], w5[1], selector); w7[1] = hc_byte_perm (w4[3], w5[0], selector); w7[0] = hc_byte_perm (w4[2], w4[3], selector); w6[3] = hc_byte_perm (w4[1], w4[2], selector); w6[2] = hc_byte_perm (w4[0], w4[1], selector); w6[1] = hc_byte_perm (w3[3], w4[0], selector); w6[0] = hc_byte_perm (w3[2], w3[3], selector); w5[3] = hc_byte_perm (w3[1], w3[2], selector); w5[2] = hc_byte_perm (w3[0], w3[1], selector); w5[1] = hc_byte_perm (w2[3], w3[0], selector); w5[0] = hc_byte_perm (w2[2], w2[3], selector); w4[3] = hc_byte_perm (w2[1], w2[2], selector); w4[2] = hc_byte_perm (w2[0], w2[1], selector); w4[1] = hc_byte_perm (w1[3], w2[0], selector); w4[0] = hc_byte_perm (w1[2], w1[3], selector); w3[3] = hc_byte_perm (w1[1], w1[2], selector); w3[2] = hc_byte_perm (w1[0], w1[1], selector); w3[1] = hc_byte_perm (w0[3], w1[0], selector); w3[0] = hc_byte_perm (w0[2], w0[3], selector); w2[3] = hc_byte_perm (w0[1], w0[2], selector); w2[2] = hc_byte_perm (w0[0], w0[1], selector); w2[1] = hc_byte_perm ( 0, w0[0], selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm (w7[3], 0, selector); c2[1] = hc_byte_perm (w7[2], w7[3], selector); c2[0] = hc_byte_perm (w7[1], w7[2], selector); c1[3] = hc_byte_perm (w7[0], w7[1], selector); c1[2] = hc_byte_perm (w6[3], w7[0], selector); c1[1] = hc_byte_perm (w6[2], w6[3], selector); c1[0] = hc_byte_perm (w6[1], w6[2], selector); c0[3] = hc_byte_perm (w6[0], w6[1], selector); c0[2] = hc_byte_perm (w5[3], w6[0], selector); c0[1] = hc_byte_perm (w5[2], w5[3], selector); c0[0] = hc_byte_perm (w5[1], w5[2], selector); w7[3] = hc_byte_perm (w5[0], w5[1], selector); w7[2] = hc_byte_perm (w4[3], w5[0], selector); w7[1] = hc_byte_perm (w4[2], w4[3], selector); w7[0] = hc_byte_perm (w4[1], w4[2], selector); w6[3] = 
hc_byte_perm (w4[0], w4[1], selector); w6[2] = hc_byte_perm (w3[3], w4[0], selector); w6[1] = hc_byte_perm (w3[2], w3[3], selector); w6[0] = hc_byte_perm (w3[1], w3[2], selector); w5[3] = hc_byte_perm (w3[0], w3[1], selector); w5[2] = hc_byte_perm (w2[3], w3[0], selector); w5[1] = hc_byte_perm (w2[2], w2[3], selector); w5[0] = hc_byte_perm (w2[1], w2[2], selector); w4[3] = hc_byte_perm (w2[0], w2[1], selector); w4[2] = hc_byte_perm (w1[3], w2[0], selector); w4[1] = hc_byte_perm (w1[2], w1[3], selector); w4[0] = hc_byte_perm (w1[1], w1[2], selector); w3[3] = hc_byte_perm (w1[0], w1[1], selector); w3[2] = hc_byte_perm (w0[3], w1[0], selector); w3[1] = hc_byte_perm (w0[2], w0[3], selector); w3[0] = hc_byte_perm (w0[1], w0[2], selector); w2[3] = hc_byte_perm (w0[0], w0[1], selector); w2[2] = hc_byte_perm ( 0, w0[0], selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm (w7[3], 0, selector); c2[2] = hc_byte_perm (w7[2], w7[3], selector); c2[1] = hc_byte_perm (w7[1], w7[2], selector); c2[0] = hc_byte_perm (w7[0], w7[1], selector); c1[3] = hc_byte_perm (w6[3], w7[0], selector); c1[2] = hc_byte_perm (w6[2], w6[3], selector); c1[1] = hc_byte_perm (w6[1], w6[2], selector); c1[0] = hc_byte_perm (w6[0], w6[1], selector); c0[3] = hc_byte_perm (w5[3], w6[0], selector); c0[2] = hc_byte_perm (w5[2], w5[3], selector); c0[1] = hc_byte_perm (w5[1], w5[2], selector); c0[0] = hc_byte_perm (w5[0], w5[1], selector); w7[3] = hc_byte_perm (w4[3], w5[0], selector); w7[2] = hc_byte_perm (w4[2], w4[3], selector); w7[1] = hc_byte_perm (w4[1], w4[2], selector); w7[0] = hc_byte_perm (w4[0], w4[1], selector); w6[3] = hc_byte_perm (w3[3], w4[0], selector); w6[2] = hc_byte_perm (w3[2], w3[3], selector); w6[1] = hc_byte_perm (w3[1], w3[2], selector); w6[0] = hc_byte_perm (w3[0], w3[1], selector); w5[3] = hc_byte_perm (w2[3], w3[0], selector); w5[2] = hc_byte_perm (w2[2], w2[3], selector); w5[1] = 
hc_byte_perm (w2[1], w2[2], selector); w5[0] = hc_byte_perm (w2[0], w2[1], selector); w4[3] = hc_byte_perm (w1[3], w2[0], selector); w4[2] = hc_byte_perm (w1[2], w1[3], selector); w4[1] = hc_byte_perm (w1[1], w1[2], selector); w4[0] = hc_byte_perm (w1[0], w1[1], selector); w3[3] = hc_byte_perm (w0[3], w1[0], selector); w3[2] = hc_byte_perm (w0[2], w0[3], selector); w3[1] = hc_byte_perm (w0[1], w0[2], selector); w3[0] = hc_byte_perm (w0[0], w0[1], selector); w2[3] = hc_byte_perm ( 0, w0[0], selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm (w7[3], 0, selector); c2[3] = hc_byte_perm (w7[2], w7[3], selector); c2[2] = hc_byte_perm (w7[1], w7[2], selector); c2[1] = hc_byte_perm (w7[0], w7[1], selector); c2[0] = hc_byte_perm (w6[3], w7[0], selector); c1[3] = hc_byte_perm (w6[2], w6[3], selector); c1[2] = hc_byte_perm (w6[1], w6[2], selector); c1[1] = hc_byte_perm (w6[0], w6[1], selector); c1[0] = hc_byte_perm (w5[3], w6[0], selector); c0[3] = hc_byte_perm (w5[2], w5[3], selector); c0[2] = hc_byte_perm (w5[1], w5[2], selector); c0[1] = hc_byte_perm (w5[0], w5[1], selector); c0[0] = hc_byte_perm (w4[3], w5[0], selector); w7[3] = hc_byte_perm (w4[2], w4[3], selector); w7[2] = hc_byte_perm (w4[1], w4[2], selector); w7[1] = hc_byte_perm (w4[0], w4[1], selector); w7[0] = hc_byte_perm (w3[3], w4[0], selector); w6[3] = hc_byte_perm (w3[2], w3[3], selector); w6[2] = hc_byte_perm (w3[1], w3[2], selector); w6[1] = hc_byte_perm (w3[0], w3[1], selector); w6[0] = hc_byte_perm (w2[3], w3[0], selector); w5[3] = hc_byte_perm (w2[2], w2[3], selector); w5[2] = hc_byte_perm (w2[1], w2[2], selector); w5[1] = hc_byte_perm (w2[0], w2[1], selector); w5[0] = hc_byte_perm (w1[3], w2[0], selector); w4[3] = hc_byte_perm (w1[2], w1[3], selector); w4[2] = hc_byte_perm (w1[1], w1[2], selector); w4[1] = hc_byte_perm (w1[0], w1[1], selector); w4[0] = hc_byte_perm (w0[3], w1[0], 
selector); w3[3] = hc_byte_perm (w0[2], w0[3], selector); w3[2] = hc_byte_perm (w0[1], w0[2], selector); w3[1] = hc_byte_perm (w0[0], w0[1], selector); w3[0] = hc_byte_perm ( 0, w0[0], selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm (w7[3], 0, selector); c3[0] = hc_byte_perm (w7[2], w7[3], selector); c2[3] = hc_byte_perm (w7[1], w7[2], selector); c2[2] = hc_byte_perm (w7[0], w7[1], selector); c2[1] = hc_byte_perm (w6[3], w7[0], selector); c2[0] = hc_byte_perm (w6[2], w6[3], selector); c1[3] = hc_byte_perm (w6[1], w6[2], selector); c1[2] = hc_byte_perm (w6[0], w6[1], selector); c1[1] = hc_byte_perm (w5[3], w6[0], selector); c1[0] = hc_byte_perm (w5[2], w5[3], selector); c0[3] = hc_byte_perm (w5[1], w5[2], selector); c0[2] = hc_byte_perm (w5[0], w5[1], selector); c0[1] = hc_byte_perm (w4[3], w5[0], selector); c0[0] = hc_byte_perm (w4[2], w4[3], selector); w7[3] = hc_byte_perm (w4[1], w4[2], selector); w7[2] = hc_byte_perm (w4[0], w4[1], selector); w7[1] = hc_byte_perm (w3[3], w4[0], selector); w7[0] = hc_byte_perm (w3[2], w3[3], selector); w6[3] = hc_byte_perm (w3[1], w3[2], selector); w6[2] = hc_byte_perm (w3[0], w3[1], selector); w6[1] = hc_byte_perm (w2[3], w3[0], selector); w6[0] = hc_byte_perm (w2[2], w2[3], selector); w5[3] = hc_byte_perm (w2[1], w2[2], selector); w5[2] = hc_byte_perm (w2[0], w2[1], selector); w5[1] = hc_byte_perm (w1[3], w2[0], selector); w5[0] = hc_byte_perm (w1[2], w1[3], selector); w4[3] = hc_byte_perm (w1[1], w1[2], selector); w4[2] = hc_byte_perm (w1[0], w1[1], selector); w4[1] = hc_byte_perm (w0[3], w1[0], selector); w4[0] = hc_byte_perm (w0[2], w0[3], selector); w3[3] = hc_byte_perm (w0[1], w0[2], selector); w3[2] = hc_byte_perm (w0[0], w0[1], selector); w3[1] = hc_byte_perm ( 0, w0[0], selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; 
w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm (w7[3], 0, selector); c3[1] = hc_byte_perm (w7[2], w7[3], selector); c3[0] = hc_byte_perm (w7[1], w7[2], selector); c2[3] = hc_byte_perm (w7[0], w7[1], selector); c2[2] = hc_byte_perm (w6[3], w7[0], selector); c2[1] = hc_byte_perm (w6[2], w6[3], selector); c2[0] = hc_byte_perm (w6[1], w6[2], selector); c1[3] = hc_byte_perm (w6[0], w6[1], selector); c1[2] = hc_byte_perm (w5[3], w6[0], selector); c1[1] = hc_byte_perm (w5[2], w5[3], selector); c1[0] = hc_byte_perm (w5[1], w5[2], selector); c0[3] = hc_byte_perm (w5[0], w5[1], selector); c0[2] = hc_byte_perm (w4[3], w5[0], selector); c0[1] = hc_byte_perm (w4[2], w4[3], selector); c0[0] = hc_byte_perm (w4[1], w4[2], selector); w7[3] = hc_byte_perm (w4[0], w4[1], selector); w7[2] = hc_byte_perm (w3[3], w4[0], selector); w7[1] = hc_byte_perm (w3[2], w3[3], selector); w7[0] = hc_byte_perm (w3[1], w3[2], selector); w6[3] = hc_byte_perm (w3[0], w3[1], selector); w6[2] = hc_byte_perm (w2[3], w3[0], selector); w6[1] = hc_byte_perm (w2[2], w2[3], selector); w6[0] = hc_byte_perm (w2[1], w2[2], selector); w5[3] = hc_byte_perm (w2[0], w2[1], selector); w5[2] = hc_byte_perm (w1[3], w2[0], selector); w5[1] = hc_byte_perm (w1[2], w1[3], selector); w5[0] = hc_byte_perm (w1[1], w1[2], selector); w4[3] = hc_byte_perm (w1[0], w1[1], selector); w4[2] = hc_byte_perm (w0[3], w1[0], selector); w4[1] = hc_byte_perm (w0[2], w0[3], selector); w4[0] = hc_byte_perm (w0[1], w0[2], selector); w3[3] = hc_byte_perm (w0[0], w0[1], selector); w3[2] = hc_byte_perm ( 0, w0[0], selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm (w7[3], 0, selector); c3[2] = hc_byte_perm (w7[2], w7[3], selector); c3[1] = hc_byte_perm (w7[1], w7[2], selector); c3[0] = hc_byte_perm (w7[0], w7[1], selector); c2[3] = hc_byte_perm (w6[3], w7[0], selector); c2[2] 
= hc_byte_perm (w6[2], w6[3], selector); c2[1] = hc_byte_perm (w6[1], w6[2], selector); c2[0] = hc_byte_perm (w6[0], w6[1], selector); c1[3] = hc_byte_perm (w5[3], w6[0], selector); c1[2] = hc_byte_perm (w5[2], w5[3], selector); c1[1] = hc_byte_perm (w5[1], w5[2], selector); c1[0] = hc_byte_perm (w5[0], w5[1], selector); c0[3] = hc_byte_perm (w4[3], w5[0], selector); c0[2] = hc_byte_perm (w4[2], w4[3], selector); c0[1] = hc_byte_perm (w4[1], w4[2], selector); c0[0] = hc_byte_perm (w4[0], w4[1], selector); w7[3] = hc_byte_perm (w3[3], w4[0], selector); w7[2] = hc_byte_perm (w3[2], w3[3], selector); w7[1] = hc_byte_perm (w3[1], w3[2], selector); w7[0] = hc_byte_perm (w3[0], w3[1], selector); w6[3] = hc_byte_perm (w2[3], w3[0], selector); w6[2] = hc_byte_perm (w2[2], w2[3], selector); w6[1] = hc_byte_perm (w2[1], w2[2], selector); w6[0] = hc_byte_perm (w2[0], w2[1], selector); w5[3] = hc_byte_perm (w1[3], w2[0], selector); w5[2] = hc_byte_perm (w1[2], w1[3], selector); w5[1] = hc_byte_perm (w1[1], w1[2], selector); w5[0] = hc_byte_perm (w1[0], w1[1], selector); w4[3] = hc_byte_perm (w0[3], w1[0], selector); w4[2] = hc_byte_perm (w0[2], w0[3], selector); w4[1] = hc_byte_perm (w0[1], w0[2], selector); w4[0] = hc_byte_perm (w0[0], w0[1], selector); w3[3] = hc_byte_perm ( 0, w0[0], selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_byte_perm (w7[3], 0, selector); c3[3] = hc_byte_perm (w7[2], w7[3], selector); c3[2] = hc_byte_perm (w7[1], w7[2], selector); c3[1] = hc_byte_perm (w7[0], w7[1], selector); c3[0] = hc_byte_perm (w6[3], w7[0], selector); c2[3] = hc_byte_perm (w6[2], w6[3], selector); c2[2] = hc_byte_perm (w6[1], w6[2], selector); c2[1] = hc_byte_perm (w6[0], w6[1], selector); c2[0] = hc_byte_perm (w5[3], w6[0], selector); c1[3] = hc_byte_perm (w5[2], w5[3], selector); c1[2] = hc_byte_perm (w5[1], w5[2], 
selector); c1[1] = hc_byte_perm (w5[0], w5[1], selector); c1[0] = hc_byte_perm (w4[3], w5[0], selector); c0[3] = hc_byte_perm (w4[2], w4[3], selector); c0[2] = hc_byte_perm (w4[1], w4[2], selector); c0[1] = hc_byte_perm (w4[0], w4[1], selector); c0[0] = hc_byte_perm (w3[3], w4[0], selector); w7[3] = hc_byte_perm (w3[2], w3[3], selector); w7[2] = hc_byte_perm (w3[1], w3[2], selector); w7[1] = hc_byte_perm (w3[0], w3[1], selector); w7[0] = hc_byte_perm (w2[3], w3[0], selector); w6[3] = hc_byte_perm (w2[2], w2[3], selector); w6[2] = hc_byte_perm (w2[1], w2[2], selector); w6[1] = hc_byte_perm (w2[0], w2[1], selector); w6[0] = hc_byte_perm (w1[3], w2[0], selector); w5[3] = hc_byte_perm (w1[2], w1[3], selector); w5[2] = hc_byte_perm (w1[1], w1[2], selector); w5[1] = hc_byte_perm (w1[0], w1[1], selector); w5[0] = hc_byte_perm (w0[3], w1[0], selector); w4[3] = hc_byte_perm (w0[2], w0[3], selector); w4[2] = hc_byte_perm (w0[1], w0[2], selector); w4[1] = hc_byte_perm (w0[0], w0[1], selector); w4[0] = hc_byte_perm ( 0, w0[0], selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_byte_perm (w7[3], 0, selector); c4[0] = hc_byte_perm (w7[2], w7[3], selector); c3[3] = hc_byte_perm (w7[1], w7[2], selector); c3[2] = hc_byte_perm (w7[0], w7[1], selector); c3[1] = hc_byte_perm (w6[3], w7[0], selector); c3[0] = hc_byte_perm (w6[2], w6[3], selector); c2[3] = hc_byte_perm (w6[1], w6[2], selector); c2[2] = hc_byte_perm (w6[0], w6[1], selector); c2[1] = hc_byte_perm (w5[3], w6[0], selector); c2[0] = hc_byte_perm (w5[2], w5[3], selector); c1[3] = hc_byte_perm (w5[1], w5[2], selector); c1[2] = hc_byte_perm (w5[0], w5[1], selector); c1[1] = hc_byte_perm (w4[3], w5[0], selector); c1[0] = hc_byte_perm (w4[2], w4[3], selector); c0[3] = hc_byte_perm (w4[1], w4[2], selector); c0[2] = hc_byte_perm (w4[0], w4[1], selector); c0[1] = 
hc_byte_perm (w3[3], w4[0], selector); c0[0] = hc_byte_perm (w3[2], w3[3], selector); w7[3] = hc_byte_perm (w3[1], w3[2], selector); w7[2] = hc_byte_perm (w3[0], w3[1], selector); w7[1] = hc_byte_perm (w2[3], w3[0], selector); w7[0] = hc_byte_perm (w2[2], w2[3], selector); w6[3] = hc_byte_perm (w2[1], w2[2], selector); w6[2] = hc_byte_perm (w2[0], w2[1], selector); w6[1] = hc_byte_perm (w1[3], w2[0], selector); w6[0] = hc_byte_perm (w1[2], w1[3], selector); w5[3] = hc_byte_perm (w1[1], w1[2], selector); w5[2] = hc_byte_perm (w1[0], w1[1], selector); w5[1] = hc_byte_perm (w0[3], w1[0], selector); w5[0] = hc_byte_perm (w0[2], w0[3], selector); w4[3] = hc_byte_perm (w0[1], w0[2], selector); w4[2] = hc_byte_perm (w0[0], w0[1], selector); w4[1] = hc_byte_perm ( 0, w0[0], selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_byte_perm (w7[3], 0, selector); c4[1] = hc_byte_perm (w7[2], w7[3], selector); c4[0] = hc_byte_perm (w7[1], w7[2], selector); c3[3] = hc_byte_perm (w7[0], w7[1], selector); c3[2] = hc_byte_perm (w6[3], w7[0], selector); c3[1] = hc_byte_perm (w6[2], w6[3], selector); c3[0] = hc_byte_perm (w6[1], w6[2], selector); c2[3] = hc_byte_perm (w6[0], w6[1], selector); c2[2] = hc_byte_perm (w5[3], w6[0], selector); c2[1] = hc_byte_perm (w5[2], w5[3], selector); c2[0] = hc_byte_perm (w5[1], w5[2], selector); c1[3] = hc_byte_perm (w5[0], w5[1], selector); c1[2] = hc_byte_perm (w4[3], w5[0], selector); c1[1] = hc_byte_perm (w4[2], w4[3], selector); c1[0] = hc_byte_perm (w4[1], w4[2], selector); c0[3] = hc_byte_perm (w4[0], w4[1], selector); c0[2] = hc_byte_perm (w3[3], w4[0], selector); c0[1] = hc_byte_perm (w3[2], w3[3], selector); c0[0] = hc_byte_perm (w3[1], w3[2], selector); w7[3] = hc_byte_perm (w3[0], w3[1], selector); w7[2] = hc_byte_perm (w2[3], w3[0], selector); w7[1] = hc_byte_perm 
(w2[2], w2[3], selector); w7[0] = hc_byte_perm (w2[1], w2[2], selector); w6[3] = hc_byte_perm (w2[0], w2[1], selector); w6[2] = hc_byte_perm (w1[3], w2[0], selector); w6[1] = hc_byte_perm (w1[2], w1[3], selector); w6[0] = hc_byte_perm (w1[1], w1[2], selector); w5[3] = hc_byte_perm (w1[0], w1[1], selector); w5[2] = hc_byte_perm (w0[3], w1[0], selector); w5[1] = hc_byte_perm (w0[2], w0[3], selector); w5[0] = hc_byte_perm (w0[1], w0[2], selector); w4[3] = hc_byte_perm (w0[0], w0[1], selector); w4[2] = hc_byte_perm ( 0, w0[0], selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_byte_perm (w7[3], 0, selector); c4[2] = hc_byte_perm (w7[2], w7[3], selector); c4[1] = hc_byte_perm (w7[1], w7[2], selector); c4[0] = hc_byte_perm (w7[0], w7[1], selector); c3[3] = hc_byte_perm (w6[3], w7[0], selector); c3[2] = hc_byte_perm (w6[2], w6[3], selector); c3[1] = hc_byte_perm (w6[1], w6[2], selector); c3[0] = hc_byte_perm (w6[0], w6[1], selector); c2[3] = hc_byte_perm (w5[3], w6[0], selector); c2[2] = hc_byte_perm (w5[2], w5[3], selector); c2[1] = hc_byte_perm (w5[1], w5[2], selector); c2[0] = hc_byte_perm (w5[0], w5[1], selector); c1[3] = hc_byte_perm (w4[3], w5[0], selector); c1[2] = hc_byte_perm (w4[2], w4[3], selector); c1[1] = hc_byte_perm (w4[1], w4[2], selector); c1[0] = hc_byte_perm (w4[0], w4[1], selector); c0[3] = hc_byte_perm (w3[3], w4[0], selector); c0[2] = hc_byte_perm (w3[2], w3[3], selector); c0[1] = hc_byte_perm (w3[1], w3[2], selector); c0[0] = hc_byte_perm (w3[0], w3[1], selector); w7[3] = hc_byte_perm (w2[3], w3[0], selector); w7[2] = hc_byte_perm (w2[2], w2[3], selector); w7[1] = hc_byte_perm (w2[1], w2[2], selector); w7[0] = hc_byte_perm (w2[0], w2[1], selector); w6[3] = hc_byte_perm (w1[3], w2[0], selector); w6[2] = hc_byte_perm (w1[2], w1[3], selector); w6[1] = hc_byte_perm 
(w1[1], w1[2], selector); w6[0] = hc_byte_perm (w1[0], w1[1], selector); w5[3] = hc_byte_perm (w0[3], w1[0], selector); w5[2] = hc_byte_perm (w0[2], w0[3], selector); w5[1] = hc_byte_perm (w0[1], w0[2], selector); w5[0] = hc_byte_perm (w0[0], w0[1], selector); w4[3] = hc_byte_perm ( 0, w0[0], selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_byte_perm (w7[3], 0, selector); c4[3] = hc_byte_perm (w7[2], w7[3], selector); c4[2] = hc_byte_perm (w7[1], w7[2], selector); c4[1] = hc_byte_perm (w7[0], w7[1], selector); c4[0] = hc_byte_perm (w6[3], w7[0], selector); c3[3] = hc_byte_perm (w6[2], w6[3], selector); c3[2] = hc_byte_perm (w6[1], w6[2], selector); c3[1] = hc_byte_perm (w6[0], w6[1], selector); c3[0] = hc_byte_perm (w5[3], w6[0], selector); c2[3] = hc_byte_perm (w5[2], w5[3], selector); c2[2] = hc_byte_perm (w5[1], w5[2], selector); c2[1] = hc_byte_perm (w5[0], w5[1], selector); c2[0] = hc_byte_perm (w4[3], w5[0], selector); c1[3] = hc_byte_perm (w4[2], w4[3], selector); c1[2] = hc_byte_perm (w4[1], w4[2], selector); c1[1] = hc_byte_perm (w4[0], w4[1], selector); c1[0] = hc_byte_perm (w3[3], w4[0], selector); c0[3] = hc_byte_perm (w3[2], w3[3], selector); c0[2] = hc_byte_perm (w3[1], w3[2], selector); c0[1] = hc_byte_perm (w3[0], w3[1], selector); c0[0] = hc_byte_perm (w2[3], w3[0], selector); w7[3] = hc_byte_perm (w2[2], w2[3], selector); w7[2] = hc_byte_perm (w2[1], w2[2], selector); w7[1] = hc_byte_perm (w2[0], w2[1], selector); w7[0] = hc_byte_perm (w1[3], w2[0], selector); w6[3] = hc_byte_perm (w1[2], w1[3], selector); w6[2] = hc_byte_perm (w1[1], w1[2], selector); w6[1] = hc_byte_perm (w1[0], w1[1], selector); w6[0] = hc_byte_perm (w0[3], w1[0], selector); w5[3] = hc_byte_perm (w0[2], w0[3], selector); w5[2] = hc_byte_perm (w0[1], w0[2], selector); w5[1] = 
hc_byte_perm (w0[0], w0[1], selector); w5[0] = hc_byte_perm ( 0, w0[0], selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_byte_perm (w7[3], 0, selector); c5[0] = hc_byte_perm (w7[2], w7[3], selector); c4[3] = hc_byte_perm (w7[1], w7[2], selector); c4[2] = hc_byte_perm (w7[0], w7[1], selector); c4[1] = hc_byte_perm (w6[3], w7[0], selector); c4[0] = hc_byte_perm (w6[2], w6[3], selector); c3[3] = hc_byte_perm (w6[1], w6[2], selector); c3[2] = hc_byte_perm (w6[0], w6[1], selector); c3[1] = hc_byte_perm (w5[3], w6[0], selector); c3[0] = hc_byte_perm (w5[2], w5[3], selector); c2[3] = hc_byte_perm (w5[1], w5[2], selector); c2[2] = hc_byte_perm (w5[0], w5[1], selector); c2[1] = hc_byte_perm (w4[3], w5[0], selector); c2[0] = hc_byte_perm (w4[2], w4[3], selector); c1[3] = hc_byte_perm (w4[1], w4[2], selector); c1[2] = hc_byte_perm (w4[0], w4[1], selector); c1[1] = hc_byte_perm (w3[3], w4[0], selector); c1[0] = hc_byte_perm (w3[2], w3[3], selector); c0[3] = hc_byte_perm (w3[1], w3[2], selector); c0[2] = hc_byte_perm (w3[0], w3[1], selector); c0[1] = hc_byte_perm (w2[3], w3[0], selector); c0[0] = hc_byte_perm (w2[2], w2[3], selector); w7[3] = hc_byte_perm (w2[1], w2[2], selector); w7[2] = hc_byte_perm (w2[0], w2[1], selector); w7[1] = hc_byte_perm (w1[3], w2[0], selector); w7[0] = hc_byte_perm (w1[2], w1[3], selector); w6[3] = hc_byte_perm (w1[1], w1[2], selector); w6[2] = hc_byte_perm (w1[0], w1[1], selector); w6[1] = hc_byte_perm (w0[3], w1[0], selector); w6[0] = hc_byte_perm (w0[2], w0[3], selector); w5[3] = hc_byte_perm (w0[1], w0[2], selector); w5[2] = hc_byte_perm (w0[0], w0[1], selector); w5[1] = hc_byte_perm ( 0, w0[0], selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; 
w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_byte_perm (w7[3], 0, selector); c5[1] = hc_byte_perm (w7[2], w7[3], selector); c5[0] = hc_byte_perm (w7[1], w7[2], selector); c4[3] = hc_byte_perm (w7[0], w7[1], selector); c4[2] = hc_byte_perm (w6[3], w7[0], selector); c4[1] = hc_byte_perm (w6[2], w6[3], selector); c4[0] = hc_byte_perm (w6[1], w6[2], selector); c3[3] = hc_byte_perm (w6[0], w6[1], selector); c3[2] = hc_byte_perm (w5[3], w6[0], selector); c3[1] = hc_byte_perm (w5[2], w5[3], selector); c3[0] = hc_byte_perm (w5[1], w5[2], selector); c2[3] = hc_byte_perm (w5[0], w5[1], selector); c2[2] = hc_byte_perm (w4[3], w5[0], selector); c2[1] = hc_byte_perm (w4[2], w4[3], selector); c2[0] = hc_byte_perm (w4[1], w4[2], selector); c1[3] = hc_byte_perm (w4[0], w4[1], selector); c1[2] = hc_byte_perm (w3[3], w4[0], selector); c1[1] = hc_byte_perm (w3[2], w3[3], selector); c1[0] = hc_byte_perm (w3[1], w3[2], selector); c0[3] = hc_byte_perm (w3[0], w3[1], selector); c0[2] = hc_byte_perm (w2[3], w3[0], selector); c0[1] = hc_byte_perm (w2[2], w2[3], selector); c0[0] = hc_byte_perm (w2[1], w2[2], selector); w7[3] = hc_byte_perm (w2[0], w2[1], selector); w7[2] = hc_byte_perm (w1[3], w2[0], selector); w7[1] = hc_byte_perm (w1[2], w1[3], selector); w7[0] = hc_byte_perm (w1[1], w1[2], selector); w6[3] = hc_byte_perm (w1[0], w1[1], selector); w6[2] = hc_byte_perm (w0[3], w1[0], selector); w6[1] = hc_byte_perm (w0[2], w0[3], selector); w6[0] = hc_byte_perm (w0[1], w0[2], selector); w5[3] = hc_byte_perm (w0[0], w0[1], selector); w5[2] = hc_byte_perm ( 0, w0[0], selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_byte_perm (w7[3], 0, selector); c5[2] = hc_byte_perm (w7[2], w7[3], 
selector); c5[1] = hc_byte_perm (w7[1], w7[2], selector); c5[0] = hc_byte_perm (w7[0], w7[1], selector); c4[3] = hc_byte_perm (w6[3], w7[0], selector); c4[2] = hc_byte_perm (w6[2], w6[3], selector); c4[1] = hc_byte_perm (w6[1], w6[2], selector); c4[0] = hc_byte_perm (w6[0], w6[1], selector); c3[3] = hc_byte_perm (w5[3], w6[0], selector); c3[2] = hc_byte_perm (w5[2], w5[3], selector); c3[1] = hc_byte_perm (w5[1], w5[2], selector); c3[0] = hc_byte_perm (w5[0], w5[1], selector); c2[3] = hc_byte_perm (w4[3], w5[0], selector); c2[2] = hc_byte_perm (w4[2], w4[3], selector); c2[1] = hc_byte_perm (w4[1], w4[2], selector); c2[0] = hc_byte_perm (w4[0], w4[1], selector); c1[3] = hc_byte_perm (w3[3], w4[0], selector); c1[2] = hc_byte_perm (w3[2], w3[3], selector); c1[1] = hc_byte_perm (w3[1], w3[2], selector); c1[0] = hc_byte_perm (w3[0], w3[1], selector); c0[3] = hc_byte_perm (w2[3], w3[0], selector); c0[2] = hc_byte_perm (w2[2], w2[3], selector); c0[1] = hc_byte_perm (w2[1], w2[2], selector); c0[0] = hc_byte_perm (w2[0], w2[1], selector); w7[3] = hc_byte_perm (w1[3], w2[0], selector); w7[2] = hc_byte_perm (w1[2], w1[3], selector); w7[1] = hc_byte_perm (w1[1], w1[2], selector); w7[0] = hc_byte_perm (w1[0], w1[1], selector); w6[3] = hc_byte_perm (w0[3], w1[0], selector); w6[2] = hc_byte_perm (w0[2], w0[3], selector); w6[1] = hc_byte_perm (w0[1], w0[2], selector); w6[0] = hc_byte_perm (w0[0], w0[1], selector); w5[3] = hc_byte_perm ( 0, w0[0], selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_byte_perm (w7[3], 0, selector); c5[3] = hc_byte_perm (w7[2], w7[3], selector); c5[2] = hc_byte_perm (w7[1], w7[2], selector); c5[1] = hc_byte_perm (w7[0], w7[1], selector); c5[0] = hc_byte_perm (w6[3], w7[0], selector); c4[3] = hc_byte_perm (w6[2], 
w6[3], selector); c4[2] = hc_byte_perm (w6[1], w6[2], selector); c4[1] = hc_byte_perm (w6[0], w6[1], selector); c4[0] = hc_byte_perm (w5[3], w6[0], selector); c3[3] = hc_byte_perm (w5[2], w5[3], selector); c3[2] = hc_byte_perm (w5[1], w5[2], selector); c3[1] = hc_byte_perm (w5[0], w5[1], selector); c3[0] = hc_byte_perm (w4[3], w5[0], selector); c2[3] = hc_byte_perm (w4[2], w4[3], selector); c2[2] = hc_byte_perm (w4[1], w4[2], selector); c2[1] = hc_byte_perm (w4[0], w4[1], selector); c2[0] = hc_byte_perm (w3[3], w4[0], selector); c1[3] = hc_byte_perm (w3[2], w3[3], selector); c1[2] = hc_byte_perm (w3[1], w3[2], selector); c1[1] = hc_byte_perm (w3[0], w3[1], selector); c1[0] = hc_byte_perm (w2[3], w3[0], selector); c0[3] = hc_byte_perm (w2[2], w2[3], selector); c0[2] = hc_byte_perm (w2[1], w2[2], selector); c0[1] = hc_byte_perm (w2[0], w2[1], selector); c0[0] = hc_byte_perm (w1[3], w2[0], selector); w7[3] = hc_byte_perm (w1[2], w1[3], selector); w7[2] = hc_byte_perm (w1[1], w1[2], selector); w7[1] = hc_byte_perm (w1[0], w1[1], selector); w7[0] = hc_byte_perm (w0[3], w1[0], selector); w6[3] = hc_byte_perm (w0[2], w0[3], selector); w6[2] = hc_byte_perm (w0[1], w0[2], selector); w6[1] = hc_byte_perm (w0[0], w0[1], selector); w6[0] = hc_byte_perm ( 0, w0[0], selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_byte_perm (w7[3], 0, selector); c6[0] = hc_byte_perm (w7[2], w7[3], selector); c5[3] = hc_byte_perm (w7[1], w7[2], selector); c5[2] = hc_byte_perm (w7[0], w7[1], selector); c5[1] = hc_byte_perm (w6[3], w7[0], selector); c5[0] = hc_byte_perm (w6[2], w6[3], selector); c4[3] = hc_byte_perm (w6[1], w6[2], selector); c4[2] = hc_byte_perm (w6[0], w6[1], selector); c4[1] = hc_byte_perm (w5[3], w6[0], selector); c4[0] = 
hc_byte_perm (w5[2], w5[3], selector); c3[3] = hc_byte_perm (w5[1], w5[2], selector); c3[2] = hc_byte_perm (w5[0], w5[1], selector); c3[1] = hc_byte_perm (w4[3], w5[0], selector); c3[0] = hc_byte_perm (w4[2], w4[3], selector); c2[3] = hc_byte_perm (w4[1], w4[2], selector); c2[2] = hc_byte_perm (w4[0], w4[1], selector); c2[1] = hc_byte_perm (w3[3], w4[0], selector); c2[0] = hc_byte_perm (w3[2], w3[3], selector); c1[3] = hc_byte_perm (w3[1], w3[2], selector); c1[2] = hc_byte_perm (w3[0], w3[1], selector); c1[1] = hc_byte_perm (w2[3], w3[0], selector); c1[0] = hc_byte_perm (w2[2], w2[3], selector); c0[3] = hc_byte_perm (w2[1], w2[2], selector); c0[2] = hc_byte_perm (w2[0], w2[1], selector); c0[1] = hc_byte_perm (w1[3], w2[0], selector); c0[0] = hc_byte_perm (w1[2], w1[3], selector); w7[3] = hc_byte_perm (w1[1], w1[2], selector); w7[2] = hc_byte_perm (w1[0], w1[1], selector); w7[1] = hc_byte_perm (w0[3], w1[0], selector); w7[0] = hc_byte_perm (w0[2], w0[3], selector); w6[3] = hc_byte_perm (w0[1], w0[2], selector); w6[2] = hc_byte_perm (w0[0], w0[1], selector); w6[1] = hc_byte_perm ( 0, w0[0], selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_byte_perm (w7[3], 0, selector); c6[1] = hc_byte_perm (w7[2], w7[3], selector); c6[0] = hc_byte_perm (w7[1], w7[2], selector); c5[3] = hc_byte_perm (w7[0], w7[1], selector); c5[2] = hc_byte_perm (w6[3], w7[0], selector); c5[1] = hc_byte_perm (w6[2], w6[3], selector); c5[0] = hc_byte_perm (w6[1], w6[2], selector); c4[3] = hc_byte_perm (w6[0], w6[1], selector); c4[2] = hc_byte_perm (w5[3], w6[0], selector); c4[1] = hc_byte_perm (w5[2], w5[3], selector); c4[0] = hc_byte_perm (w5[1], w5[2], selector); c3[3] = hc_byte_perm (w5[0], w5[1], selector); c3[2] = hc_byte_perm (w4[3], 
w5[0], selector); c3[1] = hc_byte_perm (w4[2], w4[3], selector); c3[0] = hc_byte_perm (w4[1], w4[2], selector); c2[3] = hc_byte_perm (w4[0], w4[1], selector); c2[2] = hc_byte_perm (w3[3], w4[0], selector); c2[1] = hc_byte_perm (w3[2], w3[3], selector); c2[0] = hc_byte_perm (w3[1], w3[2], selector); c1[3] = hc_byte_perm (w3[0], w3[1], selector); c1[2] = hc_byte_perm (w2[3], w3[0], selector); c1[1] = hc_byte_perm (w2[2], w2[3], selector); c1[0] = hc_byte_perm (w2[1], w2[2], selector); c0[3] = hc_byte_perm (w2[0], w2[1], selector); c0[2] = hc_byte_perm (w1[3], w2[0], selector); c0[1] = hc_byte_perm (w1[2], w1[3], selector); c0[0] = hc_byte_perm (w1[1], w1[2], selector); w7[3] = hc_byte_perm (w1[0], w1[1], selector); w7[2] = hc_byte_perm (w0[3], w1[0], selector); w7[1] = hc_byte_perm (w0[2], w0[3], selector); w7[0] = hc_byte_perm (w0[1], w0[2], selector); w6[3] = hc_byte_perm (w0[0], w0[1], selector); w6[2] = hc_byte_perm ( 0, w0[0], selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_byte_perm (w7[3], 0, selector); c6[2] = hc_byte_perm (w7[2], w7[3], selector); c6[1] = hc_byte_perm (w7[1], w7[2], selector); c6[0] = hc_byte_perm (w7[0], w7[1], selector); c5[3] = hc_byte_perm (w6[3], w7[0], selector); c5[2] = hc_byte_perm (w6[2], w6[3], selector); c5[1] = hc_byte_perm (w6[1], w6[2], selector); c5[0] = hc_byte_perm (w6[0], w6[1], selector); c4[3] = hc_byte_perm (w5[3], w6[0], selector); c4[2] = hc_byte_perm (w5[2], w5[3], selector); c4[1] = hc_byte_perm (w5[1], w5[2], selector); c4[0] = hc_byte_perm (w5[0], w5[1], selector); c3[3] = hc_byte_perm (w4[3], w5[0], selector); c3[2] = hc_byte_perm (w4[2], w4[3], selector); c3[1] = hc_byte_perm (w4[1], w4[2], selector); c3[0] = hc_byte_perm (w4[0], w4[1], 
selector); c2[3] = hc_byte_perm (w3[3], w4[0], selector); c2[2] = hc_byte_perm (w3[2], w3[3], selector); c2[1] = hc_byte_perm (w3[1], w3[2], selector); c2[0] = hc_byte_perm (w3[0], w3[1], selector); c1[3] = hc_byte_perm (w2[3], w3[0], selector); c1[2] = hc_byte_perm (w2[2], w2[3], selector); c1[1] = hc_byte_perm (w2[1], w2[2], selector); c1[0] = hc_byte_perm (w2[0], w2[1], selector); c0[3] = hc_byte_perm (w1[3], w2[0], selector); c0[2] = hc_byte_perm (w1[2], w1[3], selector); c0[1] = hc_byte_perm (w1[1], w1[2], selector); c0[0] = hc_byte_perm (w1[0], w1[1], selector); w7[3] = hc_byte_perm (w0[3], w1[0], selector); w7[2] = hc_byte_perm (w0[2], w0[3], selector); w7[1] = hc_byte_perm (w0[1], w0[2], selector); w7[0] = hc_byte_perm (w0[0], w0[1], selector); w6[3] = hc_byte_perm ( 0, w0[0], selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_byte_perm (w7[3], 0, selector); c6[3] = hc_byte_perm (w7[2], w7[3], selector); c6[2] = hc_byte_perm (w7[1], w7[2], selector); c6[1] = hc_byte_perm (w7[0], w7[1], selector); c6[0] = hc_byte_perm (w6[3], w7[0], selector); c5[3] = hc_byte_perm (w6[2], w6[3], selector); c5[2] = hc_byte_perm (w6[1], w6[2], selector); c5[1] = hc_byte_perm (w6[0], w6[1], selector); c5[0] = hc_byte_perm (w5[3], w6[0], selector); c4[3] = hc_byte_perm (w5[2], w5[3], selector); c4[2] = hc_byte_perm (w5[1], w5[2], selector); c4[1] = hc_byte_perm (w5[0], w5[1], selector); c4[0] = hc_byte_perm (w4[3], w5[0], selector); c3[3] = hc_byte_perm (w4[2], w4[3], selector); c3[2] = hc_byte_perm (w4[1], w4[2], selector); c3[1] = hc_byte_perm (w4[0], w4[1], selector); c3[0] = hc_byte_perm (w3[3], w4[0], selector); c2[3] = hc_byte_perm (w3[2], w3[3], selector); c2[2] = hc_byte_perm (w3[1], w3[2], 
selector); c2[1] = hc_byte_perm (w3[0], w3[1], selector); c2[0] = hc_byte_perm (w2[3], w3[0], selector); c1[3] = hc_byte_perm (w2[2], w2[3], selector); c1[2] = hc_byte_perm (w2[1], w2[2], selector); c1[1] = hc_byte_perm (w2[0], w2[1], selector); c1[0] = hc_byte_perm (w1[3], w2[0], selector); c0[3] = hc_byte_perm (w1[2], w1[3], selector); c0[2] = hc_byte_perm (w1[1], w1[2], selector); c0[1] = hc_byte_perm (w1[0], w1[1], selector); c0[0] = hc_byte_perm (w0[3], w1[0], selector); w7[3] = hc_byte_perm (w0[2], w0[3], selector); w7[2] = hc_byte_perm (w0[1], w0[2], selector); w7[1] = hc_byte_perm (w0[0], w0[1], selector); w7[0] = hc_byte_perm ( 0, w0[0], selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_byte_perm (w7[3], 0, selector); c7[0] = hc_byte_perm (w7[2], w7[3], selector); c6[3] = hc_byte_perm (w7[1], w7[2], selector); c6[2] = hc_byte_perm (w7[0], w7[1], selector); c6[1] = hc_byte_perm (w6[3], w7[0], selector); c6[0] = hc_byte_perm (w6[2], w6[3], selector); c5[3] = hc_byte_perm (w6[1], w6[2], selector); c5[2] = hc_byte_perm (w6[0], w6[1], selector); c5[1] = hc_byte_perm (w5[3], w6[0], selector); c5[0] = hc_byte_perm (w5[2], w5[3], selector); c4[3] = hc_byte_perm (w5[1], w5[2], selector); c4[2] = hc_byte_perm (w5[0], w5[1], selector); c4[1] = hc_byte_perm (w4[3], w5[0], selector); c4[0] = hc_byte_perm (w4[2], w4[3], selector); c3[3] = hc_byte_perm (w4[1], w4[2], selector); c3[2] = hc_byte_perm (w4[0], w4[1], selector); c3[1] = hc_byte_perm (w3[3], w4[0], selector); c3[0] = hc_byte_perm (w3[2], w3[3], selector); c2[3] = hc_byte_perm (w3[1], w3[2], selector); c2[2] = hc_byte_perm (w3[0], w3[1], selector); c2[1] = hc_byte_perm (w2[3], w3[0], selector); c2[0] = hc_byte_perm 
(w2[2], w2[3], selector); c1[3] = hc_byte_perm (w2[1], w2[2], selector); c1[2] = hc_byte_perm (w2[0], w2[1], selector); c1[1] = hc_byte_perm (w1[3], w2[0], selector); c1[0] = hc_byte_perm (w1[2], w1[3], selector); c0[3] = hc_byte_perm (w1[1], w1[2], selector); c0[2] = hc_byte_perm (w1[0], w1[1], selector); c0[1] = hc_byte_perm (w0[3], w1[0], selector); c0[0] = hc_byte_perm (w0[2], w0[3], selector); w7[3] = hc_byte_perm (w0[1], w0[2], selector); w7[2] = hc_byte_perm (w0[0], w0[1], selector); w7[1] = hc_byte_perm ( 0, w0[0], selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_byte_perm (w7[3], 0, selector); c7[1] = hc_byte_perm (w7[2], w7[3], selector); c7[0] = hc_byte_perm (w7[1], w7[2], selector); c6[3] = hc_byte_perm (w7[0], w7[1], selector); c6[2] = hc_byte_perm (w6[3], w7[0], selector); c6[1] = hc_byte_perm (w6[2], w6[3], selector); c6[0] = hc_byte_perm (w6[1], w6[2], selector); c5[3] = hc_byte_perm (w6[0], w6[1], selector); c5[2] = hc_byte_perm (w5[3], w6[0], selector); c5[1] = hc_byte_perm (w5[2], w5[3], selector); c5[0] = hc_byte_perm (w5[1], w5[2], selector); c4[3] = hc_byte_perm (w5[0], w5[1], selector); c4[2] = hc_byte_perm (w4[3], w5[0], selector); c4[1] = hc_byte_perm (w4[2], w4[3], selector); c4[0] = hc_byte_perm (w4[1], w4[2], selector); c3[3] = hc_byte_perm (w4[0], w4[1], selector); c3[2] = hc_byte_perm (w3[3], w4[0], selector); c3[1] = hc_byte_perm (w3[2], w3[3], selector); c3[0] = hc_byte_perm (w3[1], w3[2], selector); c2[3] = hc_byte_perm (w3[0], w3[1], selector); c2[2] = hc_byte_perm (w2[3], w3[0], selector); c2[1] = hc_byte_perm (w2[2], w2[3], selector); c2[0] = hc_byte_perm (w2[1], w2[2], selector); c1[3] = hc_byte_perm (w2[0], w2[1], selector); 
c1[2] = hc_byte_perm (w1[3], w2[0], selector); c1[1] = hc_byte_perm (w1[2], w1[3], selector); c1[0] = hc_byte_perm (w1[1], w1[2], selector); c0[3] = hc_byte_perm (w1[0], w1[1], selector); c0[2] = hc_byte_perm (w0[3], w1[0], selector); c0[1] = hc_byte_perm (w0[2], w0[3], selector); c0[0] = hc_byte_perm (w0[1], w0[2], selector); w7[3] = hc_byte_perm (w0[0], w0[1], selector); w7[2] = hc_byte_perm ( 0, w0[0], selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_byte_perm (w7[3], 0, selector); c7[2] = hc_byte_perm (w7[2], w7[3], selector); c7[1] = hc_byte_perm (w7[1], w7[2], selector); c7[0] = hc_byte_perm (w7[0], w7[1], selector); c6[3] = hc_byte_perm (w6[3], w7[0], selector); c6[2] = hc_byte_perm (w6[2], w6[3], selector); c6[1] = hc_byte_perm (w6[1], w6[2], selector); c6[0] = hc_byte_perm (w6[0], w6[1], selector); c5[3] = hc_byte_perm (w5[3], w6[0], selector); c5[2] = hc_byte_perm (w5[2], w5[3], selector); c5[1] = hc_byte_perm (w5[1], w5[2], selector); c5[0] = hc_byte_perm (w5[0], w5[1], selector); c4[3] = hc_byte_perm (w4[3], w5[0], selector); c4[2] = hc_byte_perm (w4[2], w4[3], selector); c4[1] = hc_byte_perm (w4[1], w4[2], selector); c4[0] = hc_byte_perm (w4[0], w4[1], selector); c3[3] = hc_byte_perm (w3[3], w4[0], selector); c3[2] = hc_byte_perm (w3[2], w3[3], selector); c3[1] = hc_byte_perm (w3[1], w3[2], selector); c3[0] = hc_byte_perm (w3[0], w3[1], selector); c2[3] = hc_byte_perm (w2[3], w3[0], selector); c2[2] = hc_byte_perm (w2[2], w2[3], selector); c2[1] = hc_byte_perm (w2[1], w2[2], selector); c2[0] = hc_byte_perm (w2[0], w2[1], selector); c1[3] = hc_byte_perm (w1[3], w2[0], selector); c1[2] = hc_byte_perm (w1[2], w1[3], selector); c1[1] = 
hc_byte_perm (w1[1], w1[2], selector); c1[0] = hc_byte_perm (w1[0], w1[1], selector); c0[3] = hc_byte_perm (w0[3], w1[0], selector); c0[2] = hc_byte_perm (w0[2], w0[3], selector); c0[1] = hc_byte_perm (w0[1], w0[2], selector); c0[0] = hc_byte_perm (w0[0], w0[1], selector); w7[3] = hc_byte_perm ( 0, w0[0], selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w7[3] = hc_bytealign_be (w7[2], w7[3], offset); w7[2] = hc_bytealign_be (w7[1], w7[2], offset); w7[1] = hc_bytealign_be (w7[0], w7[1], offset); w7[0] = hc_bytealign_be (w6[3], w7[0], offset); w6[3] = hc_bytealign_be (w6[2], w6[3], offset); w6[2] = hc_bytealign_be (w6[1], w6[2], offset); w6[1] = hc_bytealign_be (w6[0], w6[1], offset); w6[0] = hc_bytealign_be (w5[3], w6[0], offset); w5[3] = hc_bytealign_be (w5[2], w5[3], offset); w5[2] = hc_bytealign_be (w5[1], w5[2], offset); w5[1] = hc_bytealign_be (w5[0], w5[1], offset); w5[0] = hc_bytealign_be (w4[3], w5[0], offset); w4[3] = hc_bytealign_be (w4[2], w4[3], offset); w4[2] = hc_bytealign_be (w4[1], w4[2], offset); w4[1] = hc_bytealign_be (w4[0], w4[1], offset); w4[0] = hc_bytealign_be (w3[3], w4[0], offset); w3[3] = hc_bytealign_be (w3[2], w3[3], offset); w3[2] = hc_bytealign_be (w3[1], w3[2], offset); w3[1] = hc_bytealign_be (w3[0], w3[1], offset); w3[0] = hc_bytealign_be 
(w2[3], w3[0], offset); w2[3] = hc_bytealign_be (w2[2], w2[3], offset); w2[2] = hc_bytealign_be (w2[1], w2[2], offset); w2[1] = hc_bytealign_be (w2[0], w2[1], offset); w2[0] = hc_bytealign_be (w1[3], w2[0], offset); w1[3] = hc_bytealign_be (w1[2], w1[3], offset); w1[2] = hc_bytealign_be (w1[1], w1[2], offset); w1[1] = hc_bytealign_be (w1[0], w1[1], offset); w1[0] = hc_bytealign_be (w0[3], w1[0], offset); w0[3] = hc_bytealign_be (w0[2], w0[3], offset); w0[2] = hc_bytealign_be (w0[1], w0[2], offset); w0[1] = hc_bytealign_be (w0[0], w0[1], offset); w0[0] = hc_bytealign_be ( 0, w0[0], offset); break; case 1: w7[3] = hc_bytealign_be (w7[1], w7[2], offset); w7[2] = hc_bytealign_be (w7[0], w7[1], offset); w7[1] = hc_bytealign_be (w6[3], w7[0], offset); w7[0] = hc_bytealign_be (w6[2], w6[3], offset); w6[3] = hc_bytealign_be (w6[1], w6[2], offset); w6[2] = hc_bytealign_be (w6[0], w6[1], offset); w6[1] = hc_bytealign_be (w5[3], w6[0], offset); w6[0] = hc_bytealign_be (w5[2], w5[3], offset); w5[3] = hc_bytealign_be (w5[1], w5[2], offset); w5[2] = hc_bytealign_be (w5[0], w5[1], offset); w5[1] = hc_bytealign_be (w4[3], w5[0], offset); w5[0] = hc_bytealign_be (w4[2], w4[3], offset); w4[3] = hc_bytealign_be (w4[1], w4[2], offset); w4[2] = hc_bytealign_be (w4[0], w4[1], offset); w4[1] = hc_bytealign_be (w3[3], w4[0], offset); w4[0] = hc_bytealign_be (w3[2], w3[3], offset); w3[3] = hc_bytealign_be (w3[1], w3[2], offset); w3[2] = hc_bytealign_be (w3[0], w3[1], offset); w3[1] = hc_bytealign_be (w2[3], w3[0], offset); w3[0] = hc_bytealign_be (w2[2], w2[3], offset); w2[3] = hc_bytealign_be (w2[1], w2[2], offset); w2[2] = hc_bytealign_be (w2[0], w2[1], offset); w2[1] = hc_bytealign_be (w1[3], w2[0], offset); w2[0] = hc_bytealign_be (w1[2], w1[3], offset); w1[3] = hc_bytealign_be (w1[1], w1[2], offset); w1[2] = hc_bytealign_be (w1[0], w1[1], offset); w1[1] = hc_bytealign_be (w0[3], w1[0], offset); w1[0] = hc_bytealign_be (w0[2], w0[3], offset); w0[3] = hc_bytealign_be (w0[1], w0[2], 
offset); w0[2] = hc_bytealign_be (w0[0], w0[1], offset); w0[1] = hc_bytealign_be ( 0, w0[0], offset); w0[0] = 0; break; case 2: w7[3] = hc_bytealign_be (w7[0], w7[1], offset); w7[2] = hc_bytealign_be (w6[3], w7[0], offset); w7[1] = hc_bytealign_be (w6[2], w6[3], offset); w7[0] = hc_bytealign_be (w6[1], w6[2], offset); w6[3] = hc_bytealign_be (w6[0], w6[1], offset); w6[2] = hc_bytealign_be (w5[3], w6[0], offset); w6[1] = hc_bytealign_be (w5[2], w5[3], offset); w6[0] = hc_bytealign_be (w5[1], w5[2], offset); w5[3] = hc_bytealign_be (w5[0], w5[1], offset); w5[2] = hc_bytealign_be (w4[3], w5[0], offset); w5[1] = hc_bytealign_be (w4[2], w4[3], offset); w5[0] = hc_bytealign_be (w4[1], w4[2], offset); w4[3] = hc_bytealign_be (w4[0], w4[1], offset); w4[2] = hc_bytealign_be (w3[3], w4[0], offset); w4[1] = hc_bytealign_be (w3[2], w3[3], offset); w4[0] = hc_bytealign_be (w3[1], w3[2], offset); w3[3] = hc_bytealign_be (w3[0], w3[1], offset); w3[2] = hc_bytealign_be (w2[3], w3[0], offset); w3[1] = hc_bytealign_be (w2[2], w2[3], offset); w3[0] = hc_bytealign_be (w2[1], w2[2], offset); w2[3] = hc_bytealign_be (w2[0], w2[1], offset); w2[2] = hc_bytealign_be (w1[3], w2[0], offset); w2[1] = hc_bytealign_be (w1[2], w1[3], offset); w2[0] = hc_bytealign_be (w1[1], w1[2], offset); w1[3] = hc_bytealign_be (w1[0], w1[1], offset); w1[2] = hc_bytealign_be (w0[3], w1[0], offset); w1[1] = hc_bytealign_be (w0[2], w0[3], offset); w1[0] = hc_bytealign_be (w0[1], w0[2], offset); w0[3] = hc_bytealign_be (w0[0], w0[1], offset); w0[2] = hc_bytealign_be ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_bytealign_be (w6[3], w7[0], offset); w7[2] = hc_bytealign_be (w6[2], w6[3], offset); w7[1] = hc_bytealign_be (w6[1], w6[2], offset); w7[0] = hc_bytealign_be (w6[0], w6[1], offset); w6[3] = hc_bytealign_be (w5[3], w6[0], offset); w6[2] = hc_bytealign_be (w5[2], w5[3], offset); w6[1] = hc_bytealign_be (w5[1], w5[2], offset); w6[0] = hc_bytealign_be (w5[0], w5[1], offset); w5[3] = 
hc_bytealign_be (w4[3], w5[0], offset); w5[2] = hc_bytealign_be (w4[2], w4[3], offset); w5[1] = hc_bytealign_be (w4[1], w4[2], offset); w5[0] = hc_bytealign_be (w4[0], w4[1], offset); w4[3] = hc_bytealign_be (w3[3], w4[0], offset); w4[2] = hc_bytealign_be (w3[2], w3[3], offset); w4[1] = hc_bytealign_be (w3[1], w3[2], offset); w4[0] = hc_bytealign_be (w3[0], w3[1], offset); w3[3] = hc_bytealign_be (w2[3], w3[0], offset); w3[2] = hc_bytealign_be (w2[2], w2[3], offset); w3[1] = hc_bytealign_be (w2[1], w2[2], offset); w3[0] = hc_bytealign_be (w2[0], w2[1], offset); w2[3] = hc_bytealign_be (w1[3], w2[0], offset); w2[2] = hc_bytealign_be (w1[2], w1[3], offset); w2[1] = hc_bytealign_be (w1[1], w1[2], offset); w2[0] = hc_bytealign_be (w1[0], w1[1], offset); w1[3] = hc_bytealign_be (w0[3], w1[0], offset); w1[2] = hc_bytealign_be (w0[2], w0[3], offset); w1[1] = hc_bytealign_be (w0[1], w0[2], offset); w1[0] = hc_bytealign_be (w0[0], w0[1], offset); w0[3] = hc_bytealign_be ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_bytealign_be (w6[2], w6[3], offset); w7[2] = hc_bytealign_be (w6[1], w6[2], offset); w7[1] = hc_bytealign_be (w6[0], w6[1], offset); w7[0] = hc_bytealign_be (w5[3], w6[0], offset); w6[3] = hc_bytealign_be (w5[2], w5[3], offset); w6[2] = hc_bytealign_be (w5[1], w5[2], offset); w6[1] = hc_bytealign_be (w5[0], w5[1], offset); w6[0] = hc_bytealign_be (w4[3], w5[0], offset); w5[3] = hc_bytealign_be (w4[2], w4[3], offset); w5[2] = hc_bytealign_be (w4[1], w4[2], offset); w5[1] = hc_bytealign_be (w4[0], w4[1], offset); w5[0] = hc_bytealign_be (w3[3], w4[0], offset); w4[3] = hc_bytealign_be (w3[2], w3[3], offset); w4[2] = hc_bytealign_be (w3[1], w3[2], offset); w4[1] = hc_bytealign_be (w3[0], w3[1], offset); w4[0] = hc_bytealign_be (w2[3], w3[0], offset); w3[3] = hc_bytealign_be (w2[2], w2[3], offset); w3[2] = hc_bytealign_be (w2[1], w2[2], offset); w3[1] = hc_bytealign_be (w2[0], w2[1], offset); w3[0] = hc_bytealign_be (w1[3], w2[0], 
offset); w2[3] = hc_bytealign_be (w1[2], w1[3], offset); w2[2] = hc_bytealign_be (w1[1], w1[2], offset); w2[1] = hc_bytealign_be (w1[0], w1[1], offset); w2[0] = hc_bytealign_be (w0[3], w1[0], offset); w1[3] = hc_bytealign_be (w0[2], w0[3], offset); w1[2] = hc_bytealign_be (w0[1], w0[2], offset); w1[1] = hc_bytealign_be (w0[0], w0[1], offset); w1[0] = hc_bytealign_be ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_bytealign_be (w6[1], w6[2], offset); w7[2] = hc_bytealign_be (w6[0], w6[1], offset); w7[1] = hc_bytealign_be (w5[3], w6[0], offset); w7[0] = hc_bytealign_be (w5[2], w5[3], offset); w6[3] = hc_bytealign_be (w5[1], w5[2], offset); w6[2] = hc_bytealign_be (w5[0], w5[1], offset); w6[1] = hc_bytealign_be (w4[3], w5[0], offset); w6[0] = hc_bytealign_be (w4[2], w4[3], offset); w5[3] = hc_bytealign_be (w4[1], w4[2], offset); w5[2] = hc_bytealign_be (w4[0], w4[1], offset); w5[1] = hc_bytealign_be (w3[3], w4[0], offset); w5[0] = hc_bytealign_be (w3[2], w3[3], offset); w4[3] = hc_bytealign_be (w3[1], w3[2], offset); w4[2] = hc_bytealign_be (w3[0], w3[1], offset); w4[1] = hc_bytealign_be (w2[3], w3[0], offset); w4[0] = hc_bytealign_be (w2[2], w2[3], offset); w3[3] = hc_bytealign_be (w2[1], w2[2], offset); w3[2] = hc_bytealign_be (w2[0], w2[1], offset); w3[1] = hc_bytealign_be (w1[3], w2[0], offset); w3[0] = hc_bytealign_be (w1[2], w1[3], offset); w2[3] = hc_bytealign_be (w1[1], w1[2], offset); w2[2] = hc_bytealign_be (w1[0], w1[1], offset); w2[1] = hc_bytealign_be (w0[3], w1[0], offset); w2[0] = hc_bytealign_be (w0[2], w0[3], offset); w1[3] = hc_bytealign_be (w0[1], w0[2], offset); w1[2] = hc_bytealign_be (w0[0], w0[1], offset); w1[1] = hc_bytealign_be ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_bytealign_be (w6[0], w6[1], offset); w7[2] = hc_bytealign_be (w5[3], w6[0], offset); w7[1] = hc_bytealign_be (w5[2], w5[3], offset); w7[0] = hc_bytealign_be (w5[1], w5[2], 
offset); w6[3] = hc_bytealign_be (w5[0], w5[1], offset); w6[2] = hc_bytealign_be (w4[3], w5[0], offset); w6[1] = hc_bytealign_be (w4[2], w4[3], offset); w6[0] = hc_bytealign_be (w4[1], w4[2], offset); w5[3] = hc_bytealign_be (w4[0], w4[1], offset); w5[2] = hc_bytealign_be (w3[3], w4[0], offset); w5[1] = hc_bytealign_be (w3[2], w3[3], offset); w5[0] = hc_bytealign_be (w3[1], w3[2], offset); w4[3] = hc_bytealign_be (w3[0], w3[1], offset); w4[2] = hc_bytealign_be (w2[3], w3[0], offset); w4[1] = hc_bytealign_be (w2[2], w2[3], offset); w4[0] = hc_bytealign_be (w2[1], w2[2], offset); w3[3] = hc_bytealign_be (w2[0], w2[1], offset); w3[2] = hc_bytealign_be (w1[3], w2[0], offset); w3[1] = hc_bytealign_be (w1[2], w1[3], offset); w3[0] = hc_bytealign_be (w1[1], w1[2], offset); w2[3] = hc_bytealign_be (w1[0], w1[1], offset); w2[2] = hc_bytealign_be (w0[3], w1[0], offset); w2[1] = hc_bytealign_be (w0[2], w0[3], offset); w2[0] = hc_bytealign_be (w0[1], w0[2], offset); w1[3] = hc_bytealign_be (w0[0], w0[1], offset); w1[2] = hc_bytealign_be ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_bytealign_be (w5[3], w6[0], offset); w7[2] = hc_bytealign_be (w5[2], w5[3], offset); w7[1] = hc_bytealign_be (w5[1], w5[2], offset); w7[0] = hc_bytealign_be (w5[0], w5[1], offset); w6[3] = hc_bytealign_be (w4[3], w5[0], offset); w6[2] = hc_bytealign_be (w4[2], w4[3], offset); w6[1] = hc_bytealign_be (w4[1], w4[2], offset); w6[0] = hc_bytealign_be (w4[0], w4[1], offset); w5[3] = hc_bytealign_be (w3[3], w4[0], offset); w5[2] = hc_bytealign_be (w3[2], w3[3], offset); w5[1] = hc_bytealign_be (w3[1], w3[2], offset); w5[0] = hc_bytealign_be (w3[0], w3[1], offset); w4[3] = hc_bytealign_be (w2[3], w3[0], offset); w4[2] = hc_bytealign_be (w2[2], w2[3], offset); w4[1] = hc_bytealign_be (w2[1], w2[2], offset); w4[0] = hc_bytealign_be (w2[0], w2[1], offset); w3[3] = hc_bytealign_be (w1[3], w2[0], offset); w3[2] = hc_bytealign_be (w1[2], w1[3], 
offset); w3[1] = hc_bytealign_be (w1[1], w1[2], offset); w3[0] = hc_bytealign_be (w1[0], w1[1], offset); w2[3] = hc_bytealign_be (w0[3], w1[0], offset); w2[2] = hc_bytealign_be (w0[2], w0[3], offset); w2[1] = hc_bytealign_be (w0[1], w0[2], offset); w2[0] = hc_bytealign_be (w0[0], w0[1], offset); w1[3] = hc_bytealign_be ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_bytealign_be (w5[2], w5[3], offset); w7[2] = hc_bytealign_be (w5[1], w5[2], offset); w7[1] = hc_bytealign_be (w5[0], w5[1], offset); w7[0] = hc_bytealign_be (w4[3], w5[0], offset); w6[3] = hc_bytealign_be (w4[2], w4[3], offset); w6[2] = hc_bytealign_be (w4[1], w4[2], offset); w6[1] = hc_bytealign_be (w4[0], w4[1], offset); w6[0] = hc_bytealign_be (w3[3], w4[0], offset); w5[3] = hc_bytealign_be (w3[2], w3[3], offset); w5[2] = hc_bytealign_be (w3[1], w3[2], offset); w5[1] = hc_bytealign_be (w3[0], w3[1], offset); w5[0] = hc_bytealign_be (w2[3], w3[0], offset); w4[3] = hc_bytealign_be (w2[2], w2[3], offset); w4[2] = hc_bytealign_be (w2[1], w2[2], offset); w4[1] = hc_bytealign_be (w2[0], w2[1], offset); w4[0] = hc_bytealign_be (w1[3], w2[0], offset); w3[3] = hc_bytealign_be (w1[2], w1[3], offset); w3[2] = hc_bytealign_be (w1[1], w1[2], offset); w3[1] = hc_bytealign_be (w1[0], w1[1], offset); w3[0] = hc_bytealign_be (w0[3], w1[0], offset); w2[3] = hc_bytealign_be (w0[2], w0[3], offset); w2[2] = hc_bytealign_be (w0[1], w0[2], offset); w2[1] = hc_bytealign_be (w0[0], w0[1], offset); w2[0] = hc_bytealign_be ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_bytealign_be (w5[1], w5[2], offset); w7[2] = hc_bytealign_be (w5[0], w5[1], offset); w7[1] = hc_bytealign_be (w4[3], w5[0], offset); w7[0] = hc_bytealign_be (w4[2], w4[3], offset); w6[3] = hc_bytealign_be (w4[1], w4[2], offset); w6[2] = hc_bytealign_be (w4[0], w4[1], offset); w6[1] = hc_bytealign_be 
(w3[3], w4[0], offset); w6[0] = hc_bytealign_be (w3[2], w3[3], offset); w5[3] = hc_bytealign_be (w3[1], w3[2], offset); w5[2] = hc_bytealign_be (w3[0], w3[1], offset); w5[1] = hc_bytealign_be (w2[3], w3[0], offset); w5[0] = hc_bytealign_be (w2[2], w2[3], offset); w4[3] = hc_bytealign_be (w2[1], w2[2], offset); w4[2] = hc_bytealign_be (w2[0], w2[1], offset); w4[1] = hc_bytealign_be (w1[3], w2[0], offset); w4[0] = hc_bytealign_be (w1[2], w1[3], offset); w3[3] = hc_bytealign_be (w1[1], w1[2], offset); w3[2] = hc_bytealign_be (w1[0], w1[1], offset); w3[1] = hc_bytealign_be (w0[3], w1[0], offset); w3[0] = hc_bytealign_be (w0[2], w0[3], offset); w2[3] = hc_bytealign_be (w0[1], w0[2], offset); w2[2] = hc_bytealign_be (w0[0], w0[1], offset); w2[1] = hc_bytealign_be ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_bytealign_be (w5[0], w5[1], offset); w7[2] = hc_bytealign_be (w4[3], w5[0], offset); w7[1] = hc_bytealign_be (w4[2], w4[3], offset); w7[0] = hc_bytealign_be (w4[1], w4[2], offset); w6[3] = hc_bytealign_be (w4[0], w4[1], offset); w6[2] = hc_bytealign_be (w3[3], w4[0], offset); w6[1] = hc_bytealign_be (w3[2], w3[3], offset); w6[0] = hc_bytealign_be (w3[1], w3[2], offset); w5[3] = hc_bytealign_be (w3[0], w3[1], offset); w5[2] = hc_bytealign_be (w2[3], w3[0], offset); w5[1] = hc_bytealign_be (w2[2], w2[3], offset); w5[0] = hc_bytealign_be (w2[1], w2[2], offset); w4[3] = hc_bytealign_be (w2[0], w2[1], offset); w4[2] = hc_bytealign_be (w1[3], w2[0], offset); w4[1] = hc_bytealign_be (w1[2], w1[3], offset); w4[0] = hc_bytealign_be (w1[1], w1[2], offset); w3[3] = hc_bytealign_be (w1[0], w1[1], offset); w3[2] = hc_bytealign_be (w0[3], w1[0], offset); w3[1] = hc_bytealign_be (w0[2], w0[3], offset); w3[0] = hc_bytealign_be (w0[1], w0[2], offset); w2[3] = hc_bytealign_be (w0[0], w0[1], offset); w2[2] = hc_bytealign_be ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 
0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_bytealign_be (w4[3], w5[0], offset); w7[2] = hc_bytealign_be (w4[2], w4[3], offset); w7[1] = hc_bytealign_be (w4[1], w4[2], offset); w7[0] = hc_bytealign_be (w4[0], w4[1], offset); w6[3] = hc_bytealign_be (w3[3], w4[0], offset); w6[2] = hc_bytealign_be (w3[2], w3[3], offset); w6[1] = hc_bytealign_be (w3[1], w3[2], offset); w6[0] = hc_bytealign_be (w3[0], w3[1], offset); w5[3] = hc_bytealign_be (w2[3], w3[0], offset); w5[2] = hc_bytealign_be (w2[2], w2[3], offset); w5[1] = hc_bytealign_be (w2[1], w2[2], offset); w5[0] = hc_bytealign_be (w2[0], w2[1], offset); w4[3] = hc_bytealign_be (w1[3], w2[0], offset); w4[2] = hc_bytealign_be (w1[2], w1[3], offset); w4[1] = hc_bytealign_be (w1[1], w1[2], offset); w4[0] = hc_bytealign_be (w1[0], w1[1], offset); w3[3] = hc_bytealign_be (w0[3], w1[0], offset); w3[2] = hc_bytealign_be (w0[2], w0[3], offset); w3[1] = hc_bytealign_be (w0[1], w0[2], offset); w3[0] = hc_bytealign_be (w0[0], w0[1], offset); w2[3] = hc_bytealign_be ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_bytealign_be (w4[2], w4[3], offset); w7[2] = hc_bytealign_be (w4[1], w4[2], offset); w7[1] = hc_bytealign_be (w4[0], w4[1], offset); w7[0] = hc_bytealign_be (w3[3], w4[0], offset); w6[3] = hc_bytealign_be (w3[2], w3[3], offset); w6[2] = hc_bytealign_be (w3[1], w3[2], offset); w6[1] = hc_bytealign_be (w3[0], w3[1], offset); w6[0] = hc_bytealign_be (w2[3], w3[0], offset); w5[3] = hc_bytealign_be (w2[2], w2[3], offset); w5[2] = hc_bytealign_be (w2[1], w2[2], offset); w5[1] = hc_bytealign_be (w2[0], w2[1], offset); w5[0] = hc_bytealign_be (w1[3], w2[0], offset); w4[3] = hc_bytealign_be (w1[2], w1[3], offset); w4[2] = hc_bytealign_be (w1[1], w1[2], offset); w4[1] = hc_bytealign_be (w1[0], w1[1], offset); w4[0] = hc_bytealign_be (w0[3], w1[0], offset); 
w3[3] = hc_bytealign_be (w0[2], w0[3], offset); w3[2] = hc_bytealign_be (w0[1], w0[2], offset); w3[1] = hc_bytealign_be (w0[0], w0[1], offset); w3[0] = hc_bytealign_be ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_bytealign_be (w4[1], w4[2], offset); w7[2] = hc_bytealign_be (w4[0], w4[1], offset); w7[1] = hc_bytealign_be (w3[3], w4[0], offset); w7[0] = hc_bytealign_be (w3[2], w3[3], offset); w6[3] = hc_bytealign_be (w3[1], w3[2], offset); w6[2] = hc_bytealign_be (w3[0], w3[1], offset); w6[1] = hc_bytealign_be (w2[3], w3[0], offset); w6[0] = hc_bytealign_be (w2[2], w2[3], offset); w5[3] = hc_bytealign_be (w2[1], w2[2], offset); w5[2] = hc_bytealign_be (w2[0], w2[1], offset); w5[1] = hc_bytealign_be (w1[3], w2[0], offset); w5[0] = hc_bytealign_be (w1[2], w1[3], offset); w4[3] = hc_bytealign_be (w1[1], w1[2], offset); w4[2] = hc_bytealign_be (w1[0], w1[1], offset); w4[1] = hc_bytealign_be (w0[3], w1[0], offset); w4[0] = hc_bytealign_be (w0[2], w0[3], offset); w3[3] = hc_bytealign_be (w0[1], w0[2], offset); w3[2] = hc_bytealign_be (w0[0], w0[1], offset); w3[1] = hc_bytealign_be ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_bytealign_be (w4[0], w4[1], offset); w7[2] = hc_bytealign_be (w3[3], w4[0], offset); w7[1] = hc_bytealign_be (w3[2], w3[3], offset); w7[0] = hc_bytealign_be (w3[1], w3[2], offset); w6[3] = hc_bytealign_be (w3[0], w3[1], offset); w6[2] = hc_bytealign_be (w2[3], w3[0], offset); w6[1] = hc_bytealign_be (w2[2], w2[3], offset); w6[0] = hc_bytealign_be (w2[1], w2[2], offset); w5[3] = hc_bytealign_be (w2[0], w2[1], offset); w5[2] = hc_bytealign_be (w1[3], w2[0], offset); w5[1] = hc_bytealign_be (w1[2], w1[3], offset); w5[0] = hc_bytealign_be (w1[1], w1[2], offset); w4[3] = 
hc_bytealign_be (w1[0], w1[1], offset); w4[2] = hc_bytealign_be (w0[3], w1[0], offset); w4[1] = hc_bytealign_be (w0[2], w0[3], offset); w4[0] = hc_bytealign_be (w0[1], w0[2], offset); w3[3] = hc_bytealign_be (w0[0], w0[1], offset); w3[2] = hc_bytealign_be ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_bytealign_be (w3[3], w4[0], offset); w7[2] = hc_bytealign_be (w3[2], w3[3], offset); w7[1] = hc_bytealign_be (w3[1], w3[2], offset); w7[0] = hc_bytealign_be (w3[0], w3[1], offset); w6[3] = hc_bytealign_be (w2[3], w3[0], offset); w6[2] = hc_bytealign_be (w2[2], w2[3], offset); w6[1] = hc_bytealign_be (w2[1], w2[2], offset); w6[0] = hc_bytealign_be (w2[0], w2[1], offset); w5[3] = hc_bytealign_be (w1[3], w2[0], offset); w5[2] = hc_bytealign_be (w1[2], w1[3], offset); w5[1] = hc_bytealign_be (w1[1], w1[2], offset); w5[0] = hc_bytealign_be (w1[0], w1[1], offset); w4[3] = hc_bytealign_be (w0[3], w1[0], offset); w4[2] = hc_bytealign_be (w0[2], w0[3], offset); w4[1] = hc_bytealign_be (w0[1], w0[2], offset); w4[0] = hc_bytealign_be (w0[0], w0[1], offset); w3[3] = hc_bytealign_be ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_bytealign_be (w3[2], w3[3], offset); w7[2] = hc_bytealign_be (w3[1], w3[2], offset); w7[1] = hc_bytealign_be (w3[0], w3[1], offset); w7[0] = hc_bytealign_be (w2[3], w3[0], offset); w6[3] = hc_bytealign_be (w2[2], w2[3], offset); w6[2] = hc_bytealign_be (w2[1], w2[2], offset); w6[1] = hc_bytealign_be (w2[0], w2[1], offset); w6[0] = hc_bytealign_be (w1[3], w2[0], offset); w5[3] = hc_bytealign_be (w1[2], w1[3], offset); w5[2] = hc_bytealign_be (w1[1], w1[2], offset); w5[1] = hc_bytealign_be (w1[0], w1[1], offset); w5[0] = hc_bytealign_be 
(w0[3], w1[0], offset); w4[3] = hc_bytealign_be (w0[2], w0[3], offset); w4[2] = hc_bytealign_be (w0[1], w0[2], offset); w4[1] = hc_bytealign_be (w0[0], w0[1], offset); w4[0] = hc_bytealign_be ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_bytealign_be (w3[1], w3[2], offset); w7[2] = hc_bytealign_be (w3[0], w3[1], offset); w7[1] = hc_bytealign_be (w2[3], w3[0], offset); w7[0] = hc_bytealign_be (w2[2], w2[3], offset); w6[3] = hc_bytealign_be (w2[1], w2[2], offset); w6[2] = hc_bytealign_be (w2[0], w2[1], offset); w6[1] = hc_bytealign_be (w1[3], w2[0], offset); w6[0] = hc_bytealign_be (w1[2], w1[3], offset); w5[3] = hc_bytealign_be (w1[1], w1[2], offset); w5[2] = hc_bytealign_be (w1[0], w1[1], offset); w5[1] = hc_bytealign_be (w0[3], w1[0], offset); w5[0] = hc_bytealign_be (w0[2], w0[3], offset); w4[3] = hc_bytealign_be (w0[1], w0[2], offset); w4[2] = hc_bytealign_be (w0[0], w0[1], offset); w4[1] = hc_bytealign_be ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_bytealign_be (w3[0], w3[1], offset); w7[2] = hc_bytealign_be (w2[3], w3[0], offset); w7[1] = hc_bytealign_be (w2[2], w2[3], offset); w7[0] = hc_bytealign_be (w2[1], w2[2], offset); w6[3] = hc_bytealign_be (w2[0], w2[1], offset); w6[2] = hc_bytealign_be (w1[3], w2[0], offset); w6[1] = hc_bytealign_be (w1[2], w1[3], offset); w6[0] = hc_bytealign_be (w1[1], w1[2], offset); w5[3] = hc_bytealign_be (w1[0], w1[1], offset); w5[2] = hc_bytealign_be (w0[3], w1[0], offset); w5[1] = hc_bytealign_be (w0[2], w0[3], offset); w5[0] = hc_bytealign_be (w0[1], w0[2], offset); w4[3] = hc_bytealign_be (w0[0], w0[1], offset); w4[2] = hc_bytealign_be ( 0, w0[0], offset); w4[1] 
= 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_bytealign_be (w2[3], w3[0], offset); w7[2] = hc_bytealign_be (w2[2], w2[3], offset); w7[1] = hc_bytealign_be (w2[1], w2[2], offset); w7[0] = hc_bytealign_be (w2[0], w2[1], offset); w6[3] = hc_bytealign_be (w1[3], w2[0], offset); w6[2] = hc_bytealign_be (w1[2], w1[3], offset); w6[1] = hc_bytealign_be (w1[1], w1[2], offset); w6[0] = hc_bytealign_be (w1[0], w1[1], offset); w5[3] = hc_bytealign_be (w0[3], w1[0], offset); w5[2] = hc_bytealign_be (w0[2], w0[3], offset); w5[1] = hc_bytealign_be (w0[1], w0[2], offset); w5[0] = hc_bytealign_be (w0[0], w0[1], offset); w4[3] = hc_bytealign_be ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_bytealign_be (w2[2], w2[3], offset); w7[2] = hc_bytealign_be (w2[1], w2[2], offset); w7[1] = hc_bytealign_be (w2[0], w2[1], offset); w7[0] = hc_bytealign_be (w1[3], w2[0], offset); w6[3] = hc_bytealign_be (w1[2], w1[3], offset); w6[2] = hc_bytealign_be (w1[1], w1[2], offset); w6[1] = hc_bytealign_be (w1[0], w1[1], offset); w6[0] = hc_bytealign_be (w0[3], w1[0], offset); w5[3] = hc_bytealign_be (w0[2], w0[3], offset); w5[2] = hc_bytealign_be (w0[1], w0[2], offset); w5[1] = hc_bytealign_be (w0[0], w0[1], offset); w5[0] = hc_bytealign_be ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_bytealign_be (w2[1], w2[2], offset); w7[2] = hc_bytealign_be (w2[0], w2[1], offset); w7[1] = hc_bytealign_be (w1[3], w2[0], 
offset); w7[0] = hc_bytealign_be (w1[2], w1[3], offset); w6[3] = hc_bytealign_be (w1[1], w1[2], offset); w6[2] = hc_bytealign_be (w1[0], w1[1], offset); w6[1] = hc_bytealign_be (w0[3], w1[0], offset); w6[0] = hc_bytealign_be (w0[2], w0[3], offset); w5[3] = hc_bytealign_be (w0[1], w0[2], offset); w5[2] = hc_bytealign_be (w0[0], w0[1], offset); w5[1] = hc_bytealign_be ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_bytealign_be (w2[0], w2[1], offset); w7[2] = hc_bytealign_be (w1[3], w2[0], offset); w7[1] = hc_bytealign_be (w1[2], w1[3], offset); w7[0] = hc_bytealign_be (w1[1], w1[2], offset); w6[3] = hc_bytealign_be (w1[0], w1[1], offset); w6[2] = hc_bytealign_be (w0[3], w1[0], offset); w6[1] = hc_bytealign_be (w0[2], w0[3], offset); w6[0] = hc_bytealign_be (w0[1], w0[2], offset); w5[3] = hc_bytealign_be (w0[0], w0[1], offset); w5[2] = hc_bytealign_be ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_bytealign_be (w1[3], w2[0], offset); w7[2] = hc_bytealign_be (w1[2], w1[3], offset); w7[1] = hc_bytealign_be (w1[1], w1[2], offset); w7[0] = hc_bytealign_be (w1[0], w1[1], offset); w6[3] = hc_bytealign_be (w0[3], w1[0], offset); w6[2] = hc_bytealign_be (w0[2], w0[3], offset); w6[1] = hc_bytealign_be (w0[1], w0[2], offset); w6[0] = hc_bytealign_be (w0[0], w0[1], offset); w5[3] = hc_bytealign_be ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; 
w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_bytealign_be (w1[2], w1[3], offset); w7[2] = hc_bytealign_be (w1[1], w1[2], offset); w7[1] = hc_bytealign_be (w1[0], w1[1], offset); w7[0] = hc_bytealign_be (w0[3], w1[0], offset); w6[3] = hc_bytealign_be (w0[2], w0[3], offset); w6[2] = hc_bytealign_be (w0[1], w0[2], offset); w6[1] = hc_bytealign_be (w0[0], w0[1], offset); w6[0] = hc_bytealign_be ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_bytealign_be (w1[1], w1[2], offset); w7[2] = hc_bytealign_be (w1[0], w1[1], offset); w7[1] = hc_bytealign_be (w0[3], w1[0], offset); w7[0] = hc_bytealign_be (w0[2], w0[3], offset); w6[3] = hc_bytealign_be (w0[1], w0[2], offset); w6[2] = hc_bytealign_be (w0[0], w0[1], offset); w6[1] = hc_bytealign_be ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_bytealign_be (w1[0], w1[1], offset); w7[2] = hc_bytealign_be (w0[3], w1[0], offset); w7[1] = hc_bytealign_be (w0[2], w0[3], offset); w7[0] = hc_bytealign_be (w0[1], w0[2], offset); w6[3] = hc_bytealign_be (w0[0], w0[1], offset); w6[2] = hc_bytealign_be ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_bytealign_be (w0[3], w1[0], offset); w7[2] = 
hc_bytealign_be (w0[2], w0[3], offset); w7[1] = hc_bytealign_be (w0[1], w0[2], offset); w7[0] = hc_bytealign_be (w0[0], w0[1], offset); w6[3] = hc_bytealign_be ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_bytealign_be (w0[2], w0[3], offset); w7[2] = hc_bytealign_be (w0[1], w0[2], offset); w7[1] = hc_bytealign_be (w0[0], w0[1], offset); w7[0] = hc_bytealign_be ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: w7[3] = hc_bytealign_be (w0[1], w0[2], offset); w7[2] = hc_bytealign_be (w0[0], w0[1], offset); w7[1] = hc_bytealign_be ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_bytealign_be (w0[0], w0[1], offset); w7[2] = hc_bytealign_be ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: w7[3] = hc_bytealign_be ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; 
w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w7[3] = hc_byte_perm (w7[3], w7[2], selector); w7[2] = hc_byte_perm (w7[2], w7[1], selector); w7[1] = hc_byte_perm (w7[1], w7[0], selector); w7[0] = hc_byte_perm (w7[0], w6[3], selector); w6[3] = hc_byte_perm (w6[3], w6[2], selector); w6[2] = hc_byte_perm (w6[2], w6[1], selector); w6[1] = hc_byte_perm (w6[1], w6[0], selector); w6[0] = hc_byte_perm (w6[0], w5[3], selector); w5[3] = hc_byte_perm (w5[3], w5[2], selector); w5[2] = hc_byte_perm (w5[2], w5[1], selector); w5[1] = hc_byte_perm (w5[1], w5[0], selector); w5[0] = hc_byte_perm (w5[0], w4[3], selector); w4[3] = hc_byte_perm (w4[3], w4[2], selector); w4[2] = hc_byte_perm (w4[2], w4[1], selector); w4[1] = hc_byte_perm (w4[1], w4[0], selector); w4[0] = hc_byte_perm (w4[0], w3[3], selector); w3[3] = hc_byte_perm (w3[3], w3[2], selector); w3[2] = hc_byte_perm (w3[2], w3[1], selector); w3[1] = hc_byte_perm (w3[1], w3[0], selector); w3[0] = hc_byte_perm (w3[0], w2[3], selector); w2[3] = hc_byte_perm (w2[3], w2[2], selector); w2[2] = hc_byte_perm (w2[2], w2[1], selector); w2[1] = hc_byte_perm (w2[1], w2[0], selector); w2[0] = hc_byte_perm (w2[0], w1[3], selector); w1[3] = hc_byte_perm (w1[3], w1[2], selector); w1[2] = hc_byte_perm (w1[2], w1[1], selector); w1[1] = hc_byte_perm (w1[1], w1[0], selector); w1[0] = hc_byte_perm (w1[0], w0[3], selector); w0[3] = hc_byte_perm (w0[3], 
w0[2], selector); w0[2] = hc_byte_perm (w0[2], w0[1], selector); w0[1] = hc_byte_perm (w0[1], w0[0], selector); w0[0] = hc_byte_perm (w0[0], 0, selector); break; case 1: w7[3] = hc_byte_perm (w7[2], w7[1], selector); w7[2] = hc_byte_perm (w7[1], w7[0], selector); w7[1] = hc_byte_perm (w7[0], w6[3], selector); w7[0] = hc_byte_perm (w6[3], w6[2], selector); w6[3] = hc_byte_perm (w6[2], w6[1], selector); w6[2] = hc_byte_perm (w6[1], w6[0], selector); w6[1] = hc_byte_perm (w6[0], w5[3], selector); w6[0] = hc_byte_perm (w5[3], w5[2], selector); w5[3] = hc_byte_perm (w5[2], w5[1], selector); w5[2] = hc_byte_perm (w5[1], w5[0], selector); w5[1] = hc_byte_perm (w5[0], w4[3], selector); w5[0] = hc_byte_perm (w4[3], w4[2], selector); w4[3] = hc_byte_perm (w4[2], w4[1], selector); w4[2] = hc_byte_perm (w4[1], w4[0], selector); w4[1] = hc_byte_perm (w4[0], w3[3], selector); w4[0] = hc_byte_perm (w3[3], w3[2], selector); w3[3] = hc_byte_perm (w3[2], w3[1], selector); w3[2] = hc_byte_perm (w3[1], w3[0], selector); w3[1] = hc_byte_perm (w3[0], w2[3], selector); w3[0] = hc_byte_perm (w2[3], w2[2], selector); w2[3] = hc_byte_perm (w2[2], w2[1], selector); w2[2] = hc_byte_perm (w2[1], w2[0], selector); w2[1] = hc_byte_perm (w2[0], w1[3], selector); w2[0] = hc_byte_perm (w1[3], w1[2], selector); w1[3] = hc_byte_perm (w1[2], w1[1], selector); w1[2] = hc_byte_perm (w1[1], w1[0], selector); w1[1] = hc_byte_perm (w1[0], w0[3], selector); w1[0] = hc_byte_perm (w0[3], w0[2], selector); w0[3] = hc_byte_perm (w0[2], w0[1], selector); w0[2] = hc_byte_perm (w0[1], w0[0], selector); w0[1] = hc_byte_perm (w0[0], 0, selector); w0[0] = 0; break; case 2: w7[3] = hc_byte_perm (w7[1], w7[0], selector); w7[2] = hc_byte_perm (w7[0], w6[3], selector); w7[1] = hc_byte_perm (w6[3], w6[2], selector); w7[0] = hc_byte_perm (w6[2], w6[1], selector); w6[3] = hc_byte_perm (w6[1], w6[0], selector); w6[2] = hc_byte_perm (w6[0], w5[3], selector); w6[1] = hc_byte_perm (w5[3], w5[2], selector); w6[0] = hc_byte_perm 
(w5[2], w5[1], selector); w5[3] = hc_byte_perm (w5[1], w5[0], selector); w5[2] = hc_byte_perm (w5[0], w4[3], selector); w5[1] = hc_byte_perm (w4[3], w4[2], selector); w5[0] = hc_byte_perm (w4[2], w4[1], selector); w4[3] = hc_byte_perm (w4[1], w4[0], selector); w4[2] = hc_byte_perm (w4[0], w3[3], selector); w4[1] = hc_byte_perm (w3[3], w3[2], selector); w4[0] = hc_byte_perm (w3[2], w3[1], selector); w3[3] = hc_byte_perm (w3[1], w3[0], selector); w3[2] = hc_byte_perm (w3[0], w2[3], selector); w3[1] = hc_byte_perm (w2[3], w2[2], selector); w3[0] = hc_byte_perm (w2[2], w2[1], selector); w2[3] = hc_byte_perm (w2[1], w2[0], selector); w2[2] = hc_byte_perm (w2[0], w1[3], selector); w2[1] = hc_byte_perm (w1[3], w1[2], selector); w2[0] = hc_byte_perm (w1[2], w1[1], selector); w1[3] = hc_byte_perm (w1[1], w1[0], selector); w1[2] = hc_byte_perm (w1[0], w0[3], selector); w1[1] = hc_byte_perm (w0[3], w0[2], selector); w1[0] = hc_byte_perm (w0[2], w0[1], selector); w0[3] = hc_byte_perm (w0[1], w0[0], selector); w0[2] = hc_byte_perm (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_byte_perm (w7[0], w6[3], selector); w7[2] = hc_byte_perm (w6[3], w6[2], selector); w7[1] = hc_byte_perm (w6[2], w6[1], selector); w7[0] = hc_byte_perm (w6[1], w6[0], selector); w6[3] = hc_byte_perm (w6[0], w5[3], selector); w6[2] = hc_byte_perm (w5[3], w5[2], selector); w6[1] = hc_byte_perm (w5[2], w5[1], selector); w6[0] = hc_byte_perm (w5[1], w5[0], selector); w5[3] = hc_byte_perm (w5[0], w4[3], selector); w5[2] = hc_byte_perm (w4[3], w4[2], selector); w5[1] = hc_byte_perm (w4[2], w4[1], selector); w5[0] = hc_byte_perm (w4[1], w4[0], selector); w4[3] = hc_byte_perm (w4[0], w3[3], selector); w4[2] = hc_byte_perm (w3[3], w3[2], selector); w4[1] = hc_byte_perm (w3[2], w3[1], selector); w4[0] = hc_byte_perm (w3[1], w3[0], selector); w3[3] = hc_byte_perm (w3[0], w2[3], selector); w3[2] = hc_byte_perm (w2[3], w2[2], selector); w3[1] = hc_byte_perm (w2[2], w2[1], selector); w3[0] = 
hc_byte_perm (w2[1], w2[0], selector); w2[3] = hc_byte_perm (w2[0], w1[3], selector); w2[2] = hc_byte_perm (w1[3], w1[2], selector); w2[1] = hc_byte_perm (w1[2], w1[1], selector); w2[0] = hc_byte_perm (w1[1], w1[0], selector); w1[3] = hc_byte_perm (w1[0], w0[3], selector); w1[2] = hc_byte_perm (w0[3], w0[2], selector); w1[1] = hc_byte_perm (w0[2], w0[1], selector); w1[0] = hc_byte_perm (w0[1], w0[0], selector); w0[3] = hc_byte_perm (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_byte_perm (w6[3], w6[2], selector); w7[2] = hc_byte_perm (w6[2], w6[1], selector); w7[1] = hc_byte_perm (w6[1], w6[0], selector); w7[0] = hc_byte_perm (w6[0], w5[3], selector); w6[3] = hc_byte_perm (w5[3], w5[2], selector); w6[2] = hc_byte_perm (w5[2], w5[1], selector); w6[1] = hc_byte_perm (w5[1], w5[0], selector); w6[0] = hc_byte_perm (w5[0], w4[3], selector); w5[3] = hc_byte_perm (w4[3], w4[2], selector); w5[2] = hc_byte_perm (w4[2], w4[1], selector); w5[1] = hc_byte_perm (w4[1], w4[0], selector); w5[0] = hc_byte_perm (w4[0], w3[3], selector); w4[3] = hc_byte_perm (w3[3], w3[2], selector); w4[2] = hc_byte_perm (w3[2], w3[1], selector); w4[1] = hc_byte_perm (w3[1], w3[0], selector); w4[0] = hc_byte_perm (w3[0], w2[3], selector); w3[3] = hc_byte_perm (w2[3], w2[2], selector); w3[2] = hc_byte_perm (w2[2], w2[1], selector); w3[1] = hc_byte_perm (w2[1], w2[0], selector); w3[0] = hc_byte_perm (w2[0], w1[3], selector); w2[3] = hc_byte_perm (w1[3], w1[2], selector); w2[2] = hc_byte_perm (w1[2], w1[1], selector); w2[1] = hc_byte_perm (w1[1], w1[0], selector); w2[0] = hc_byte_perm (w1[0], w0[3], selector); w1[3] = hc_byte_perm (w0[3], w0[2], selector); w1[2] = hc_byte_perm (w0[2], w0[1], selector); w1[1] = hc_byte_perm (w0[1], w0[0], selector); w1[0] = hc_byte_perm (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_byte_perm (w6[2], w6[1], selector); w7[2] = hc_byte_perm (w6[1], w6[0], selector); w7[1] = hc_byte_perm (w6[0], 
w5[3], selector); w7[0] = hc_byte_perm (w5[3], w5[2], selector); w6[3] = hc_byte_perm (w5[2], w5[1], selector); w6[2] = hc_byte_perm (w5[1], w5[0], selector); w6[1] = hc_byte_perm (w5[0], w4[3], selector); w6[0] = hc_byte_perm (w4[3], w4[2], selector); w5[3] = hc_byte_perm (w4[2], w4[1], selector); w5[2] = hc_byte_perm (w4[1], w4[0], selector); w5[1] = hc_byte_perm (w4[0], w3[3], selector); w5[0] = hc_byte_perm (w3[3], w3[2], selector); w4[3] = hc_byte_perm (w3[2], w3[1], selector); w4[2] = hc_byte_perm (w3[1], w3[0], selector); w4[1] = hc_byte_perm (w3[0], w2[3], selector); w4[0] = hc_byte_perm (w2[3], w2[2], selector); w3[3] = hc_byte_perm (w2[2], w2[1], selector); w3[2] = hc_byte_perm (w2[1], w2[0], selector); w3[1] = hc_byte_perm (w2[0], w1[3], selector); w3[0] = hc_byte_perm (w1[3], w1[2], selector); w2[3] = hc_byte_perm (w1[2], w1[1], selector); w2[2] = hc_byte_perm (w1[1], w1[0], selector); w2[1] = hc_byte_perm (w1[0], w0[3], selector); w2[0] = hc_byte_perm (w0[3], w0[2], selector); w1[3] = hc_byte_perm (w0[2], w0[1], selector); w1[2] = hc_byte_perm (w0[1], w0[0], selector); w1[1] = hc_byte_perm (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_byte_perm (w6[1], w6[0], selector); w7[2] = hc_byte_perm (w6[0], w5[3], selector); w7[1] = hc_byte_perm (w5[3], w5[2], selector); w7[0] = hc_byte_perm (w5[2], w5[1], selector); w6[3] = hc_byte_perm (w5[1], w5[0], selector); w6[2] = hc_byte_perm (w5[0], w4[3], selector); w6[1] = hc_byte_perm (w4[3], w4[2], selector); w6[0] = hc_byte_perm (w4[2], w4[1], selector); w5[3] = hc_byte_perm (w4[1], w4[0], selector); w5[2] = hc_byte_perm (w4[0], w3[3], selector); w5[1] = hc_byte_perm (w3[3], w3[2], selector); w5[0] = hc_byte_perm (w3[2], w3[1], selector); w4[3] = hc_byte_perm (w3[1], w3[0], selector); w4[2] = hc_byte_perm (w3[0], w2[3], selector); w4[1] = hc_byte_perm (w2[3], w2[2], selector); w4[0] = hc_byte_perm (w2[2], w2[1], selector); w3[3] = hc_byte_perm (w2[1], w2[0], 
selector); w3[2] = hc_byte_perm (w2[0], w1[3], selector); w3[1] = hc_byte_perm (w1[3], w1[2], selector); w3[0] = hc_byte_perm (w1[2], w1[1], selector); w2[3] = hc_byte_perm (w1[1], w1[0], selector); w2[2] = hc_byte_perm (w1[0], w0[3], selector); w2[1] = hc_byte_perm (w0[3], w0[2], selector); w2[0] = hc_byte_perm (w0[2], w0[1], selector); w1[3] = hc_byte_perm (w0[1], w0[0], selector); w1[2] = hc_byte_perm (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_byte_perm (w6[0], w5[3], selector); w7[2] = hc_byte_perm (w5[3], w5[2], selector); w7[1] = hc_byte_perm (w5[2], w5[1], selector); w7[0] = hc_byte_perm (w5[1], w5[0], selector); w6[3] = hc_byte_perm (w5[0], w4[3], selector); w6[2] = hc_byte_perm (w4[3], w4[2], selector); w6[1] = hc_byte_perm (w4[2], w4[1], selector); w6[0] = hc_byte_perm (w4[1], w4[0], selector); w5[3] = hc_byte_perm (w4[0], w3[3], selector); w5[2] = hc_byte_perm (w3[3], w3[2], selector); w5[1] = hc_byte_perm (w3[2], w3[1], selector); w5[0] = hc_byte_perm (w3[1], w3[0], selector); w4[3] = hc_byte_perm (w3[0], w2[3], selector); w4[2] = hc_byte_perm (w2[3], w2[2], selector); w4[1] = hc_byte_perm (w2[2], w2[1], selector); w4[0] = hc_byte_perm (w2[1], w2[0], selector); w3[3] = hc_byte_perm (w2[0], w1[3], selector); w3[2] = hc_byte_perm (w1[3], w1[2], selector); w3[1] = hc_byte_perm (w1[2], w1[1], selector); w3[0] = hc_byte_perm (w1[1], w1[0], selector); w2[3] = hc_byte_perm (w1[0], w0[3], selector); w2[2] = hc_byte_perm (w0[3], w0[2], selector); w2[1] = hc_byte_perm (w0[2], w0[1], selector); w2[0] = hc_byte_perm (w0[1], w0[0], selector); w1[3] = hc_byte_perm (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_byte_perm (w5[3], w5[2], selector); w7[2] = hc_byte_perm (w5[2], w5[1], selector); w7[1] = hc_byte_perm (w5[1], w5[0], selector); w7[0] = hc_byte_perm (w5[0], w4[3], selector); w6[3] = hc_byte_perm (w4[3], w4[2], 
selector); w6[2] = hc_byte_perm (w4[2], w4[1], selector); w6[1] = hc_byte_perm (w4[1], w4[0], selector); w6[0] = hc_byte_perm (w4[0], w3[3], selector); w5[3] = hc_byte_perm (w3[3], w3[2], selector); w5[2] = hc_byte_perm (w3[2], w3[1], selector); w5[1] = hc_byte_perm (w3[1], w3[0], selector); w5[0] = hc_byte_perm (w3[0], w2[3], selector); w4[3] = hc_byte_perm (w2[3], w2[2], selector); w4[2] = hc_byte_perm (w2[2], w2[1], selector); w4[1] = hc_byte_perm (w2[1], w2[0], selector); w4[0] = hc_byte_perm (w2[0], w1[3], selector); w3[3] = hc_byte_perm (w1[3], w1[2], selector); w3[2] = hc_byte_perm (w1[2], w1[1], selector); w3[1] = hc_byte_perm (w1[1], w1[0], selector); w3[0] = hc_byte_perm (w1[0], w0[3], selector); w2[3] = hc_byte_perm (w0[3], w0[2], selector); w2[2] = hc_byte_perm (w0[2], w0[1], selector); w2[1] = hc_byte_perm (w0[1], w0[0], selector); w2[0] = hc_byte_perm (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_byte_perm (w5[2], w5[1], selector); w7[2] = hc_byte_perm (w5[1], w5[0], selector); w7[1] = hc_byte_perm (w5[0], w4[3], selector); w7[0] = hc_byte_perm (w4[3], w4[2], selector); w6[3] = hc_byte_perm (w4[2], w4[1], selector); w6[2] = hc_byte_perm (w4[1], w4[0], selector); w6[1] = hc_byte_perm (w4[0], w3[3], selector); w6[0] = hc_byte_perm (w3[3], w3[2], selector); w5[3] = hc_byte_perm (w3[2], w3[1], selector); w5[2] = hc_byte_perm (w3[1], w3[0], selector); w5[1] = hc_byte_perm (w3[0], w2[3], selector); w5[0] = hc_byte_perm (w2[3], w2[2], selector); w4[3] = hc_byte_perm (w2[2], w2[1], selector); w4[2] = hc_byte_perm (w2[1], w2[0], selector); w4[1] = hc_byte_perm (w2[0], w1[3], selector); w4[0] = hc_byte_perm (w1[3], w1[2], selector); w3[3] = hc_byte_perm (w1[2], w1[1], selector); w3[2] = hc_byte_perm (w1[1], w1[0], selector); w3[1] = hc_byte_perm (w1[0], w0[3], selector); w3[0] = hc_byte_perm (w0[3], w0[2], selector); w2[3] = hc_byte_perm (w0[2], w0[1], selector); w2[2] = 
hc_byte_perm (w0[1], w0[0], selector); w2[1] = hc_byte_perm (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_byte_perm (w5[1], w5[0], selector); w7[2] = hc_byte_perm (w5[0], w4[3], selector); w7[1] = hc_byte_perm (w4[3], w4[2], selector); w7[0] = hc_byte_perm (w4[2], w4[1], selector); w6[3] = hc_byte_perm (w4[1], w4[0], selector); w6[2] = hc_byte_perm (w4[0], w3[3], selector); w6[1] = hc_byte_perm (w3[3], w3[2], selector); w6[0] = hc_byte_perm (w3[2], w3[1], selector); w5[3] = hc_byte_perm (w3[1], w3[0], selector); w5[2] = hc_byte_perm (w3[0], w2[3], selector); w5[1] = hc_byte_perm (w2[3], w2[2], selector); w5[0] = hc_byte_perm (w2[2], w2[1], selector); w4[3] = hc_byte_perm (w2[1], w2[0], selector); w4[2] = hc_byte_perm (w2[0], w1[3], selector); w4[1] = hc_byte_perm (w1[3], w1[2], selector); w4[0] = hc_byte_perm (w1[2], w1[1], selector); w3[3] = hc_byte_perm (w1[1], w1[0], selector); w3[2] = hc_byte_perm (w1[0], w0[3], selector); w3[1] = hc_byte_perm (w0[3], w0[2], selector); w3[0] = hc_byte_perm (w0[2], w0[1], selector); w2[3] = hc_byte_perm (w0[1], w0[0], selector); w2[2] = hc_byte_perm (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_byte_perm (w5[0], w4[3], selector); w7[2] = hc_byte_perm (w4[3], w4[2], selector); w7[1] = hc_byte_perm (w4[2], w4[1], selector); w7[0] = hc_byte_perm (w4[1], w4[0], selector); w6[3] = hc_byte_perm (w4[0], w3[3], selector); w6[2] = hc_byte_perm (w3[3], w3[2], selector); w6[1] = hc_byte_perm (w3[2], w3[1], selector); w6[0] = hc_byte_perm (w3[1], w3[0], selector); w5[3] = hc_byte_perm (w3[0], w2[3], selector); w5[2] = hc_byte_perm (w2[3], w2[2], selector); w5[1] = hc_byte_perm (w2[2], w2[1], selector); w5[0] = hc_byte_perm (w2[1], w2[0], selector); w4[3] = hc_byte_perm (w2[0], w1[3], selector); w4[2] = hc_byte_perm (w1[3], w1[2], 
selector); w4[1] = hc_byte_perm (w1[2], w1[1], selector); w4[0] = hc_byte_perm (w1[1], w1[0], selector); w3[3] = hc_byte_perm (w1[0], w0[3], selector); w3[2] = hc_byte_perm (w0[3], w0[2], selector); w3[1] = hc_byte_perm (w0[2], w0[1], selector); w3[0] = hc_byte_perm (w0[1], w0[0], selector); w2[3] = hc_byte_perm (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_byte_perm (w4[3], w4[2], selector); w7[2] = hc_byte_perm (w4[2], w4[1], selector); w7[1] = hc_byte_perm (w4[1], w4[0], selector); w7[0] = hc_byte_perm (w4[0], w3[3], selector); w6[3] = hc_byte_perm (w3[3], w3[2], selector); w6[2] = hc_byte_perm (w3[2], w3[1], selector); w6[1] = hc_byte_perm (w3[1], w3[0], selector); w6[0] = hc_byte_perm (w3[0], w2[3], selector); w5[3] = hc_byte_perm (w2[3], w2[2], selector); w5[2] = hc_byte_perm (w2[2], w2[1], selector); w5[1] = hc_byte_perm (w2[1], w2[0], selector); w5[0] = hc_byte_perm (w2[0], w1[3], selector); w4[3] = hc_byte_perm (w1[3], w1[2], selector); w4[2] = hc_byte_perm (w1[2], w1[1], selector); w4[1] = hc_byte_perm (w1[1], w1[0], selector); w4[0] = hc_byte_perm (w1[0], w0[3], selector); w3[3] = hc_byte_perm (w0[3], w0[2], selector); w3[2] = hc_byte_perm (w0[2], w0[1], selector); w3[1] = hc_byte_perm (w0[1], w0[0], selector); w3[0] = hc_byte_perm (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_byte_perm (w4[2], w4[1], selector); w7[2] = hc_byte_perm (w4[1], w4[0], selector); w7[1] = hc_byte_perm (w4[0], w3[3], selector); w7[0] = hc_byte_perm (w3[3], w3[2], selector); w6[3] = hc_byte_perm (w3[2], w3[1], selector); w6[2] = hc_byte_perm (w3[1], w3[0], selector); w6[1] = hc_byte_perm (w3[0], w2[3], selector); w6[0] = hc_byte_perm (w2[3], w2[2], selector); w5[3] = hc_byte_perm (w2[2], w2[1], selector); w5[2] = 
hc_byte_perm (w2[1], w2[0], selector); w5[1] = hc_byte_perm (w2[0], w1[3], selector); w5[0] = hc_byte_perm (w1[3], w1[2], selector); w4[3] = hc_byte_perm (w1[2], w1[1], selector); w4[2] = hc_byte_perm (w1[1], w1[0], selector); w4[1] = hc_byte_perm (w1[0], w0[3], selector); w4[0] = hc_byte_perm (w0[3], w0[2], selector); w3[3] = hc_byte_perm (w0[2], w0[1], selector); w3[2] = hc_byte_perm (w0[1], w0[0], selector); w3[1] = hc_byte_perm (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_byte_perm (w4[1], w4[0], selector); w7[2] = hc_byte_perm (w4[0], w3[3], selector); w7[1] = hc_byte_perm (w3[3], w3[2], selector); w7[0] = hc_byte_perm (w3[2], w3[1], selector); w6[3] = hc_byte_perm (w3[1], w3[0], selector); w6[2] = hc_byte_perm (w3[0], w2[3], selector); w6[1] = hc_byte_perm (w2[3], w2[2], selector); w6[0] = hc_byte_perm (w2[2], w2[1], selector); w5[3] = hc_byte_perm (w2[1], w2[0], selector); w5[2] = hc_byte_perm (w2[0], w1[3], selector); w5[1] = hc_byte_perm (w1[3], w1[2], selector); w5[0] = hc_byte_perm (w1[2], w1[1], selector); w4[3] = hc_byte_perm (w1[1], w1[0], selector); w4[2] = hc_byte_perm (w1[0], w0[3], selector); w4[1] = hc_byte_perm (w0[3], w0[2], selector); w4[0] = hc_byte_perm (w0[2], w0[1], selector); w3[3] = hc_byte_perm (w0[1], w0[0], selector); w3[2] = hc_byte_perm (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_byte_perm (w4[0], w3[3], selector); w7[2] = hc_byte_perm (w3[3], w3[2], selector); w7[1] = hc_byte_perm (w3[2], w3[1], selector); w7[0] = hc_byte_perm (w3[1], w3[0], selector); w6[3] = hc_byte_perm (w3[0], w2[3], selector); w6[2] = hc_byte_perm (w2[3], w2[2], selector); w6[1] = hc_byte_perm (w2[2], w2[1], selector); w6[0] = hc_byte_perm (w2[1], w2[0], 
selector); w5[3] = hc_byte_perm (w2[0], w1[3], selector); w5[2] = hc_byte_perm (w1[3], w1[2], selector); w5[1] = hc_byte_perm (w1[2], w1[1], selector); w5[0] = hc_byte_perm (w1[1], w1[0], selector); w4[3] = hc_byte_perm (w1[0], w0[3], selector); w4[2] = hc_byte_perm (w0[3], w0[2], selector); w4[1] = hc_byte_perm (w0[2], w0[1], selector); w4[0] = hc_byte_perm (w0[1], w0[0], selector); w3[3] = hc_byte_perm (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_byte_perm (w3[3], w3[2], selector); w7[2] = hc_byte_perm (w3[2], w3[1], selector); w7[1] = hc_byte_perm (w3[1], w3[0], selector); w7[0] = hc_byte_perm (w3[0], w2[3], selector); w6[3] = hc_byte_perm (w2[3], w2[2], selector); w6[2] = hc_byte_perm (w2[2], w2[1], selector); w6[1] = hc_byte_perm (w2[1], w2[0], selector); w6[0] = hc_byte_perm (w2[0], w1[3], selector); w5[3] = hc_byte_perm (w1[3], w1[2], selector); w5[2] = hc_byte_perm (w1[2], w1[1], selector); w5[1] = hc_byte_perm (w1[1], w1[0], selector); w5[0] = hc_byte_perm (w1[0], w0[3], selector); w4[3] = hc_byte_perm (w0[3], w0[2], selector); w4[2] = hc_byte_perm (w0[2], w0[1], selector); w4[1] = hc_byte_perm (w0[1], w0[0], selector); w4[0] = hc_byte_perm (w0[0], 0, selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_byte_perm (w3[2], w3[1], selector); w7[2] = hc_byte_perm (w3[1], w3[0], selector); w7[1] = hc_byte_perm (w3[0], w2[3], selector); w7[0] = hc_byte_perm (w2[3], w2[2], selector); w6[3] = hc_byte_perm (w2[2], w2[1], selector); w6[2] = hc_byte_perm (w2[1], w2[0], selector); w6[1] = hc_byte_perm (w2[0], w1[3], selector); w6[0] = hc_byte_perm (w1[3], w1[2], selector); w5[3] = hc_byte_perm (w1[2], w1[1], selector); w5[2] = hc_byte_perm 
(w1[1], w1[0], selector); w5[1] = hc_byte_perm (w1[0], w0[3], selector); w5[0] = hc_byte_perm (w0[3], w0[2], selector); w4[3] = hc_byte_perm (w0[2], w0[1], selector); w4[2] = hc_byte_perm (w0[1], w0[0], selector); w4[1] = hc_byte_perm (w0[0], 0, selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_byte_perm (w3[1], w3[0], selector); w7[2] = hc_byte_perm (w3[0], w2[3], selector); w7[1] = hc_byte_perm (w2[3], w2[2], selector); w7[0] = hc_byte_perm (w2[2], w2[1], selector); w6[3] = hc_byte_perm (w2[1], w2[0], selector); w6[2] = hc_byte_perm (w2[0], w1[3], selector); w6[1] = hc_byte_perm (w1[3], w1[2], selector); w6[0] = hc_byte_perm (w1[2], w1[1], selector); w5[3] = hc_byte_perm (w1[1], w1[0], selector); w5[2] = hc_byte_perm (w1[0], w0[3], selector); w5[1] = hc_byte_perm (w0[3], w0[2], selector); w5[0] = hc_byte_perm (w0[2], w0[1], selector); w4[3] = hc_byte_perm (w0[1], w0[0], selector); w4[2] = hc_byte_perm (w0[0], 0, selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_byte_perm (w3[0], w2[3], selector); w7[2] = hc_byte_perm (w2[3], w2[2], selector); w7[1] = hc_byte_perm (w2[2], w2[1], selector); w7[0] = hc_byte_perm (w2[1], w2[0], selector); w6[3] = hc_byte_perm (w2[0], w1[3], selector); w6[2] = hc_byte_perm (w1[3], w1[2], selector); w6[1] = hc_byte_perm (w1[2], w1[1], selector); w6[0] = hc_byte_perm (w1[1], w1[0], selector); w5[3] = hc_byte_perm (w1[0], w0[3], selector); w5[2] = hc_byte_perm (w0[3], w0[2], selector); w5[1] = hc_byte_perm (w0[2], w0[1], selector); w5[0] = hc_byte_perm (w0[1], w0[0], selector); w4[3] = hc_byte_perm (w0[0], 0, selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 
0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_byte_perm (w2[3], w2[2], selector); w7[2] = hc_byte_perm (w2[2], w2[1], selector); w7[1] = hc_byte_perm (w2[1], w2[0], selector); w7[0] = hc_byte_perm (w2[0], w1[3], selector); w6[3] = hc_byte_perm (w1[3], w1[2], selector); w6[2] = hc_byte_perm (w1[2], w1[1], selector); w6[1] = hc_byte_perm (w1[1], w1[0], selector); w6[0] = hc_byte_perm (w1[0], w0[3], selector); w5[3] = hc_byte_perm (w0[3], w0[2], selector); w5[2] = hc_byte_perm (w0[2], w0[1], selector); w5[1] = hc_byte_perm (w0[1], w0[0], selector); w5[0] = hc_byte_perm (w0[0], 0, selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_byte_perm (w2[2], w2[1], selector); w7[2] = hc_byte_perm (w2[1], w2[0], selector); w7[1] = hc_byte_perm (w2[0], w1[3], selector); w7[0] = hc_byte_perm (w1[3], w1[2], selector); w6[3] = hc_byte_perm (w1[2], w1[1], selector); w6[2] = hc_byte_perm (w1[1], w1[0], selector); w6[1] = hc_byte_perm (w1[0], w0[3], selector); w6[0] = hc_byte_perm (w0[3], w0[2], selector); w5[3] = hc_byte_perm (w0[2], w0[1], selector); w5[2] = hc_byte_perm (w0[1], w0[0], selector); w5[1] = hc_byte_perm (w0[0], 0, selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_byte_perm (w2[1], w2[0], selector); w7[2] = hc_byte_perm (w2[0], w1[3], selector); w7[1] = hc_byte_perm (w1[3], w1[2], selector); w7[0] = hc_byte_perm (w1[2], w1[1], selector); w6[3] = hc_byte_perm (w1[1], w1[0], selector); w6[2] = hc_byte_perm (w1[0], w0[3], selector); 
w6[1] = hc_byte_perm (w0[3], w0[2], selector); w6[0] = hc_byte_perm (w0[2], w0[1], selector); w5[3] = hc_byte_perm (w0[1], w0[0], selector); w5[2] = hc_byte_perm (w0[0], 0, selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_byte_perm (w2[0], w1[3], selector); w7[2] = hc_byte_perm (w1[3], w1[2], selector); w7[1] = hc_byte_perm (w1[2], w1[1], selector); w7[0] = hc_byte_perm (w1[1], w1[0], selector); w6[3] = hc_byte_perm (w1[0], w0[3], selector); w6[2] = hc_byte_perm (w0[3], w0[2], selector); w6[1] = hc_byte_perm (w0[2], w0[1], selector); w6[0] = hc_byte_perm (w0[1], w0[0], selector); w5[3] = hc_byte_perm (w0[0], 0, selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_byte_perm (w1[3], w1[2], selector); w7[2] = hc_byte_perm (w1[2], w1[1], selector); w7[1] = hc_byte_perm (w1[1], w1[0], selector); w7[0] = hc_byte_perm (w1[0], w0[3], selector); w6[3] = hc_byte_perm (w0[3], w0[2], selector); w6[2] = hc_byte_perm (w0[2], w0[1], selector); w6[1] = hc_byte_perm (w0[1], w0[0], selector); w6[0] = hc_byte_perm (w0[0], 0, selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_byte_perm (w1[2], w1[1], selector); w7[2] = hc_byte_perm (w1[1], w1[0], selector); w7[1] = hc_byte_perm (w1[0], w0[3], selector); w7[0] = hc_byte_perm (w0[3], w0[2], selector); w6[3] = hc_byte_perm (w0[2], 
w0[1], selector); w6[2] = hc_byte_perm (w0[1], w0[0], selector); w6[1] = hc_byte_perm (w0[0], 0, selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_byte_perm (w1[1], w1[0], selector); w7[2] = hc_byte_perm (w1[0], w0[3], selector); w7[1] = hc_byte_perm (w0[3], w0[2], selector); w7[0] = hc_byte_perm (w0[2], w0[1], selector); w6[3] = hc_byte_perm (w0[1], w0[0], selector); w6[2] = hc_byte_perm (w0[0], 0, selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_byte_perm (w1[0], w0[3], selector); w7[2] = hc_byte_perm (w0[3], w0[2], selector); w7[1] = hc_byte_perm (w0[2], w0[1], selector); w7[0] = hc_byte_perm (w0[1], w0[0], selector); w6[3] = hc_byte_perm (w0[0], 0, selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_byte_perm (w0[3], w0[2], selector); w7[2] = hc_byte_perm (w0[2], w0[1], selector); w7[1] = hc_byte_perm (w0[1], w0[0], selector); w7[0] = hc_byte_perm (w0[0], 0, selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] 
= 0; w0[0] = 0; break; case 29: w7[3] = hc_byte_perm (w0[2], w0[1], selector); w7[2] = hc_byte_perm (w0[1], w0[0], selector); w7[1] = hc_byte_perm (w0[0], 0, selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_byte_perm (w0[1], w0[0], selector); w7[2] = hc_byte_perm (w0[0], 0, selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: w7[3] = hc_byte_perm (w0[0], 0, selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_be (w7[3], 0, offset); w7[3] = hc_bytealign_be (w7[2], w7[3], offset); w7[2] 
= hc_bytealign_be (w7[1], w7[2], offset); w7[1] = hc_bytealign_be (w7[0], w7[1], offset); w7[0] = hc_bytealign_be (w6[3], w7[0], offset); w6[3] = hc_bytealign_be (w6[2], w6[3], offset); w6[2] = hc_bytealign_be (w6[1], w6[2], offset); w6[1] = hc_bytealign_be (w6[0], w6[1], offset); w6[0] = hc_bytealign_be (w5[3], w6[0], offset); w5[3] = hc_bytealign_be (w5[2], w5[3], offset); w5[2] = hc_bytealign_be (w5[1], w5[2], offset); w5[1] = hc_bytealign_be (w5[0], w5[1], offset); w5[0] = hc_bytealign_be (w4[3], w5[0], offset); w4[3] = hc_bytealign_be (w4[2], w4[3], offset); w4[2] = hc_bytealign_be (w4[1], w4[2], offset); w4[1] = hc_bytealign_be (w4[0], w4[1], offset); w4[0] = hc_bytealign_be (w3[3], w4[0], offset); w3[3] = hc_bytealign_be (w3[2], w3[3], offset); w3[2] = hc_bytealign_be (w3[1], w3[2], offset); w3[1] = hc_bytealign_be (w3[0], w3[1], offset); w3[0] = hc_bytealign_be (w2[3], w3[0], offset); w2[3] = hc_bytealign_be (w2[2], w2[3], offset); w2[2] = hc_bytealign_be (w2[1], w2[2], offset); w2[1] = hc_bytealign_be (w2[0], w2[1], offset); w2[0] = hc_bytealign_be (w1[3], w2[0], offset); w1[3] = hc_bytealign_be (w1[2], w1[3], offset); w1[2] = hc_bytealign_be (w1[1], w1[2], offset); w1[1] = hc_bytealign_be (w1[0], w1[1], offset); w1[0] = hc_bytealign_be (w0[3], w1[0], offset); w0[3] = hc_bytealign_be (w0[2], w0[3], offset); w0[2] = hc_bytealign_be (w0[1], w0[2], offset); w0[1] = hc_bytealign_be (w0[0], w0[1], offset); w0[0] = hc_bytealign_be ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_be (w7[3], 0, offset); c0[0] = hc_bytealign_be (w7[2], w7[3], offset); w7[3] = hc_bytealign_be (w7[1], w7[2], offset); w7[2] = hc_bytealign_be (w7[0], w7[1], offset); w7[1] = hc_bytealign_be (w6[3], w7[0], offset); w7[0] = hc_bytealign_be (w6[2], w6[3], offset); w6[3] = hc_bytealign_be (w6[1], w6[2], offset); w6[2] = hc_bytealign_be (w6[0], w6[1], offset); w6[1] = hc_bytealign_be (w5[3], w6[0], offset); w6[0] = hc_bytealign_be (w5[2], w5[3], offset); w5[3] = hc_bytealign_be 
(w5[1], w5[2], offset); w5[2] = hc_bytealign_be (w5[0], w5[1], offset); w5[1] = hc_bytealign_be (w4[3], w5[0], offset); w5[0] = hc_bytealign_be (w4[2], w4[3], offset); w4[3] = hc_bytealign_be (w4[1], w4[2], offset); w4[2] = hc_bytealign_be (w4[0], w4[1], offset); w4[1] = hc_bytealign_be (w3[3], w4[0], offset); w4[0] = hc_bytealign_be (w3[2], w3[3], offset); w3[3] = hc_bytealign_be (w3[1], w3[2], offset); w3[2] = hc_bytealign_be (w3[0], w3[1], offset); w3[1] = hc_bytealign_be (w2[3], w3[0], offset); w3[0] = hc_bytealign_be (w2[2], w2[3], offset); w2[3] = hc_bytealign_be (w2[1], w2[2], offset); w2[2] = hc_bytealign_be (w2[0], w2[1], offset); w2[1] = hc_bytealign_be (w1[3], w2[0], offset); w2[0] = hc_bytealign_be (w1[2], w1[3], offset); w1[3] = hc_bytealign_be (w1[1], w1[2], offset); w1[2] = hc_bytealign_be (w1[0], w1[1], offset); w1[1] = hc_bytealign_be (w0[3], w1[0], offset); w1[0] = hc_bytealign_be (w0[2], w0[3], offset); w0[3] = hc_bytealign_be (w0[1], w0[2], offset); w0[2] = hc_bytealign_be (w0[0], w0[1], offset); w0[1] = hc_bytealign_be ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_be (w7[3], 0, offset); c0[1] = hc_bytealign_be (w7[2], w7[3], offset); c0[0] = hc_bytealign_be (w7[1], w7[2], offset); w7[3] = hc_bytealign_be (w7[0], w7[1], offset); w7[2] = hc_bytealign_be (w6[3], w7[0], offset); w7[1] = hc_bytealign_be (w6[2], w6[3], offset); w7[0] = hc_bytealign_be (w6[1], w6[2], offset); w6[3] = hc_bytealign_be (w6[0], w6[1], offset); w6[2] = hc_bytealign_be (w5[3], w6[0], offset); w6[1] = hc_bytealign_be (w5[2], w5[3], offset); w6[0] = hc_bytealign_be (w5[1], w5[2], offset); w5[3] = hc_bytealign_be (w5[0], w5[1], offset); w5[2] = hc_bytealign_be (w4[3], w5[0], offset); w5[1] = hc_bytealign_be (w4[2], w4[3], offset); w5[0] = hc_bytealign_be (w4[1], w4[2], offset); w4[3] = hc_bytealign_be (w4[0], w4[1], offset); w4[2] = hc_bytealign_be (w3[3], w4[0], offset); w4[1] = hc_bytealign_be (w3[2], w3[3], offset); w4[0] = hc_bytealign_be (w3[1], 
w3[2], offset); w3[3] = hc_bytealign_be (w3[0], w3[1], offset); w3[2] = hc_bytealign_be (w2[3], w3[0], offset); w3[1] = hc_bytealign_be (w2[2], w2[3], offset); w3[0] = hc_bytealign_be (w2[1], w2[2], offset); w2[3] = hc_bytealign_be (w2[0], w2[1], offset); w2[2] = hc_bytealign_be (w1[3], w2[0], offset); w2[1] = hc_bytealign_be (w1[2], w1[3], offset); w2[0] = hc_bytealign_be (w1[1], w1[2], offset); w1[3] = hc_bytealign_be (w1[0], w1[1], offset); w1[2] = hc_bytealign_be (w0[3], w1[0], offset); w1[1] = hc_bytealign_be (w0[2], w0[3], offset); w1[0] = hc_bytealign_be (w0[1], w0[2], offset); w0[3] = hc_bytealign_be (w0[0], w0[1], offset); w0[2] = hc_bytealign_be ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_be (w7[3], 0, offset); c0[2] = hc_bytealign_be (w7[2], w7[3], offset); c0[1] = hc_bytealign_be (w7[1], w7[2], offset); c0[0] = hc_bytealign_be (w7[0], w7[1], offset); w7[3] = hc_bytealign_be (w6[3], w7[0], offset); w7[2] = hc_bytealign_be (w6[2], w6[3], offset); w7[1] = hc_bytealign_be (w6[1], w6[2], offset); w7[0] = hc_bytealign_be (w6[0], w6[1], offset); w6[3] = hc_bytealign_be (w5[3], w6[0], offset); w6[2] = hc_bytealign_be (w5[2], w5[3], offset); w6[1] = hc_bytealign_be (w5[1], w5[2], offset); w6[0] = hc_bytealign_be (w5[0], w5[1], offset); w5[3] = hc_bytealign_be (w4[3], w5[0], offset); w5[2] = hc_bytealign_be (w4[2], w4[3], offset); w5[1] = hc_bytealign_be (w4[1], w4[2], offset); w5[0] = hc_bytealign_be (w4[0], w4[1], offset); w4[3] = hc_bytealign_be (w3[3], w4[0], offset); w4[2] = hc_bytealign_be (w3[2], w3[3], offset); w4[1] = hc_bytealign_be (w3[1], w3[2], offset); w4[0] = hc_bytealign_be (w3[0], w3[1], offset); w3[3] = hc_bytealign_be (w2[3], w3[0], offset); w3[2] = hc_bytealign_be (w2[2], w2[3], offset); w3[1] = hc_bytealign_be (w2[1], w2[2], offset); w3[0] = hc_bytealign_be (w2[0], w2[1], offset); w2[3] = hc_bytealign_be (w1[3], w2[0], offset); w2[2] = hc_bytealign_be (w1[2], w1[3], offset); w2[1] = hc_bytealign_be (w1[1], 
w1[2], offset); w2[0] = hc_bytealign_be (w1[0], w1[1], offset); w1[3] = hc_bytealign_be (w0[3], w1[0], offset); w1[2] = hc_bytealign_be (w0[2], w0[3], offset); w1[1] = hc_bytealign_be (w0[1], w0[2], offset); w1[0] = hc_bytealign_be (w0[0], w0[1], offset); w0[3] = hc_bytealign_be ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_be (w7[3], 0, offset); c0[3] = hc_bytealign_be (w7[2], w7[3], offset); c0[2] = hc_bytealign_be (w7[1], w7[2], offset); c0[1] = hc_bytealign_be (w7[0], w7[1], offset); c0[0] = hc_bytealign_be (w6[3], w7[0], offset); w7[3] = hc_bytealign_be (w6[2], w6[3], offset); w7[2] = hc_bytealign_be (w6[1], w6[2], offset); w7[1] = hc_bytealign_be (w6[0], w6[1], offset); w7[0] = hc_bytealign_be (w5[3], w6[0], offset); w6[3] = hc_bytealign_be (w5[2], w5[3], offset); w6[2] = hc_bytealign_be (w5[1], w5[2], offset); w6[1] = hc_bytealign_be (w5[0], w5[1], offset); w6[0] = hc_bytealign_be (w4[3], w5[0], offset); w5[3] = hc_bytealign_be (w4[2], w4[3], offset); w5[2] = hc_bytealign_be (w4[1], w4[2], offset); w5[1] = hc_bytealign_be (w4[0], w4[1], offset); w5[0] = hc_bytealign_be (w3[3], w4[0], offset); w4[3] = hc_bytealign_be (w3[2], w3[3], offset); w4[2] = hc_bytealign_be (w3[1], w3[2], offset); w4[1] = hc_bytealign_be (w3[0], w3[1], offset); w4[0] = hc_bytealign_be (w2[3], w3[0], offset); w3[3] = hc_bytealign_be (w2[2], w2[3], offset); w3[2] = hc_bytealign_be (w2[1], w2[2], offset); w3[1] = hc_bytealign_be (w2[0], w2[1], offset); w3[0] = hc_bytealign_be (w1[3], w2[0], offset); w2[3] = hc_bytealign_be (w1[2], w1[3], offset); w2[2] = hc_bytealign_be (w1[1], w1[2], offset); w2[1] = hc_bytealign_be (w1[0], w1[1], offset); w2[0] = hc_bytealign_be (w0[3], w1[0], offset); w1[3] = hc_bytealign_be (w0[2], w0[3], offset); w1[2] = hc_bytealign_be (w0[1], w0[2], offset); w1[1] = hc_bytealign_be (w0[0], w0[1], offset); w1[0] = hc_bytealign_be ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = 
hc_bytealign_be (w7[3], 0, offset); c1[0] = hc_bytealign_be (w7[2], w7[3], offset); c0[3] = hc_bytealign_be (w7[1], w7[2], offset); c0[2] = hc_bytealign_be (w7[0], w7[1], offset); c0[1] = hc_bytealign_be (w6[3], w7[0], offset); c0[0] = hc_bytealign_be (w6[2], w6[3], offset); w7[3] = hc_bytealign_be (w6[1], w6[2], offset); w7[2] = hc_bytealign_be (w6[0], w6[1], offset); w7[1] = hc_bytealign_be (w5[3], w6[0], offset); w7[0] = hc_bytealign_be (w5[2], w5[3], offset); w6[3] = hc_bytealign_be (w5[1], w5[2], offset); w6[2] = hc_bytealign_be (w5[0], w5[1], offset); w6[1] = hc_bytealign_be (w4[3], w5[0], offset); w6[0] = hc_bytealign_be (w4[2], w4[3], offset); w5[3] = hc_bytealign_be (w4[1], w4[2], offset); w5[2] = hc_bytealign_be (w4[0], w4[1], offset); w5[1] = hc_bytealign_be (w3[3], w4[0], offset); w5[0] = hc_bytealign_be (w3[2], w3[3], offset); w4[3] = hc_bytealign_be (w3[1], w3[2], offset); w4[2] = hc_bytealign_be (w3[0], w3[1], offset); w4[1] = hc_bytealign_be (w2[3], w3[0], offset); w4[0] = hc_bytealign_be (w2[2], w2[3], offset); w3[3] = hc_bytealign_be (w2[1], w2[2], offset); w3[2] = hc_bytealign_be (w2[0], w2[1], offset); w3[1] = hc_bytealign_be (w1[3], w2[0], offset); w3[0] = hc_bytealign_be (w1[2], w1[3], offset); w2[3] = hc_bytealign_be (w1[1], w1[2], offset); w2[2] = hc_bytealign_be (w1[0], w1[1], offset); w2[1] = hc_bytealign_be (w0[3], w1[0], offset); w2[0] = hc_bytealign_be (w0[2], w0[3], offset); w1[3] = hc_bytealign_be (w0[1], w0[2], offset); w1[2] = hc_bytealign_be (w0[0], w0[1], offset); w1[1] = hc_bytealign_be ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_be (w7[3], 0, offset); c1[1] = hc_bytealign_be (w7[2], w7[3], offset); c1[0] = hc_bytealign_be (w7[1], w7[2], offset); c0[3] = hc_bytealign_be (w7[0], w7[1], offset); c0[2] = hc_bytealign_be (w6[3], w7[0], offset); c0[1] = hc_bytealign_be (w6[2], w6[3], offset); c0[0] = hc_bytealign_be (w6[1], w6[2], offset); w7[3] = hc_bytealign_be 
(w6[0], w6[1], offset); w7[2] = hc_bytealign_be (w5[3], w6[0], offset); w7[1] = hc_bytealign_be (w5[2], w5[3], offset); w7[0] = hc_bytealign_be (w5[1], w5[2], offset); w6[3] = hc_bytealign_be (w5[0], w5[1], offset); w6[2] = hc_bytealign_be (w4[3], w5[0], offset); w6[1] = hc_bytealign_be (w4[2], w4[3], offset); w6[0] = hc_bytealign_be (w4[1], w4[2], offset); w5[3] = hc_bytealign_be (w4[0], w4[1], offset); w5[2] = hc_bytealign_be (w3[3], w4[0], offset); w5[1] = hc_bytealign_be (w3[2], w3[3], offset); w5[0] = hc_bytealign_be (w3[1], w3[2], offset); w4[3] = hc_bytealign_be (w3[0], w3[1], offset); w4[2] = hc_bytealign_be (w2[3], w3[0], offset); w4[1] = hc_bytealign_be (w2[2], w2[3], offset); w4[0] = hc_bytealign_be (w2[1], w2[2], offset); w3[3] = hc_bytealign_be (w2[0], w2[1], offset); w3[2] = hc_bytealign_be (w1[3], w2[0], offset); w3[1] = hc_bytealign_be (w1[2], w1[3], offset); w3[0] = hc_bytealign_be (w1[1], w1[2], offset); w2[3] = hc_bytealign_be (w1[0], w1[1], offset); w2[2] = hc_bytealign_be (w0[3], w1[0], offset); w2[1] = hc_bytealign_be (w0[2], w0[3], offset); w2[0] = hc_bytealign_be (w0[1], w0[2], offset); w1[3] = hc_bytealign_be (w0[0], w0[1], offset); w1[2] = hc_bytealign_be ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_be (w7[3], 0, offset); c1[2] = hc_bytealign_be (w7[2], w7[3], offset); c1[1] = hc_bytealign_be (w7[1], w7[2], offset); c1[0] = hc_bytealign_be (w7[0], w7[1], offset); c0[3] = hc_bytealign_be (w6[3], w7[0], offset); c0[2] = hc_bytealign_be (w6[2], w6[3], offset); c0[1] = hc_bytealign_be (w6[1], w6[2], offset); c0[0] = hc_bytealign_be (w6[0], w6[1], offset); w7[3] = hc_bytealign_be (w5[3], w6[0], offset); w7[2] = hc_bytealign_be (w5[2], w5[3], offset); w7[1] = hc_bytealign_be (w5[1], w5[2], offset); w7[0] = hc_bytealign_be (w5[0], w5[1], offset); w6[3] = hc_bytealign_be (w4[3], w5[0], offset); w6[2] = hc_bytealign_be (w4[2], w4[3], offset); w6[1] = hc_bytealign_be 
(w4[1], w4[2], offset); w6[0] = hc_bytealign_be (w4[0], w4[1], offset); w5[3] = hc_bytealign_be (w3[3], w4[0], offset); w5[2] = hc_bytealign_be (w3[2], w3[3], offset); w5[1] = hc_bytealign_be (w3[1], w3[2], offset); w5[0] = hc_bytealign_be (w3[0], w3[1], offset); w4[3] = hc_bytealign_be (w2[3], w3[0], offset); w4[2] = hc_bytealign_be (w2[2], w2[3], offset); w4[1] = hc_bytealign_be (w2[1], w2[2], offset); w4[0] = hc_bytealign_be (w2[0], w2[1], offset); w3[3] = hc_bytealign_be (w1[3], w2[0], offset); w3[2] = hc_bytealign_be (w1[2], w1[3], offset); w3[1] = hc_bytealign_be (w1[1], w1[2], offset); w3[0] = hc_bytealign_be (w1[0], w1[1], offset); w2[3] = hc_bytealign_be (w0[3], w1[0], offset); w2[2] = hc_bytealign_be (w0[2], w0[3], offset); w2[1] = hc_bytealign_be (w0[1], w0[2], offset); w2[0] = hc_bytealign_be (w0[0], w0[1], offset); w1[3] = hc_bytealign_be ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_be (w7[3], 0, offset); c1[3] = hc_bytealign_be (w7[2], w7[3], offset); c1[2] = hc_bytealign_be (w7[1], w7[2], offset); c1[1] = hc_bytealign_be (w7[0], w7[1], offset); c1[0] = hc_bytealign_be (w6[3], w7[0], offset); c0[3] = hc_bytealign_be (w6[2], w6[3], offset); c0[2] = hc_bytealign_be (w6[1], w6[2], offset); c0[1] = hc_bytealign_be (w6[0], w6[1], offset); c0[0] = hc_bytealign_be (w5[3], w6[0], offset); w7[3] = hc_bytealign_be (w5[2], w5[3], offset); w7[2] = hc_bytealign_be (w5[1], w5[2], offset); w7[1] = hc_bytealign_be (w5[0], w5[1], offset); w7[0] = hc_bytealign_be (w4[3], w5[0], offset); w6[3] = hc_bytealign_be (w4[2], w4[3], offset); w6[2] = hc_bytealign_be (w4[1], w4[2], offset); w6[1] = hc_bytealign_be (w4[0], w4[1], offset); w6[0] = hc_bytealign_be (w3[3], w4[0], offset); w5[3] = hc_bytealign_be (w3[2], w3[3], offset); w5[2] = hc_bytealign_be (w3[1], w3[2], offset); w5[1] = hc_bytealign_be (w3[0], w3[1], offset); w5[0] = hc_bytealign_be (w2[3], w3[0], offset); w4[3] = 
hc_bytealign_be (w2[2], w2[3], offset); w4[2] = hc_bytealign_be (w2[1], w2[2], offset); w4[1] = hc_bytealign_be (w2[0], w2[1], offset); w4[0] = hc_bytealign_be (w1[3], w2[0], offset); w3[3] = hc_bytealign_be (w1[2], w1[3], offset); w3[2] = hc_bytealign_be (w1[1], w1[2], offset); w3[1] = hc_bytealign_be (w1[0], w1[1], offset); w3[0] = hc_bytealign_be (w0[3], w1[0], offset); w2[3] = hc_bytealign_be (w0[2], w0[3], offset); w2[2] = hc_bytealign_be (w0[1], w0[2], offset); w2[1] = hc_bytealign_be (w0[0], w0[1], offset); w2[0] = hc_bytealign_be ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_be (w7[3], 0, offset); c2[0] = hc_bytealign_be (w7[2], w7[3], offset); c1[3] = hc_bytealign_be (w7[1], w7[2], offset); c1[2] = hc_bytealign_be (w7[0], w7[1], offset); c1[1] = hc_bytealign_be (w6[3], w7[0], offset); c1[0] = hc_bytealign_be (w6[2], w6[3], offset); c0[3] = hc_bytealign_be (w6[1], w6[2], offset); c0[2] = hc_bytealign_be (w6[0], w6[1], offset); c0[1] = hc_bytealign_be (w5[3], w6[0], offset); c0[0] = hc_bytealign_be (w5[2], w5[3], offset); w7[3] = hc_bytealign_be (w5[1], w5[2], offset); w7[2] = hc_bytealign_be (w5[0], w5[1], offset); w7[1] = hc_bytealign_be (w4[3], w5[0], offset); w7[0] = hc_bytealign_be (w4[2], w4[3], offset); w6[3] = hc_bytealign_be (w4[1], w4[2], offset); w6[2] = hc_bytealign_be (w4[0], w4[1], offset); w6[1] = hc_bytealign_be (w3[3], w4[0], offset); w6[0] = hc_bytealign_be (w3[2], w3[3], offset); w5[3] = hc_bytealign_be (w3[1], w3[2], offset); w5[2] = hc_bytealign_be (w3[0], w3[1], offset); w5[1] = hc_bytealign_be (w2[3], w3[0], offset); w5[0] = hc_bytealign_be (w2[2], w2[3], offset); w4[3] = hc_bytealign_be (w2[1], w2[2], offset); w4[2] = hc_bytealign_be (w2[0], w2[1], offset); w4[1] = hc_bytealign_be (w1[3], w2[0], offset); w4[0] = hc_bytealign_be (w1[2], w1[3], offset); w3[3] = hc_bytealign_be (w1[1], w1[2], offset); w3[2] = hc_bytealign_be (w1[0], w1[1], 
offset); w3[1] = hc_bytealign_be (w0[3], w1[0], offset); w3[0] = hc_bytealign_be (w0[2], w0[3], offset); w2[3] = hc_bytealign_be (w0[1], w0[2], offset); w2[2] = hc_bytealign_be (w0[0], w0[1], offset); w2[1] = hc_bytealign_be ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_be (w7[3], 0, offset); c2[1] = hc_bytealign_be (w7[2], w7[3], offset); c2[0] = hc_bytealign_be (w7[1], w7[2], offset); c1[3] = hc_bytealign_be (w7[0], w7[1], offset); c1[2] = hc_bytealign_be (w6[3], w7[0], offset); c1[1] = hc_bytealign_be (w6[2], w6[3], offset); c1[0] = hc_bytealign_be (w6[1], w6[2], offset); c0[3] = hc_bytealign_be (w6[0], w6[1], offset); c0[2] = hc_bytealign_be (w5[3], w6[0], offset); c0[1] = hc_bytealign_be (w5[2], w5[3], offset); c0[0] = hc_bytealign_be (w5[1], w5[2], offset); w7[3] = hc_bytealign_be (w5[0], w5[1], offset); w7[2] = hc_bytealign_be (w4[3], w5[0], offset); w7[1] = hc_bytealign_be (w4[2], w4[3], offset); w7[0] = hc_bytealign_be (w4[1], w4[2], offset); w6[3] = hc_bytealign_be (w4[0], w4[1], offset); w6[2] = hc_bytealign_be (w3[3], w4[0], offset); w6[1] = hc_bytealign_be (w3[2], w3[3], offset); w6[0] = hc_bytealign_be (w3[1], w3[2], offset); w5[3] = hc_bytealign_be (w3[0], w3[1], offset); w5[2] = hc_bytealign_be (w2[3], w3[0], offset); w5[1] = hc_bytealign_be (w2[2], w2[3], offset); w5[0] = hc_bytealign_be (w2[1], w2[2], offset); w4[3] = hc_bytealign_be (w2[0], w2[1], offset); w4[2] = hc_bytealign_be (w1[3], w2[0], offset); w4[1] = hc_bytealign_be (w1[2], w1[3], offset); w4[0] = hc_bytealign_be (w1[1], w1[2], offset); w3[3] = hc_bytealign_be (w1[0], w1[1], offset); w3[2] = hc_bytealign_be (w0[3], w1[0], offset); w3[1] = hc_bytealign_be (w0[2], w0[3], offset); w3[0] = hc_bytealign_be (w0[1], w0[2], offset); w2[3] = hc_bytealign_be (w0[0], w0[1], offset); w2[2] = hc_bytealign_be ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] 
= 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_be (w7[3], 0, offset); c2[2] = hc_bytealign_be (w7[2], w7[3], offset); c2[1] = hc_bytealign_be (w7[1], w7[2], offset); c2[0] = hc_bytealign_be (w7[0], w7[1], offset); c1[3] = hc_bytealign_be (w6[3], w7[0], offset); c1[2] = hc_bytealign_be (w6[2], w6[3], offset); c1[1] = hc_bytealign_be (w6[1], w6[2], offset); c1[0] = hc_bytealign_be (w6[0], w6[1], offset); c0[3] = hc_bytealign_be (w5[3], w6[0], offset); c0[2] = hc_bytealign_be (w5[2], w5[3], offset); c0[1] = hc_bytealign_be (w5[1], w5[2], offset); c0[0] = hc_bytealign_be (w5[0], w5[1], offset); w7[3] = hc_bytealign_be (w4[3], w5[0], offset); w7[2] = hc_bytealign_be (w4[2], w4[3], offset); w7[1] = hc_bytealign_be (w4[1], w4[2], offset); w7[0] = hc_bytealign_be (w4[0], w4[1], offset); w6[3] = hc_bytealign_be (w3[3], w4[0], offset); w6[2] = hc_bytealign_be (w3[2], w3[3], offset); w6[1] = hc_bytealign_be (w3[1], w3[2], offset); w6[0] = hc_bytealign_be (w3[0], w3[1], offset); w5[3] = hc_bytealign_be (w2[3], w3[0], offset); w5[2] = hc_bytealign_be (w2[2], w2[3], offset); w5[1] = hc_bytealign_be (w2[1], w2[2], offset); w5[0] = hc_bytealign_be (w2[0], w2[1], offset); w4[3] = hc_bytealign_be (w1[3], w2[0], offset); w4[2] = hc_bytealign_be (w1[2], w1[3], offset); w4[1] = hc_bytealign_be (w1[1], w1[2], offset); w4[0] = hc_bytealign_be (w1[0], w1[1], offset); w3[3] = hc_bytealign_be (w0[3], w1[0], offset); w3[2] = hc_bytealign_be (w0[2], w0[3], offset); w3[1] = hc_bytealign_be (w0[1], w0[2], offset); w3[0] = hc_bytealign_be (w0[0], w0[1], offset); w2[3] = hc_bytealign_be ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_be (w7[3], 0, offset); c2[3] = hc_bytealign_be (w7[2], w7[3], offset); c2[2] = hc_bytealign_be (w7[1], w7[2], offset); c2[1] = hc_bytealign_be (w7[0], w7[1], offset); c2[0] = hc_bytealign_be (w6[3], 
w7[0], offset); c1[3] = hc_bytealign_be (w6[2], w6[3], offset); c1[2] = hc_bytealign_be (w6[1], w6[2], offset); c1[1] = hc_bytealign_be (w6[0], w6[1], offset); c1[0] = hc_bytealign_be (w5[3], w6[0], offset); c0[3] = hc_bytealign_be (w5[2], w5[3], offset); c0[2] = hc_bytealign_be (w5[1], w5[2], offset); c0[1] = hc_bytealign_be (w5[0], w5[1], offset); c0[0] = hc_bytealign_be (w4[3], w5[0], offset); w7[3] = hc_bytealign_be (w4[2], w4[3], offset); w7[2] = hc_bytealign_be (w4[1], w4[2], offset); w7[1] = hc_bytealign_be (w4[0], w4[1], offset); w7[0] = hc_bytealign_be (w3[3], w4[0], offset); w6[3] = hc_bytealign_be (w3[2], w3[3], offset); w6[2] = hc_bytealign_be (w3[1], w3[2], offset); w6[1] = hc_bytealign_be (w3[0], w3[1], offset); w6[0] = hc_bytealign_be (w2[3], w3[0], offset); w5[3] = hc_bytealign_be (w2[2], w2[3], offset); w5[2] = hc_bytealign_be (w2[1], w2[2], offset); w5[1] = hc_bytealign_be (w2[0], w2[1], offset); w5[0] = hc_bytealign_be (w1[3], w2[0], offset); w4[3] = hc_bytealign_be (w1[2], w1[3], offset); w4[2] = hc_bytealign_be (w1[1], w1[2], offset); w4[1] = hc_bytealign_be (w1[0], w1[1], offset); w4[0] = hc_bytealign_be (w0[3], w1[0], offset); w3[3] = hc_bytealign_be (w0[2], w0[3], offset); w3[2] = hc_bytealign_be (w0[1], w0[2], offset); w3[1] = hc_bytealign_be (w0[0], w0[1], offset); w3[0] = hc_bytealign_be ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_be (w7[3], 0, offset); c3[0] = hc_bytealign_be (w7[2], w7[3], offset); c2[3] = hc_bytealign_be (w7[1], w7[2], offset); c2[2] = hc_bytealign_be (w7[0], w7[1], offset); c2[1] = hc_bytealign_be (w6[3], w7[0], offset); c2[0] = hc_bytealign_be (w6[2], w6[3], offset); c1[3] = hc_bytealign_be (w6[1], w6[2], offset); c1[2] = hc_bytealign_be (w6[0], w6[1], offset); c1[1] = hc_bytealign_be (w5[3], w6[0], offset); c1[0] = hc_bytealign_be (w5[2], w5[3], offset); c0[3] = 
hc_bytealign_be (w5[1], w5[2], offset); c0[2] = hc_bytealign_be (w5[0], w5[1], offset); c0[1] = hc_bytealign_be (w4[3], w5[0], offset); c0[0] = hc_bytealign_be (w4[2], w4[3], offset); w7[3] = hc_bytealign_be (w4[1], w4[2], offset); w7[2] = hc_bytealign_be (w4[0], w4[1], offset); w7[1] = hc_bytealign_be (w3[3], w4[0], offset); w7[0] = hc_bytealign_be (w3[2], w3[3], offset); w6[3] = hc_bytealign_be (w3[1], w3[2], offset); w6[2] = hc_bytealign_be (w3[0], w3[1], offset); w6[1] = hc_bytealign_be (w2[3], w3[0], offset); w6[0] = hc_bytealign_be (w2[2], w2[3], offset); w5[3] = hc_bytealign_be (w2[1], w2[2], offset); w5[2] = hc_bytealign_be (w2[0], w2[1], offset); w5[1] = hc_bytealign_be (w1[3], w2[0], offset); w5[0] = hc_bytealign_be (w1[2], w1[3], offset); w4[3] = hc_bytealign_be (w1[1], w1[2], offset); w4[2] = hc_bytealign_be (w1[0], w1[1], offset); w4[1] = hc_bytealign_be (w0[3], w1[0], offset); w4[0] = hc_bytealign_be (w0[2], w0[3], offset); w3[3] = hc_bytealign_be (w0[1], w0[2], offset); w3[2] = hc_bytealign_be (w0[0], w0[1], offset); w3[1] = hc_bytealign_be ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_be (w7[3], 0, offset); c3[1] = hc_bytealign_be (w7[2], w7[3], offset); c3[0] = hc_bytealign_be (w7[1], w7[2], offset); c2[3] = hc_bytealign_be (w7[0], w7[1], offset); c2[2] = hc_bytealign_be (w6[3], w7[0], offset); c2[1] = hc_bytealign_be (w6[2], w6[3], offset); c2[0] = hc_bytealign_be (w6[1], w6[2], offset); c1[3] = hc_bytealign_be (w6[0], w6[1], offset); c1[2] = hc_bytealign_be (w5[3], w6[0], offset); c1[1] = hc_bytealign_be (w5[2], w5[3], offset); c1[0] = hc_bytealign_be (w5[1], w5[2], offset); c0[3] = hc_bytealign_be (w5[0], w5[1], offset); c0[2] = hc_bytealign_be (w4[3], w5[0], offset); c0[1] = hc_bytealign_be (w4[2], w4[3], offset); c0[0] = hc_bytealign_be (w4[1], w4[2], offset); w7[3] = hc_bytealign_be (w4[0], 
w4[1], offset); w7[2] = hc_bytealign_be (w3[3], w4[0], offset); w7[1] = hc_bytealign_be (w3[2], w3[3], offset); w7[0] = hc_bytealign_be (w3[1], w3[2], offset); w6[3] = hc_bytealign_be (w3[0], w3[1], offset); w6[2] = hc_bytealign_be (w2[3], w3[0], offset); w6[1] = hc_bytealign_be (w2[2], w2[3], offset); w6[0] = hc_bytealign_be (w2[1], w2[2], offset); w5[3] = hc_bytealign_be (w2[0], w2[1], offset); w5[2] = hc_bytealign_be (w1[3], w2[0], offset); w5[1] = hc_bytealign_be (w1[2], w1[3], offset); w5[0] = hc_bytealign_be (w1[1], w1[2], offset); w4[3] = hc_bytealign_be (w1[0], w1[1], offset); w4[2] = hc_bytealign_be (w0[3], w1[0], offset); w4[1] = hc_bytealign_be (w0[2], w0[3], offset); w4[0] = hc_bytealign_be (w0[1], w0[2], offset); w3[3] = hc_bytealign_be (w0[0], w0[1], offset); w3[2] = hc_bytealign_be ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_be (w7[3], 0, offset); c3[2] = hc_bytealign_be (w7[2], w7[3], offset); c3[1] = hc_bytealign_be (w7[1], w7[2], offset); c3[0] = hc_bytealign_be (w7[0], w7[1], offset); c2[3] = hc_bytealign_be (w6[3], w7[0], offset); c2[2] = hc_bytealign_be (w6[2], w6[3], offset); c2[1] = hc_bytealign_be (w6[1], w6[2], offset); c2[0] = hc_bytealign_be (w6[0], w6[1], offset); c1[3] = hc_bytealign_be (w5[3], w6[0], offset); c1[2] = hc_bytealign_be (w5[2], w5[3], offset); c1[1] = hc_bytealign_be (w5[1], w5[2], offset); c1[0] = hc_bytealign_be (w5[0], w5[1], offset); c0[3] = hc_bytealign_be (w4[3], w5[0], offset); c0[2] = hc_bytealign_be (w4[2], w4[3], offset); c0[1] = hc_bytealign_be (w4[1], w4[2], offset); c0[0] = hc_bytealign_be (w4[0], w4[1], offset); w7[3] = hc_bytealign_be (w3[3], w4[0], offset); w7[2] = hc_bytealign_be (w3[2], w3[3], offset); w7[1] = hc_bytealign_be (w3[1], w3[2], offset); w7[0] = hc_bytealign_be (w3[0], w3[1], offset); w6[3] = hc_bytealign_be (w2[3], w3[0], 
offset); w6[2] = hc_bytealign_be (w2[2], w2[3], offset); w6[1] = hc_bytealign_be (w2[1], w2[2], offset); w6[0] = hc_bytealign_be (w2[0], w2[1], offset); w5[3] = hc_bytealign_be (w1[3], w2[0], offset); w5[2] = hc_bytealign_be (w1[2], w1[3], offset); w5[1] = hc_bytealign_be (w1[1], w1[2], offset); w5[0] = hc_bytealign_be (w1[0], w1[1], offset); w4[3] = hc_bytealign_be (w0[3], w1[0], offset); w4[2] = hc_bytealign_be (w0[2], w0[3], offset); w4[1] = hc_bytealign_be (w0[1], w0[2], offset); w4[0] = hc_bytealign_be (w0[0], w0[1], offset); w3[3] = hc_bytealign_be ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_bytealign_be (w7[3], 0, offset); c3[3] = hc_bytealign_be (w7[2], w7[3], offset); c3[2] = hc_bytealign_be (w7[1], w7[2], offset); c3[1] = hc_bytealign_be (w7[0], w7[1], offset); c3[0] = hc_bytealign_be (w6[3], w7[0], offset); c2[3] = hc_bytealign_be (w6[2], w6[3], offset); c2[2] = hc_bytealign_be (w6[1], w6[2], offset); c2[1] = hc_bytealign_be (w6[0], w6[1], offset); c2[0] = hc_bytealign_be (w5[3], w6[0], offset); c1[3] = hc_bytealign_be (w5[2], w5[3], offset); c1[2] = hc_bytealign_be (w5[1], w5[2], offset); c1[1] = hc_bytealign_be (w5[0], w5[1], offset); c1[0] = hc_bytealign_be (w4[3], w5[0], offset); c0[3] = hc_bytealign_be (w4[2], w4[3], offset); c0[2] = hc_bytealign_be (w4[1], w4[2], offset); c0[1] = hc_bytealign_be (w4[0], w4[1], offset); c0[0] = hc_bytealign_be (w3[3], w4[0], offset); w7[3] = hc_bytealign_be (w3[2], w3[3], offset); w7[2] = hc_bytealign_be (w3[1], w3[2], offset); w7[1] = hc_bytealign_be (w3[0], w3[1], offset); w7[0] = hc_bytealign_be (w2[3], w3[0], offset); w6[3] = hc_bytealign_be (w2[2], w2[3], offset); w6[2] = hc_bytealign_be (w2[1], w2[2], offset); w6[1] = hc_bytealign_be (w2[0], w2[1], offset); w6[0] = hc_bytealign_be (w1[3], w2[0], offset); w5[3] = hc_bytealign_be (w1[2], w1[3], 
offset); w5[2] = hc_bytealign_be (w1[1], w1[2], offset); w5[1] = hc_bytealign_be (w1[0], w1[1], offset); w5[0] = hc_bytealign_be (w0[3], w1[0], offset); w4[3] = hc_bytealign_be (w0[2], w0[3], offset); w4[2] = hc_bytealign_be (w0[1], w0[2], offset); w4[1] = hc_bytealign_be (w0[0], w0[1], offset); w4[0] = hc_bytealign_be ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_bytealign_be (w7[3], 0, offset); c4[0] = hc_bytealign_be (w7[2], w7[3], offset); c3[3] = hc_bytealign_be (w7[1], w7[2], offset); c3[2] = hc_bytealign_be (w7[0], w7[1], offset); c3[1] = hc_bytealign_be (w6[3], w7[0], offset); c3[0] = hc_bytealign_be (w6[2], w6[3], offset); c2[3] = hc_bytealign_be (w6[1], w6[2], offset); c2[2] = hc_bytealign_be (w6[0], w6[1], offset); c2[1] = hc_bytealign_be (w5[3], w6[0], offset); c2[0] = hc_bytealign_be (w5[2], w5[3], offset); c1[3] = hc_bytealign_be (w5[1], w5[2], offset); c1[2] = hc_bytealign_be (w5[0], w5[1], offset); c1[1] = hc_bytealign_be (w4[3], w5[0], offset); c1[0] = hc_bytealign_be (w4[2], w4[3], offset); c0[3] = hc_bytealign_be (w4[1], w4[2], offset); c0[2] = hc_bytealign_be (w4[0], w4[1], offset); c0[1] = hc_bytealign_be (w3[3], w4[0], offset); c0[0] = hc_bytealign_be (w3[2], w3[3], offset); w7[3] = hc_bytealign_be (w3[1], w3[2], offset); w7[2] = hc_bytealign_be (w3[0], w3[1], offset); w7[1] = hc_bytealign_be (w2[3], w3[0], offset); w7[0] = hc_bytealign_be (w2[2], w2[3], offset); w6[3] = hc_bytealign_be (w2[1], w2[2], offset); w6[2] = hc_bytealign_be (w2[0], w2[1], offset); w6[1] = hc_bytealign_be (w1[3], w2[0], offset); w6[0] = hc_bytealign_be (w1[2], w1[3], offset); w5[3] = hc_bytealign_be (w1[1], w1[2], offset); w5[2] = hc_bytealign_be (w1[0], w1[1], offset); w5[1] = hc_bytealign_be (w0[3], w1[0], offset); w5[0] = hc_bytealign_be (w0[2], w0[3], offset); w4[3] = hc_bytealign_be 
(w0[1], w0[2], offset); w4[2] = hc_bytealign_be (w0[0], w0[1], offset); w4[1] = hc_bytealign_be ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_bytealign_be (w7[3], 0, offset); c4[1] = hc_bytealign_be (w7[2], w7[3], offset); c4[0] = hc_bytealign_be (w7[1], w7[2], offset); c3[3] = hc_bytealign_be (w7[0], w7[1], offset); c3[2] = hc_bytealign_be (w6[3], w7[0], offset); c3[1] = hc_bytealign_be (w6[2], w6[3], offset); c3[0] = hc_bytealign_be (w6[1], w6[2], offset); c2[3] = hc_bytealign_be (w6[0], w6[1], offset); c2[2] = hc_bytealign_be (w5[3], w6[0], offset); c2[1] = hc_bytealign_be (w5[2], w5[3], offset); c2[0] = hc_bytealign_be (w5[1], w5[2], offset); c1[3] = hc_bytealign_be (w5[0], w5[1], offset); c1[2] = hc_bytealign_be (w4[3], w5[0], offset); c1[1] = hc_bytealign_be (w4[2], w4[3], offset); c1[0] = hc_bytealign_be (w4[1], w4[2], offset); c0[3] = hc_bytealign_be (w4[0], w4[1], offset); c0[2] = hc_bytealign_be (w3[3], w4[0], offset); c0[1] = hc_bytealign_be (w3[2], w3[3], offset); c0[0] = hc_bytealign_be (w3[1], w3[2], offset); w7[3] = hc_bytealign_be (w3[0], w3[1], offset); w7[2] = hc_bytealign_be (w2[3], w3[0], offset); w7[1] = hc_bytealign_be (w2[2], w2[3], offset); w7[0] = hc_bytealign_be (w2[1], w2[2], offset); w6[3] = hc_bytealign_be (w2[0], w2[1], offset); w6[2] = hc_bytealign_be (w1[3], w2[0], offset); w6[1] = hc_bytealign_be (w1[2], w1[3], offset); w6[0] = hc_bytealign_be (w1[1], w1[2], offset); w5[3] = hc_bytealign_be (w1[0], w1[1], offset); w5[2] = hc_bytealign_be (w0[3], w1[0], offset); w5[1] = hc_bytealign_be (w0[2], w0[3], offset); w5[0] = hc_bytealign_be (w0[1], w0[2], offset); w4[3] = hc_bytealign_be (w0[0], w0[1], offset); w4[2] = hc_bytealign_be ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; 
w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_bytealign_be (w7[3], 0, offset); c4[2] = hc_bytealign_be (w7[2], w7[3], offset); c4[1] = hc_bytealign_be (w7[1], w7[2], offset); c4[0] = hc_bytealign_be (w7[0], w7[1], offset); c3[3] = hc_bytealign_be (w6[3], w7[0], offset); c3[2] = hc_bytealign_be (w6[2], w6[3], offset); c3[1] = hc_bytealign_be (w6[1], w6[2], offset); c3[0] = hc_bytealign_be (w6[0], w6[1], offset); c2[3] = hc_bytealign_be (w5[3], w6[0], offset); c2[2] = hc_bytealign_be (w5[2], w5[3], offset); c2[1] = hc_bytealign_be (w5[1], w5[2], offset); c2[0] = hc_bytealign_be (w5[0], w5[1], offset); c1[3] = hc_bytealign_be (w4[3], w5[0], offset); c1[2] = hc_bytealign_be (w4[2], w4[3], offset); c1[1] = hc_bytealign_be (w4[1], w4[2], offset); c1[0] = hc_bytealign_be (w4[0], w4[1], offset); c0[3] = hc_bytealign_be (w3[3], w4[0], offset); c0[2] = hc_bytealign_be (w3[2], w3[3], offset); c0[1] = hc_bytealign_be (w3[1], w3[2], offset); c0[0] = hc_bytealign_be (w3[0], w3[1], offset); w7[3] = hc_bytealign_be (w2[3], w3[0], offset); w7[2] = hc_bytealign_be (w2[2], w2[3], offset); w7[1] = hc_bytealign_be (w2[1], w2[2], offset); w7[0] = hc_bytealign_be (w2[0], w2[1], offset); w6[3] = hc_bytealign_be (w1[3], w2[0], offset); w6[2] = hc_bytealign_be (w1[2], w1[3], offset); w6[1] = hc_bytealign_be (w1[1], w1[2], offset); w6[0] = hc_bytealign_be (w1[0], w1[1], offset); w5[3] = hc_bytealign_be (w0[3], w1[0], offset); w5[2] = hc_bytealign_be (w0[2], w0[3], offset); w5[1] = hc_bytealign_be (w0[1], w0[2], offset); w5[0] = hc_bytealign_be (w0[0], w0[1], offset); w4[3] = hc_bytealign_be ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_bytealign_be (w7[3], 0, offset); c4[3] = hc_bytealign_be (w7[2], w7[3], 
offset); c4[2] = hc_bytealign_be (w7[1], w7[2], offset); c4[1] = hc_bytealign_be (w7[0], w7[1], offset); c4[0] = hc_bytealign_be (w6[3], w7[0], offset); c3[3] = hc_bytealign_be (w6[2], w6[3], offset); c3[2] = hc_bytealign_be (w6[1], w6[2], offset); c3[1] = hc_bytealign_be (w6[0], w6[1], offset); c3[0] = hc_bytealign_be (w5[3], w6[0], offset); c2[3] = hc_bytealign_be (w5[2], w5[3], offset); c2[2] = hc_bytealign_be (w5[1], w5[2], offset); c2[1] = hc_bytealign_be (w5[0], w5[1], offset); c2[0] = hc_bytealign_be (w4[3], w5[0], offset); c1[3] = hc_bytealign_be (w4[2], w4[3], offset); c1[2] = hc_bytealign_be (w4[1], w4[2], offset); c1[1] = hc_bytealign_be (w4[0], w4[1], offset); c1[0] = hc_bytealign_be (w3[3], w4[0], offset); c0[3] = hc_bytealign_be (w3[2], w3[3], offset); c0[2] = hc_bytealign_be (w3[1], w3[2], offset); c0[1] = hc_bytealign_be (w3[0], w3[1], offset); c0[0] = hc_bytealign_be (w2[3], w3[0], offset); w7[3] = hc_bytealign_be (w2[2], w2[3], offset); w7[2] = hc_bytealign_be (w2[1], w2[2], offset); w7[1] = hc_bytealign_be (w2[0], w2[1], offset); w7[0] = hc_bytealign_be (w1[3], w2[0], offset); w6[3] = hc_bytealign_be (w1[2], w1[3], offset); w6[2] = hc_bytealign_be (w1[1], w1[2], offset); w6[1] = hc_bytealign_be (w1[0], w1[1], offset); w6[0] = hc_bytealign_be (w0[3], w1[0], offset); w5[3] = hc_bytealign_be (w0[2], w0[3], offset); w5[2] = hc_bytealign_be (w0[1], w0[2], offset); w5[1] = hc_bytealign_be (w0[0], w0[1], offset); w5[0] = hc_bytealign_be ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_bytealign_be (w7[3], 0, offset); c5[0] = hc_bytealign_be (w7[2], w7[3], offset); c4[3] = hc_bytealign_be (w7[1], w7[2], offset); c4[2] = hc_bytealign_be (w7[0], w7[1], offset); c4[1] = hc_bytealign_be (w6[3], w7[0], offset); c4[0] = hc_bytealign_be (w6[2], 
w6[3], offset); c3[3] = hc_bytealign_be (w6[1], w6[2], offset); c3[2] = hc_bytealign_be (w6[0], w6[1], offset); c3[1] = hc_bytealign_be (w5[3], w6[0], offset); c3[0] = hc_bytealign_be (w5[2], w5[3], offset); c2[3] = hc_bytealign_be (w5[1], w5[2], offset); c2[2] = hc_bytealign_be (w5[0], w5[1], offset); c2[1] = hc_bytealign_be (w4[3], w5[0], offset); c2[0] = hc_bytealign_be (w4[2], w4[3], offset); c1[3] = hc_bytealign_be (w4[1], w4[2], offset); c1[2] = hc_bytealign_be (w4[0], w4[1], offset); c1[1] = hc_bytealign_be (w3[3], w4[0], offset); c1[0] = hc_bytealign_be (w3[2], w3[3], offset); c0[3] = hc_bytealign_be (w3[1], w3[2], offset); c0[2] = hc_bytealign_be (w3[0], w3[1], offset); c0[1] = hc_bytealign_be (w2[3], w3[0], offset); c0[0] = hc_bytealign_be (w2[2], w2[3], offset); w7[3] = hc_bytealign_be (w2[1], w2[2], offset); w7[2] = hc_bytealign_be (w2[0], w2[1], offset); w7[1] = hc_bytealign_be (w1[3], w2[0], offset); w7[0] = hc_bytealign_be (w1[2], w1[3], offset); w6[3] = hc_bytealign_be (w1[1], w1[2], offset); w6[2] = hc_bytealign_be (w1[0], w1[1], offset); w6[1] = hc_bytealign_be (w0[3], w1[0], offset); w6[0] = hc_bytealign_be (w0[2], w0[3], offset); w5[3] = hc_bytealign_be (w0[1], w0[2], offset); w5[2] = hc_bytealign_be (w0[0], w0[1], offset); w5[1] = hc_bytealign_be ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_bytealign_be (w7[3], 0, offset); c5[1] = hc_bytealign_be (w7[2], w7[3], offset); c5[0] = hc_bytealign_be (w7[1], w7[2], offset); c4[3] = hc_bytealign_be (w7[0], w7[1], offset); c4[2] = hc_bytealign_be (w6[3], w7[0], offset); c4[1] = hc_bytealign_be (w6[2], w6[3], offset); c4[0] = hc_bytealign_be (w6[1], w6[2], offset); c3[3] = hc_bytealign_be (w6[0], w6[1], offset); c3[2] = hc_bytealign_be (w5[3], w6[0], offset); c3[1] = 
hc_bytealign_be (w5[2], w5[3], offset); c3[0] = hc_bytealign_be (w5[1], w5[2], offset); c2[3] = hc_bytealign_be (w5[0], w5[1], offset); c2[2] = hc_bytealign_be (w4[3], w5[0], offset); c2[1] = hc_bytealign_be (w4[2], w4[3], offset); c2[0] = hc_bytealign_be (w4[1], w4[2], offset); c1[3] = hc_bytealign_be (w4[0], w4[1], offset); c1[2] = hc_bytealign_be (w3[3], w4[0], offset); c1[1] = hc_bytealign_be (w3[2], w3[3], offset); c1[0] = hc_bytealign_be (w3[1], w3[2], offset); c0[3] = hc_bytealign_be (w3[0], w3[1], offset); c0[2] = hc_bytealign_be (w2[3], w3[0], offset); c0[1] = hc_bytealign_be (w2[2], w2[3], offset); c0[0] = hc_bytealign_be (w2[1], w2[2], offset); w7[3] = hc_bytealign_be (w2[0], w2[1], offset); w7[2] = hc_bytealign_be (w1[3], w2[0], offset); w7[1] = hc_bytealign_be (w1[2], w1[3], offset); w7[0] = hc_bytealign_be (w1[1], w1[2], offset); w6[3] = hc_bytealign_be (w1[0], w1[1], offset); w6[2] = hc_bytealign_be (w0[3], w1[0], offset); w6[1] = hc_bytealign_be (w0[2], w0[3], offset); w6[0] = hc_bytealign_be (w0[1], w0[2], offset); w5[3] = hc_bytealign_be (w0[0], w0[1], offset); w5[2] = hc_bytealign_be ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_bytealign_be (w7[3], 0, offset); c5[2] = hc_bytealign_be (w7[2], w7[3], offset); c5[1] = hc_bytealign_be (w7[1], w7[2], offset); c5[0] = hc_bytealign_be (w7[0], w7[1], offset); c4[3] = hc_bytealign_be (w6[3], w7[0], offset); c4[2] = hc_bytealign_be (w6[2], w6[3], offset); c4[1] = hc_bytealign_be (w6[1], w6[2], offset); c4[0] = hc_bytealign_be (w6[0], w6[1], offset); c3[3] = hc_bytealign_be (w5[3], w6[0], offset); c3[2] = hc_bytealign_be (w5[2], w5[3], offset); c3[1] = hc_bytealign_be (w5[1], w5[2], offset); c3[0] = hc_bytealign_be (w5[0], w5[1], offset); c2[3] = hc_bytealign_be 
(w4[3], w5[0], offset); c2[2] = hc_bytealign_be (w4[2], w4[3], offset); c2[1] = hc_bytealign_be (w4[1], w4[2], offset); c2[0] = hc_bytealign_be (w4[0], w4[1], offset); c1[3] = hc_bytealign_be (w3[3], w4[0], offset); c1[2] = hc_bytealign_be (w3[2], w3[3], offset); c1[1] = hc_bytealign_be (w3[1], w3[2], offset); c1[0] = hc_bytealign_be (w3[0], w3[1], offset); c0[3] = hc_bytealign_be (w2[3], w3[0], offset); c0[2] = hc_bytealign_be (w2[2], w2[3], offset); c0[1] = hc_bytealign_be (w2[1], w2[2], offset); c0[0] = hc_bytealign_be (w2[0], w2[1], offset); w7[3] = hc_bytealign_be (w1[3], w2[0], offset); w7[2] = hc_bytealign_be (w1[2], w1[3], offset); w7[1] = hc_bytealign_be (w1[1], w1[2], offset); w7[0] = hc_bytealign_be (w1[0], w1[1], offset); w6[3] = hc_bytealign_be (w0[3], w1[0], offset); w6[2] = hc_bytealign_be (w0[2], w0[3], offset); w6[1] = hc_bytealign_be (w0[1], w0[2], offset); w6[0] = hc_bytealign_be (w0[0], w0[1], offset); w5[3] = hc_bytealign_be ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_bytealign_be (w7[3], 0, offset); c5[3] = hc_bytealign_be (w7[2], w7[3], offset); c5[2] = hc_bytealign_be (w7[1], w7[2], offset); c5[1] = hc_bytealign_be (w7[0], w7[1], offset); c5[0] = hc_bytealign_be (w6[3], w7[0], offset); c4[3] = hc_bytealign_be (w6[2], w6[3], offset); c4[2] = hc_bytealign_be (w6[1], w6[2], offset); c4[1] = hc_bytealign_be (w6[0], w6[1], offset); c4[0] = hc_bytealign_be (w5[3], w6[0], offset); c3[3] = hc_bytealign_be (w5[2], w5[3], offset); c3[2] = hc_bytealign_be (w5[1], w5[2], offset); c3[1] = hc_bytealign_be (w5[0], w5[1], offset); c3[0] = hc_bytealign_be (w4[3], w5[0], offset); c2[3] = hc_bytealign_be (w4[2], w4[3], offset); c2[2] = hc_bytealign_be (w4[1], w4[2], offset); c2[1] = hc_bytealign_be (w4[0], 
w4[1], offset); c2[0] = hc_bytealign_be (w3[3], w4[0], offset); c1[3] = hc_bytealign_be (w3[2], w3[3], offset); c1[2] = hc_bytealign_be (w3[1], w3[2], offset); c1[1] = hc_bytealign_be (w3[0], w3[1], offset); c1[0] = hc_bytealign_be (w2[3], w3[0], offset); c0[3] = hc_bytealign_be (w2[2], w2[3], offset); c0[2] = hc_bytealign_be (w2[1], w2[2], offset); c0[1] = hc_bytealign_be (w2[0], w2[1], offset); c0[0] = hc_bytealign_be (w1[3], w2[0], offset); w7[3] = hc_bytealign_be (w1[2], w1[3], offset); w7[2] = hc_bytealign_be (w1[1], w1[2], offset); w7[1] = hc_bytealign_be (w1[0], w1[1], offset); w7[0] = hc_bytealign_be (w0[3], w1[0], offset); w6[3] = hc_bytealign_be (w0[2], w0[3], offset); w6[2] = hc_bytealign_be (w0[1], w0[2], offset); w6[1] = hc_bytealign_be (w0[0], w0[1], offset); w6[0] = hc_bytealign_be ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_bytealign_be (w7[3], 0, offset); c6[0] = hc_bytealign_be (w7[2], w7[3], offset); c5[3] = hc_bytealign_be (w7[1], w7[2], offset); c5[2] = hc_bytealign_be (w7[0], w7[1], offset); c5[1] = hc_bytealign_be (w6[3], w7[0], offset); c5[0] = hc_bytealign_be (w6[2], w6[3], offset); c4[3] = hc_bytealign_be (w6[1], w6[2], offset); c4[2] = hc_bytealign_be (w6[0], w6[1], offset); c4[1] = hc_bytealign_be (w5[3], w6[0], offset); c4[0] = hc_bytealign_be (w5[2], w5[3], offset); c3[3] = hc_bytealign_be (w5[1], w5[2], offset); c3[2] = hc_bytealign_be (w5[0], w5[1], offset); c3[1] = hc_bytealign_be (w4[3], w5[0], offset); c3[0] = hc_bytealign_be (w4[2], w4[3], offset); c2[3] = hc_bytealign_be (w4[1], w4[2], offset); c2[2] = hc_bytealign_be (w4[0], w4[1], offset); c2[1] = hc_bytealign_be (w3[3], w4[0], offset); c2[0] = hc_bytealign_be (w3[2], w3[3], offset); c1[3] = hc_bytealign_be 
(w3[1], w3[2], offset); c1[2] = hc_bytealign_be (w3[0], w3[1], offset); c1[1] = hc_bytealign_be (w2[3], w3[0], offset); c1[0] = hc_bytealign_be (w2[2], w2[3], offset); c0[3] = hc_bytealign_be (w2[1], w2[2], offset); c0[2] = hc_bytealign_be (w2[0], w2[1], offset); c0[1] = hc_bytealign_be (w1[3], w2[0], offset); c0[0] = hc_bytealign_be (w1[2], w1[3], offset); w7[3] = hc_bytealign_be (w1[1], w1[2], offset); w7[2] = hc_bytealign_be (w1[0], w1[1], offset); w7[1] = hc_bytealign_be (w0[3], w1[0], offset); w7[0] = hc_bytealign_be (w0[2], w0[3], offset); w6[3] = hc_bytealign_be (w0[1], w0[2], offset); w6[2] = hc_bytealign_be (w0[0], w0[1], offset); w6[1] = hc_bytealign_be ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_bytealign_be (w7[3], 0, offset); c6[1] = hc_bytealign_be (w7[2], w7[3], offset); c6[0] = hc_bytealign_be (w7[1], w7[2], offset); c5[3] = hc_bytealign_be (w7[0], w7[1], offset); c5[2] = hc_bytealign_be (w6[3], w7[0], offset); c5[1] = hc_bytealign_be (w6[2], w6[3], offset); c5[0] = hc_bytealign_be (w6[1], w6[2], offset); c4[3] = hc_bytealign_be (w6[0], w6[1], offset); c4[2] = hc_bytealign_be (w5[3], w6[0], offset); c4[1] = hc_bytealign_be (w5[2], w5[3], offset); c4[0] = hc_bytealign_be (w5[1], w5[2], offset); c3[3] = hc_bytealign_be (w5[0], w5[1], offset); c3[2] = hc_bytealign_be (w4[3], w5[0], offset); c3[1] = hc_bytealign_be (w4[2], w4[3], offset); c3[0] = hc_bytealign_be (w4[1], w4[2], offset); c2[3] = hc_bytealign_be (w4[0], w4[1], offset); c2[2] = hc_bytealign_be (w3[3], w4[0], offset); c2[1] = hc_bytealign_be (w3[2], w3[3], offset); c2[0] = hc_bytealign_be (w3[1], w3[2], offset); c1[3] = hc_bytealign_be (w3[0], w3[1], offset); c1[2] = hc_bytealign_be (w2[3], w3[0], offset); c1[1] = 
hc_bytealign_be (w2[2], w2[3], offset); c1[0] = hc_bytealign_be (w2[1], w2[2], offset); c0[3] = hc_bytealign_be (w2[0], w2[1], offset); c0[2] = hc_bytealign_be (w1[3], w2[0], offset); c0[1] = hc_bytealign_be (w1[2], w1[3], offset); c0[0] = hc_bytealign_be (w1[1], w1[2], offset); w7[3] = hc_bytealign_be (w1[0], w1[1], offset); w7[2] = hc_bytealign_be (w0[3], w1[0], offset); w7[1] = hc_bytealign_be (w0[2], w0[3], offset); w7[0] = hc_bytealign_be (w0[1], w0[2], offset); w6[3] = hc_bytealign_be (w0[0], w0[1], offset); w6[2] = hc_bytealign_be ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_bytealign_be (w7[3], 0, offset); c6[2] = hc_bytealign_be (w7[2], w7[3], offset); c6[1] = hc_bytealign_be (w7[1], w7[2], offset); c6[0] = hc_bytealign_be (w7[0], w7[1], offset); c5[3] = hc_bytealign_be (w6[3], w7[0], offset); c5[2] = hc_bytealign_be (w6[2], w6[3], offset); c5[1] = hc_bytealign_be (w6[1], w6[2], offset); c5[0] = hc_bytealign_be (w6[0], w6[1], offset); c4[3] = hc_bytealign_be (w5[3], w6[0], offset); c4[2] = hc_bytealign_be (w5[2], w5[3], offset); c4[1] = hc_bytealign_be (w5[1], w5[2], offset); c4[0] = hc_bytealign_be (w5[0], w5[1], offset); c3[3] = hc_bytealign_be (w4[3], w5[0], offset); c3[2] = hc_bytealign_be (w4[2], w4[3], offset); c3[1] = hc_bytealign_be (w4[1], w4[2], offset); c3[0] = hc_bytealign_be (w4[0], w4[1], offset); c2[3] = hc_bytealign_be (w3[3], w4[0], offset); c2[2] = hc_bytealign_be (w3[2], w3[3], offset); c2[1] = hc_bytealign_be (w3[1], w3[2], offset); c2[0] = hc_bytealign_be (w3[0], w3[1], offset); c1[3] = hc_bytealign_be (w2[3], w3[0], offset); c1[2] = hc_bytealign_be (w2[2], w2[3], offset); c1[1] = hc_bytealign_be (w2[1], w2[2], offset); c1[0] = hc_bytealign_be (w2[0], 
w2[1], offset); c0[3] = hc_bytealign_be (w1[3], w2[0], offset); c0[2] = hc_bytealign_be (w1[2], w1[3], offset); c0[1] = hc_bytealign_be (w1[1], w1[2], offset); c0[0] = hc_bytealign_be (w1[0], w1[1], offset); w7[3] = hc_bytealign_be (w0[3], w1[0], offset); w7[2] = hc_bytealign_be (w0[2], w0[3], offset); w7[1] = hc_bytealign_be (w0[1], w0[2], offset); w7[0] = hc_bytealign_be (w0[0], w0[1], offset); w6[3] = hc_bytealign_be ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_bytealign_be (w7[3], 0, offset); c6[3] = hc_bytealign_be (w7[2], w7[3], offset); c6[2] = hc_bytealign_be (w7[1], w7[2], offset); c6[1] = hc_bytealign_be (w7[0], w7[1], offset); c6[0] = hc_bytealign_be (w6[3], w7[0], offset); c5[3] = hc_bytealign_be (w6[2], w6[3], offset); c5[2] = hc_bytealign_be (w6[1], w6[2], offset); c5[1] = hc_bytealign_be (w6[0], w6[1], offset); c5[0] = hc_bytealign_be (w5[3], w6[0], offset); c4[3] = hc_bytealign_be (w5[2], w5[3], offset); c4[2] = hc_bytealign_be (w5[1], w5[2], offset); c4[1] = hc_bytealign_be (w5[0], w5[1], offset); c4[0] = hc_bytealign_be (w4[3], w5[0], offset); c3[3] = hc_bytealign_be (w4[2], w4[3], offset); c3[2] = hc_bytealign_be (w4[1], w4[2], offset); c3[1] = hc_bytealign_be (w4[0], w4[1], offset); c3[0] = hc_bytealign_be (w3[3], w4[0], offset); c2[3] = hc_bytealign_be (w3[2], w3[3], offset); c2[2] = hc_bytealign_be (w3[1], w3[2], offset); c2[1] = hc_bytealign_be (w3[0], w3[1], offset); c2[0] = hc_bytealign_be (w2[3], w3[0], offset); c1[3] = hc_bytealign_be (w2[2], w2[3], offset); c1[2] = hc_bytealign_be (w2[1], w2[2], offset); c1[1] = hc_bytealign_be (w2[0], w2[1], offset); c1[0] = hc_bytealign_be (w1[3], w2[0], offset); c0[3] = hc_bytealign_be (w1[2], w1[3], 
offset); c0[2] = hc_bytealign_be (w1[1], w1[2], offset); c0[1] = hc_bytealign_be (w1[0], w1[1], offset); c0[0] = hc_bytealign_be (w0[3], w1[0], offset); w7[3] = hc_bytealign_be (w0[2], w0[3], offset); w7[2] = hc_bytealign_be (w0[1], w0[2], offset); w7[1] = hc_bytealign_be (w0[0], w0[1], offset); w7[0] = hc_bytealign_be ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_bytealign_be (w7[3], 0, offset); c7[0] = hc_bytealign_be (w7[2], w7[3], offset); c6[3] = hc_bytealign_be (w7[1], w7[2], offset); c6[2] = hc_bytealign_be (w7[0], w7[1], offset); c6[1] = hc_bytealign_be (w6[3], w7[0], offset); c6[0] = hc_bytealign_be (w6[2], w6[3], offset); c5[3] = hc_bytealign_be (w6[1], w6[2], offset); c5[2] = hc_bytealign_be (w6[0], w6[1], offset); c5[1] = hc_bytealign_be (w5[3], w6[0], offset); c5[0] = hc_bytealign_be (w5[2], w5[3], offset); c4[3] = hc_bytealign_be (w5[1], w5[2], offset); c4[2] = hc_bytealign_be (w5[0], w5[1], offset); c4[1] = hc_bytealign_be (w4[3], w5[0], offset); c4[0] = hc_bytealign_be (w4[2], w4[3], offset); c3[3] = hc_bytealign_be (w4[1], w4[2], offset); c3[2] = hc_bytealign_be (w4[0], w4[1], offset); c3[1] = hc_bytealign_be (w3[3], w4[0], offset); c3[0] = hc_bytealign_be (w3[2], w3[3], offset); c2[3] = hc_bytealign_be (w3[1], w3[2], offset); c2[2] = hc_bytealign_be (w3[0], w3[1], offset); c2[1] = hc_bytealign_be (w2[3], w3[0], offset); c2[0] = hc_bytealign_be (w2[2], w2[3], offset); c1[3] = hc_bytealign_be (w2[1], w2[2], offset); c1[2] = hc_bytealign_be (w2[0], w2[1], offset); c1[1] = hc_bytealign_be (w1[3], w2[0], offset); c1[0] = hc_bytealign_be (w1[2], w1[3], offset); c0[3] = hc_bytealign_be (w1[1], w1[2], offset); c0[2] = hc_bytealign_be (w1[0], w1[1], 
offset); c0[1] = hc_bytealign_be (w0[3], w1[0], offset); c0[0] = hc_bytealign_be (w0[2], w0[3], offset); w7[3] = hc_bytealign_be (w0[1], w0[2], offset); w7[2] = hc_bytealign_be (w0[0], w0[1], offset); w7[1] = hc_bytealign_be ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_bytealign_be (w7[3], 0, offset); c7[1] = hc_bytealign_be (w7[2], w7[3], offset); c7[0] = hc_bytealign_be (w7[1], w7[2], offset); c6[3] = hc_bytealign_be (w7[0], w7[1], offset); c6[2] = hc_bytealign_be (w6[3], w7[0], offset); c6[1] = hc_bytealign_be (w6[2], w6[3], offset); c6[0] = hc_bytealign_be (w6[1], w6[2], offset); c5[3] = hc_bytealign_be (w6[0], w6[1], offset); c5[2] = hc_bytealign_be (w5[3], w6[0], offset); c5[1] = hc_bytealign_be (w5[2], w5[3], offset); c5[0] = hc_bytealign_be (w5[1], w5[2], offset); c4[3] = hc_bytealign_be (w5[0], w5[1], offset); c4[2] = hc_bytealign_be (w4[3], w5[0], offset); c4[1] = hc_bytealign_be (w4[2], w4[3], offset); c4[0] = hc_bytealign_be (w4[1], w4[2], offset); c3[3] = hc_bytealign_be (w4[0], w4[1], offset); c3[2] = hc_bytealign_be (w3[3], w4[0], offset); c3[1] = hc_bytealign_be (w3[2], w3[3], offset); c3[0] = hc_bytealign_be (w3[1], w3[2], offset); c2[3] = hc_bytealign_be (w3[0], w3[1], offset); c2[2] = hc_bytealign_be (w2[3], w3[0], offset); c2[1] = hc_bytealign_be (w2[2], w2[3], offset); c2[0] = hc_bytealign_be (w2[1], w2[2], offset); c1[3] = hc_bytealign_be (w2[0], w2[1], offset); c1[2] = hc_bytealign_be (w1[3], w2[0], offset); c1[1] = hc_bytealign_be (w1[2], w1[3], offset); c1[0] = hc_bytealign_be (w1[1], w1[2], offset); c0[3] = hc_bytealign_be (w1[0], w1[1], offset); c0[2] = hc_bytealign_be (w0[3], w1[0], offset); c0[1] = hc_bytealign_be 
(w0[2], w0[3], offset); c0[0] = hc_bytealign_be (w0[1], w0[2], offset); w7[3] = hc_bytealign_be (w0[0], w0[1], offset); w7[2] = hc_bytealign_be ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_bytealign_be (w7[3], 0, offset); c7[2] = hc_bytealign_be (w7[2], w7[3], offset); c7[1] = hc_bytealign_be (w7[1], w7[2], offset); c7[0] = hc_bytealign_be (w7[0], w7[1], offset); c6[3] = hc_bytealign_be (w6[3], w7[0], offset); c6[2] = hc_bytealign_be (w6[2], w6[3], offset); c6[1] = hc_bytealign_be (w6[1], w6[2], offset); c6[0] = hc_bytealign_be (w6[0], w6[1], offset); c5[3] = hc_bytealign_be (w5[3], w6[0], offset); c5[2] = hc_bytealign_be (w5[2], w5[3], offset); c5[1] = hc_bytealign_be (w5[1], w5[2], offset); c5[0] = hc_bytealign_be (w5[0], w5[1], offset); c4[3] = hc_bytealign_be (w4[3], w5[0], offset); c4[2] = hc_bytealign_be (w4[2], w4[3], offset); c4[1] = hc_bytealign_be (w4[1], w4[2], offset); c4[0] = hc_bytealign_be (w4[0], w4[1], offset); c3[3] = hc_bytealign_be (w3[3], w4[0], offset); c3[2] = hc_bytealign_be (w3[2], w3[3], offset); c3[1] = hc_bytealign_be (w3[1], w3[2], offset); c3[0] = hc_bytealign_be (w3[0], w3[1], offset); c2[3] = hc_bytealign_be (w2[3], w3[0], offset); c2[2] = hc_bytealign_be (w2[2], w2[3], offset); c2[1] = hc_bytealign_be (w2[1], w2[2], offset); c2[0] = hc_bytealign_be (w2[0], w2[1], offset); c1[3] = hc_bytealign_be (w1[3], w2[0], offset); c1[2] = hc_bytealign_be (w1[2], w1[3], offset); c1[1] = hc_bytealign_be (w1[1], w1[2], offset); c1[0] = hc_bytealign_be (w1[0], w1[1], offset); c0[3] = hc_bytealign_be (w0[3], w1[0], offset); c0[2] = hc_bytealign_be (w0[2], w0[3], offset); c0[1] = hc_bytealign_be (w0[1], w0[2], offset); 
c0[0] = hc_bytealign_be (w0[0], w0[1], offset); w7[3] = hc_bytealign_be ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm ( 0, w7[3], selector); w7[3] = hc_byte_perm (w7[3], w7[2], selector); w7[2] = hc_byte_perm (w7[2], w7[1], selector); w7[1] = hc_byte_perm (w7[1], w7[0], selector); w7[0] = hc_byte_perm (w7[0], w6[3], selector); w6[3] = hc_byte_perm (w6[3], w6[2], selector); w6[2] = hc_byte_perm (w6[2], w6[1], selector); w6[1] = hc_byte_perm (w6[1], w6[0], selector); w6[0] = hc_byte_perm (w6[0], w5[3], selector); w5[3] = hc_byte_perm (w5[3], w5[2], selector); w5[2] = hc_byte_perm (w5[2], w5[1], selector); w5[1] = hc_byte_perm (w5[1], w5[0], selector); w5[0] = hc_byte_perm (w5[0], w4[3], selector); w4[3] = hc_byte_perm (w4[3], w4[2], selector); w4[2] = hc_byte_perm (w4[2], w4[1], selector); w4[1] = hc_byte_perm (w4[1], w4[0], selector); w4[0] = hc_byte_perm (w4[0], w3[3], selector); w3[3] = hc_byte_perm (w3[3], w3[2], selector); w3[2] = hc_byte_perm (w3[2], w3[1], selector); w3[1] = hc_byte_perm (w3[1], w3[0], selector); w3[0] = hc_byte_perm (w3[0], w2[3], selector); w2[3] = hc_byte_perm (w2[3], w2[2], selector); w2[2] = hc_byte_perm (w2[2], w2[1], selector); w2[1] = hc_byte_perm (w2[1], w2[0], selector); w2[0] = hc_byte_perm (w2[0], w1[3], selector); w1[3] = hc_byte_perm (w1[3], w1[2], selector); 
w1[2] = hc_byte_perm (w1[2], w1[1], selector); w1[1] = hc_byte_perm (w1[1], w1[0], selector); w1[0] = hc_byte_perm (w1[0], w0[3], selector); w0[3] = hc_byte_perm (w0[3], w0[2], selector); w0[2] = hc_byte_perm (w0[2], w0[1], selector); w0[1] = hc_byte_perm (w0[1], w0[0], selector); w0[0] = hc_byte_perm (w0[0], 0, selector); break; case 1: c0[1] = hc_byte_perm ( 0, w7[3], selector); c0[0] = hc_byte_perm (w7[3], w7[2], selector); w7[3] = hc_byte_perm (w7[2], w7[1], selector); w7[2] = hc_byte_perm (w7[1], w7[0], selector); w7[1] = hc_byte_perm (w7[0], w6[3], selector); w7[0] = hc_byte_perm (w6[3], w6[2], selector); w6[3] = hc_byte_perm (w6[2], w6[1], selector); w6[2] = hc_byte_perm (w6[1], w6[0], selector); w6[1] = hc_byte_perm (w6[0], w5[3], selector); w6[0] = hc_byte_perm (w5[3], w5[2], selector); w5[3] = hc_byte_perm (w5[2], w5[1], selector); w5[2] = hc_byte_perm (w5[1], w5[0], selector); w5[1] = hc_byte_perm (w5[0], w4[3], selector); w5[0] = hc_byte_perm (w4[3], w4[2], selector); w4[3] = hc_byte_perm (w4[2], w4[1], selector); w4[2] = hc_byte_perm (w4[1], w4[0], selector); w4[1] = hc_byte_perm (w4[0], w3[3], selector); w4[0] = hc_byte_perm (w3[3], w3[2], selector); w3[3] = hc_byte_perm (w3[2], w3[1], selector); w3[2] = hc_byte_perm (w3[1], w3[0], selector); w3[1] = hc_byte_perm (w3[0], w2[3], selector); w3[0] = hc_byte_perm (w2[3], w2[2], selector); w2[3] = hc_byte_perm (w2[2], w2[1], selector); w2[2] = hc_byte_perm (w2[1], w2[0], selector); w2[1] = hc_byte_perm (w2[0], w1[3], selector); w2[0] = hc_byte_perm (w1[3], w1[2], selector); w1[3] = hc_byte_perm (w1[2], w1[1], selector); w1[2] = hc_byte_perm (w1[1], w1[0], selector); w1[1] = hc_byte_perm (w1[0], w0[3], selector); w1[0] = hc_byte_perm (w0[3], w0[2], selector); w0[3] = hc_byte_perm (w0[2], w0[1], selector); w0[2] = hc_byte_perm (w0[1], w0[0], selector); w0[1] = hc_byte_perm (w0[0], 0, selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm ( 0, w7[3], selector); c0[1] = hc_byte_perm (w7[3], w7[2], 
selector); c0[0] = hc_byte_perm (w7[2], w7[1], selector); w7[3] = hc_byte_perm (w7[1], w7[0], selector); w7[2] = hc_byte_perm (w7[0], w6[3], selector); w7[1] = hc_byte_perm (w6[3], w6[2], selector); w7[0] = hc_byte_perm (w6[2], w6[1], selector); w6[3] = hc_byte_perm (w6[1], w6[0], selector); w6[2] = hc_byte_perm (w6[0], w5[3], selector); w6[1] = hc_byte_perm (w5[3], w5[2], selector); w6[0] = hc_byte_perm (w5[2], w5[1], selector); w5[3] = hc_byte_perm (w5[1], w5[0], selector); w5[2] = hc_byte_perm (w5[0], w4[3], selector); w5[1] = hc_byte_perm (w4[3], w4[2], selector); w5[0] = hc_byte_perm (w4[2], w4[1], selector); w4[3] = hc_byte_perm (w4[1], w4[0], selector); w4[2] = hc_byte_perm (w4[0], w3[3], selector); w4[1] = hc_byte_perm (w3[3], w3[2], selector); w4[0] = hc_byte_perm (w3[2], w3[1], selector); w3[3] = hc_byte_perm (w3[1], w3[0], selector); w3[2] = hc_byte_perm (w3[0], w2[3], selector); w3[1] = hc_byte_perm (w2[3], w2[2], selector); w3[0] = hc_byte_perm (w2[2], w2[1], selector); w2[3] = hc_byte_perm (w2[1], w2[0], selector); w2[2] = hc_byte_perm (w2[0], w1[3], selector); w2[1] = hc_byte_perm (w1[3], w1[2], selector); w2[0] = hc_byte_perm (w1[2], w1[1], selector); w1[3] = hc_byte_perm (w1[1], w1[0], selector); w1[2] = hc_byte_perm (w1[0], w0[3], selector); w1[1] = hc_byte_perm (w0[3], w0[2], selector); w1[0] = hc_byte_perm (w0[2], w0[1], selector); w0[3] = hc_byte_perm (w0[1], w0[0], selector); w0[2] = hc_byte_perm (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm ( 0, w7[3], selector); c0[2] = hc_byte_perm (w7[3], w7[2], selector); c0[1] = hc_byte_perm (w7[2], w7[1], selector); c0[0] = hc_byte_perm (w7[1], w7[0], selector); w7[3] = hc_byte_perm (w7[0], w6[3], selector); w7[2] = hc_byte_perm (w6[3], w6[2], selector); w7[1] = hc_byte_perm (w6[2], w6[1], selector); w7[0] = hc_byte_perm (w6[1], w6[0], selector); w6[3] = hc_byte_perm (w6[0], w5[3], selector); w6[2] = hc_byte_perm (w5[3], w5[2], selector); w6[1] = hc_byte_perm (w5[2], 
w5[1], selector); w6[0] = hc_byte_perm (w5[1], w5[0], selector); w5[3] = hc_byte_perm (w5[0], w4[3], selector); w5[2] = hc_byte_perm (w4[3], w4[2], selector); w5[1] = hc_byte_perm (w4[2], w4[1], selector); w5[0] = hc_byte_perm (w4[1], w4[0], selector); w4[3] = hc_byte_perm (w4[0], w3[3], selector); w4[2] = hc_byte_perm (w3[3], w3[2], selector); w4[1] = hc_byte_perm (w3[2], w3[1], selector); w4[0] = hc_byte_perm (w3[1], w3[0], selector); w3[3] = hc_byte_perm (w3[0], w2[3], selector); w3[2] = hc_byte_perm (w2[3], w2[2], selector); w3[1] = hc_byte_perm (w2[2], w2[1], selector); w3[0] = hc_byte_perm (w2[1], w2[0], selector); w2[3] = hc_byte_perm (w2[0], w1[3], selector); w2[2] = hc_byte_perm (w1[3], w1[2], selector); w2[1] = hc_byte_perm (w1[2], w1[1], selector); w2[0] = hc_byte_perm (w1[1], w1[0], selector); w1[3] = hc_byte_perm (w1[0], w0[3], selector); w1[2] = hc_byte_perm (w0[3], w0[2], selector); w1[1] = hc_byte_perm (w0[2], w0[1], selector); w1[0] = hc_byte_perm (w0[1], w0[0], selector); w0[3] = hc_byte_perm (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm ( 0, w7[3], selector); c0[3] = hc_byte_perm (w7[3], w7[2], selector); c0[2] = hc_byte_perm (w7[2], w7[1], selector); c0[1] = hc_byte_perm (w7[1], w7[0], selector); c0[0] = hc_byte_perm (w7[0], w6[3], selector); w7[3] = hc_byte_perm (w6[3], w6[2], selector); w7[2] = hc_byte_perm (w6[2], w6[1], selector); w7[1] = hc_byte_perm (w6[1], w6[0], selector); w7[0] = hc_byte_perm (w6[0], w5[3], selector); w6[3] = hc_byte_perm (w5[3], w5[2], selector); w6[2] = hc_byte_perm (w5[2], w5[1], selector); w6[1] = hc_byte_perm (w5[1], w5[0], selector); w6[0] = hc_byte_perm (w5[0], w4[3], selector); w5[3] = hc_byte_perm (w4[3], w4[2], selector); w5[2] = hc_byte_perm (w4[2], w4[1], selector); w5[1] = hc_byte_perm (w4[1], w4[0], selector); w5[0] = hc_byte_perm (w4[0], w3[3], selector); w4[3] = hc_byte_perm (w3[3], w3[2], selector); w4[2] = hc_byte_perm (w3[2], w3[1], selector); w4[1] = 
hc_byte_perm (w3[1], w3[0], selector); w4[0] = hc_byte_perm (w3[0], w2[3], selector); w3[3] = hc_byte_perm (w2[3], w2[2], selector); w3[2] = hc_byte_perm (w2[2], w2[1], selector); w3[1] = hc_byte_perm (w2[1], w2[0], selector); w3[0] = hc_byte_perm (w2[0], w1[3], selector); w2[3] = hc_byte_perm (w1[3], w1[2], selector); w2[2] = hc_byte_perm (w1[2], w1[1], selector); w2[1] = hc_byte_perm (w1[1], w1[0], selector); w2[0] = hc_byte_perm (w1[0], w0[3], selector); w1[3] = hc_byte_perm (w0[3], w0[2], selector); w1[2] = hc_byte_perm (w0[2], w0[1], selector); w1[1] = hc_byte_perm (w0[1], w0[0], selector); w1[0] = hc_byte_perm (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm ( 0, w7[3], selector); c1[0] = hc_byte_perm (w7[3], w7[2], selector); c0[3] = hc_byte_perm (w7[2], w7[1], selector); c0[2] = hc_byte_perm (w7[1], w7[0], selector); c0[1] = hc_byte_perm (w7[0], w6[3], selector); c0[0] = hc_byte_perm (w6[3], w6[2], selector); w7[3] = hc_byte_perm (w6[2], w6[1], selector); w7[2] = hc_byte_perm (w6[1], w6[0], selector); w7[1] = hc_byte_perm (w6[0], w5[3], selector); w7[0] = hc_byte_perm (w5[3], w5[2], selector); w6[3] = hc_byte_perm (w5[2], w5[1], selector); w6[2] = hc_byte_perm (w5[1], w5[0], selector); w6[1] = hc_byte_perm (w5[0], w4[3], selector); w6[0] = hc_byte_perm (w4[3], w4[2], selector); w5[3] = hc_byte_perm (w4[2], w4[1], selector); w5[2] = hc_byte_perm (w4[1], w4[0], selector); w5[1] = hc_byte_perm (w4[0], w3[3], selector); w5[0] = hc_byte_perm (w3[3], w3[2], selector); w4[3] = hc_byte_perm (w3[2], w3[1], selector); w4[2] = hc_byte_perm (w3[1], w3[0], selector); w4[1] = hc_byte_perm (w3[0], w2[3], selector); w4[0] = hc_byte_perm (w2[3], w2[2], selector); w3[3] = hc_byte_perm (w2[2], w2[1], selector); w3[2] = hc_byte_perm (w2[1], w2[0], selector); w3[1] = hc_byte_perm (w2[0], w1[3], selector); w3[0] = hc_byte_perm (w1[3], w1[2], selector); w2[3] = hc_byte_perm (w1[2], w1[1], selector); w2[2] = hc_byte_perm (w1[1], 
w1[0], selector); w2[1] = hc_byte_perm (w1[0], w0[3], selector); w2[0] = hc_byte_perm (w0[3], w0[2], selector); w1[3] = hc_byte_perm (w0[2], w0[1], selector); w1[2] = hc_byte_perm (w0[1], w0[0], selector); w1[1] = hc_byte_perm (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm ( 0, w7[3], selector); c1[1] = hc_byte_perm (w7[3], w7[2], selector); c1[0] = hc_byte_perm (w7[2], w7[1], selector); c0[3] = hc_byte_perm (w7[1], w7[0], selector); c0[2] = hc_byte_perm (w7[0], w6[3], selector); c0[1] = hc_byte_perm (w6[3], w6[2], selector); c0[0] = hc_byte_perm (w6[2], w6[1], selector); w7[3] = hc_byte_perm (w6[1], w6[0], selector); w7[2] = hc_byte_perm (w6[0], w5[3], selector); w7[1] = hc_byte_perm (w5[3], w5[2], selector); w7[0] = hc_byte_perm (w5[2], w5[1], selector); w6[3] = hc_byte_perm (w5[1], w5[0], selector); w6[2] = hc_byte_perm (w5[0], w4[3], selector); w6[1] = hc_byte_perm (w4[3], w4[2], selector); w6[0] = hc_byte_perm (w4[2], w4[1], selector); w5[3] = hc_byte_perm (w4[1], w4[0], selector); w5[2] = hc_byte_perm (w4[0], w3[3], selector); w5[1] = hc_byte_perm (w3[3], w3[2], selector); w5[0] = hc_byte_perm (w3[2], w3[1], selector); w4[3] = hc_byte_perm (w3[1], w3[0], selector); w4[2] = hc_byte_perm (w3[0], w2[3], selector); w4[1] = hc_byte_perm (w2[3], w2[2], selector); w4[0] = hc_byte_perm (w2[2], w2[1], selector); w3[3] = hc_byte_perm (w2[1], w2[0], selector); w3[2] = hc_byte_perm (w2[0], w1[3], selector); w3[1] = hc_byte_perm (w1[3], w1[2], selector); w3[0] = hc_byte_perm (w1[2], w1[1], selector); w2[3] = hc_byte_perm (w1[1], w1[0], selector); w2[2] = hc_byte_perm (w1[0], w0[3], selector); w2[1] = hc_byte_perm (w0[3], w0[2], selector); w2[0] = hc_byte_perm (w0[2], w0[1], selector); w1[3] = hc_byte_perm (w0[1], w0[0], selector); w1[2] = hc_byte_perm (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_byte_perm ( 0, w7[3], selector); c1[2] = 
hc_byte_perm (w7[3], w7[2], selector); c1[1] = hc_byte_perm (w7[2], w7[1], selector); c1[0] = hc_byte_perm (w7[1], w7[0], selector); c0[3] = hc_byte_perm (w7[0], w6[3], selector); c0[2] = hc_byte_perm (w6[3], w6[2], selector); c0[1] = hc_byte_perm (w6[2], w6[1], selector); c0[0] = hc_byte_perm (w6[1], w6[0], selector); w7[3] = hc_byte_perm (w6[0], w5[3], selector); w7[2] = hc_byte_perm (w5[3], w5[2], selector); w7[1] = hc_byte_perm (w5[2], w5[1], selector); w7[0] = hc_byte_perm (w5[1], w5[0], selector); w6[3] = hc_byte_perm (w5[0], w4[3], selector); w6[2] = hc_byte_perm (w4[3], w4[2], selector); w6[1] = hc_byte_perm (w4[2], w4[1], selector); w6[0] = hc_byte_perm (w4[1], w4[0], selector); w5[3] = hc_byte_perm (w4[0], w3[3], selector); w5[2] = hc_byte_perm (w3[3], w3[2], selector); w5[1] = hc_byte_perm (w3[2], w3[1], selector); w5[0] = hc_byte_perm (w3[1], w3[0], selector); w4[3] = hc_byte_perm (w3[0], w2[3], selector); w4[2] = hc_byte_perm (w2[3], w2[2], selector); w4[1] = hc_byte_perm (w2[2], w2[1], selector); w4[0] = hc_byte_perm (w2[1], w2[0], selector); w3[3] = hc_byte_perm (w2[0], w1[3], selector); w3[2] = hc_byte_perm (w1[3], w1[2], selector); w3[1] = hc_byte_perm (w1[2], w1[1], selector); w3[0] = hc_byte_perm (w1[1], w1[0], selector); w2[3] = hc_byte_perm (w1[0], w0[3], selector); w2[2] = hc_byte_perm (w0[3], w0[2], selector); w2[1] = hc_byte_perm (w0[2], w0[1], selector); w2[0] = hc_byte_perm (w0[1], w0[0], selector); w1[3] = hc_byte_perm (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm ( 0, w7[3], selector); c1[3] = hc_byte_perm (w7[3], w7[2], selector); c1[2] = hc_byte_perm (w7[2], w7[1], selector); c1[1] = hc_byte_perm (w7[1], w7[0], selector); c1[0] = hc_byte_perm (w7[0], w6[3], selector); c0[3] = hc_byte_perm (w6[3], w6[2], selector); c0[2] = hc_byte_perm (w6[2], w6[1], selector); c0[1] = hc_byte_perm (w6[1], w6[0], selector); c0[0] = hc_byte_perm (w6[0], w5[3], 
selector); w7[3] = hc_byte_perm (w5[3], w5[2], selector); w7[2] = hc_byte_perm (w5[2], w5[1], selector); w7[1] = hc_byte_perm (w5[1], w5[0], selector); w7[0] = hc_byte_perm (w5[0], w4[3], selector); w6[3] = hc_byte_perm (w4[3], w4[2], selector); w6[2] = hc_byte_perm (w4[2], w4[1], selector); w6[1] = hc_byte_perm (w4[1], w4[0], selector); w6[0] = hc_byte_perm (w4[0], w3[3], selector); w5[3] = hc_byte_perm (w3[3], w3[2], selector); w5[2] = hc_byte_perm (w3[2], w3[1], selector); w5[1] = hc_byte_perm (w3[1], w3[0], selector); w5[0] = hc_byte_perm (w3[0], w2[3], selector); w4[3] = hc_byte_perm (w2[3], w2[2], selector); w4[2] = hc_byte_perm (w2[2], w2[1], selector); w4[1] = hc_byte_perm (w2[1], w2[0], selector); w4[0] = hc_byte_perm (w2[0], w1[3], selector); w3[3] = hc_byte_perm (w1[3], w1[2], selector); w3[2] = hc_byte_perm (w1[2], w1[1], selector); w3[1] = hc_byte_perm (w1[1], w1[0], selector); w3[0] = hc_byte_perm (w1[0], w0[3], selector); w2[3] = hc_byte_perm (w0[3], w0[2], selector); w2[2] = hc_byte_perm (w0[2], w0[1], selector); w2[1] = hc_byte_perm (w0[1], w0[0], selector); w2[0] = hc_byte_perm (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm ( 0, w7[3], selector); c2[0] = hc_byte_perm (w7[3], w7[2], selector); c1[3] = hc_byte_perm (w7[2], w7[1], selector); c1[2] = hc_byte_perm (w7[1], w7[0], selector); c1[1] = hc_byte_perm (w7[0], w6[3], selector); c1[0] = hc_byte_perm (w6[3], w6[2], selector); c0[3] = hc_byte_perm (w6[2], w6[1], selector); c0[2] = hc_byte_perm (w6[1], w6[0], selector); c0[1] = hc_byte_perm (w6[0], w5[3], selector); c0[0] = hc_byte_perm (w5[3], w5[2], selector); w7[3] = hc_byte_perm (w5[2], w5[1], selector); w7[2] = hc_byte_perm (w5[1], w5[0], selector); w7[1] = hc_byte_perm (w5[0], w4[3], selector); w7[0] = hc_byte_perm (w4[3], w4[2], selector); w6[3] = hc_byte_perm (w4[2], w4[1], selector); w6[2] = hc_byte_perm (w4[1], w4[0], selector); w6[1] = 
hc_byte_perm (w4[0], w3[3], selector); w6[0] = hc_byte_perm (w3[3], w3[2], selector); w5[3] = hc_byte_perm (w3[2], w3[1], selector); w5[2] = hc_byte_perm (w3[1], w3[0], selector); w5[1] = hc_byte_perm (w3[0], w2[3], selector); w5[0] = hc_byte_perm (w2[3], w2[2], selector); w4[3] = hc_byte_perm (w2[2], w2[1], selector); w4[2] = hc_byte_perm (w2[1], w2[0], selector); w4[1] = hc_byte_perm (w2[0], w1[3], selector); w4[0] = hc_byte_perm (w1[3], w1[2], selector); w3[3] = hc_byte_perm (w1[2], w1[1], selector); w3[2] = hc_byte_perm (w1[1], w1[0], selector); w3[1] = hc_byte_perm (w1[0], w0[3], selector); w3[0] = hc_byte_perm (w0[3], w0[2], selector); w2[3] = hc_byte_perm (w0[2], w0[1], selector); w2[2] = hc_byte_perm (w0[1], w0[0], selector); w2[1] = hc_byte_perm (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm ( 0, w7[3], selector); c2[1] = hc_byte_perm (w7[3], w7[2], selector); c2[0] = hc_byte_perm (w7[2], w7[1], selector); c1[3] = hc_byte_perm (w7[1], w7[0], selector); c1[2] = hc_byte_perm (w7[0], w6[3], selector); c1[1] = hc_byte_perm (w6[3], w6[2], selector); c1[0] = hc_byte_perm (w6[2], w6[1], selector); c0[3] = hc_byte_perm (w6[1], w6[0], selector); c0[2] = hc_byte_perm (w6[0], w5[3], selector); c0[1] = hc_byte_perm (w5[3], w5[2], selector); c0[0] = hc_byte_perm (w5[2], w5[1], selector); w7[3] = hc_byte_perm (w5[1], w5[0], selector); w7[2] = hc_byte_perm (w5[0], w4[3], selector); w7[1] = hc_byte_perm (w4[3], w4[2], selector); w7[0] = hc_byte_perm (w4[2], w4[1], selector); w6[3] = hc_byte_perm (w4[1], w4[0], selector); w6[2] = hc_byte_perm (w4[0], w3[3], selector); w6[1] = hc_byte_perm (w3[3], w3[2], selector); w6[0] = hc_byte_perm (w3[2], w3[1], selector); w5[3] = hc_byte_perm (w3[1], w3[0], selector); w5[2] = hc_byte_perm (w3[0], w2[3], selector); w5[1] = hc_byte_perm (w2[3], w2[2], selector); w5[0] = hc_byte_perm (w2[2], w2[1], selector); w4[3] = 
hc_byte_perm (w2[1], w2[0], selector); w4[2] = hc_byte_perm (w2[0], w1[3], selector); w4[1] = hc_byte_perm (w1[3], w1[2], selector); w4[0] = hc_byte_perm (w1[2], w1[1], selector); w3[3] = hc_byte_perm (w1[1], w1[0], selector); w3[2] = hc_byte_perm (w1[0], w0[3], selector); w3[1] = hc_byte_perm (w0[3], w0[2], selector); w3[0] = hc_byte_perm (w0[2], w0[1], selector); w2[3] = hc_byte_perm (w0[1], w0[0], selector); w2[2] = hc_byte_perm (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm ( 0, w7[3], selector); c2[2] = hc_byte_perm (w7[3], w7[2], selector); c2[1] = hc_byte_perm (w7[2], w7[1], selector); c2[0] = hc_byte_perm (w7[1], w7[0], selector); c1[3] = hc_byte_perm (w7[0], w6[3], selector); c1[2] = hc_byte_perm (w6[3], w6[2], selector); c1[1] = hc_byte_perm (w6[2], w6[1], selector); c1[0] = hc_byte_perm (w6[1], w6[0], selector); c0[3] = hc_byte_perm (w6[0], w5[3], selector); c0[2] = hc_byte_perm (w5[3], w5[2], selector); c0[1] = hc_byte_perm (w5[2], w5[1], selector); c0[0] = hc_byte_perm (w5[1], w5[0], selector); w7[3] = hc_byte_perm (w5[0], w4[3], selector); w7[2] = hc_byte_perm (w4[3], w4[2], selector); w7[1] = hc_byte_perm (w4[2], w4[1], selector); w7[0] = hc_byte_perm (w4[1], w4[0], selector); w6[3] = hc_byte_perm (w4[0], w3[3], selector); w6[2] = hc_byte_perm (w3[3], w3[2], selector); w6[1] = hc_byte_perm (w3[2], w3[1], selector); w6[0] = hc_byte_perm (w3[1], w3[0], selector); w5[3] = hc_byte_perm (w3[0], w2[3], selector); w5[2] = hc_byte_perm (w2[3], w2[2], selector); w5[1] = hc_byte_perm (w2[2], w2[1], selector); w5[0] = hc_byte_perm (w2[1], w2[0], selector); w4[3] = hc_byte_perm (w2[0], w1[3], selector); w4[2] = hc_byte_perm (w1[3], w1[2], selector); w4[1] = hc_byte_perm (w1[2], w1[1], selector); w4[0] = hc_byte_perm (w1[1], w1[0], selector); w3[3] = hc_byte_perm (w1[0], w0[3], selector); w3[2] = hc_byte_perm (w0[3], w0[2], selector); w3[1] = 
hc_byte_perm (w0[2], w0[1], selector); w3[0] = hc_byte_perm (w0[1], w0[0], selector); w2[3] = hc_byte_perm (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm ( 0, w7[3], selector); c2[3] = hc_byte_perm (w7[3], w7[2], selector); c2[2] = hc_byte_perm (w7[2], w7[1], selector); c2[1] = hc_byte_perm (w7[1], w7[0], selector); c2[0] = hc_byte_perm (w7[0], w6[3], selector); c1[3] = hc_byte_perm (w6[3], w6[2], selector); c1[2] = hc_byte_perm (w6[2], w6[1], selector); c1[1] = hc_byte_perm (w6[1], w6[0], selector); c1[0] = hc_byte_perm (w6[0], w5[3], selector); c0[3] = hc_byte_perm (w5[3], w5[2], selector); c0[2] = hc_byte_perm (w5[2], w5[1], selector); c0[1] = hc_byte_perm (w5[1], w5[0], selector); c0[0] = hc_byte_perm (w5[0], w4[3], selector); w7[3] = hc_byte_perm (w4[3], w4[2], selector); w7[2] = hc_byte_perm (w4[2], w4[1], selector); w7[1] = hc_byte_perm (w4[1], w4[0], selector); w7[0] = hc_byte_perm (w4[0], w3[3], selector); w6[3] = hc_byte_perm (w3[3], w3[2], selector); w6[2] = hc_byte_perm (w3[2], w3[1], selector); w6[1] = hc_byte_perm (w3[1], w3[0], selector); w6[0] = hc_byte_perm (w3[0], w2[3], selector); w5[3] = hc_byte_perm (w2[3], w2[2], selector); w5[2] = hc_byte_perm (w2[2], w2[1], selector); w5[1] = hc_byte_perm (w2[1], w2[0], selector); w5[0] = hc_byte_perm (w2[0], w1[3], selector); w4[3] = hc_byte_perm (w1[3], w1[2], selector); w4[2] = hc_byte_perm (w1[2], w1[1], selector); w4[1] = hc_byte_perm (w1[1], w1[0], selector); w4[0] = hc_byte_perm (w1[0], w0[3], selector); w3[3] = hc_byte_perm (w0[3], w0[2], selector); w3[2] = hc_byte_perm (w0[2], w0[1], selector); w3[1] = hc_byte_perm (w0[1], w0[0], selector); w3[0] = hc_byte_perm (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm ( 0, w7[3], 
selector); c3[0] = hc_byte_perm (w7[3], w7[2], selector); c2[3] = hc_byte_perm (w7[2], w7[1], selector); c2[2] = hc_byte_perm (w7[1], w7[0], selector); c2[1] = hc_byte_perm (w7[0], w6[3], selector); c2[0] = hc_byte_perm (w6[3], w6[2], selector); c1[3] = hc_byte_perm (w6[2], w6[1], selector); c1[2] = hc_byte_perm (w6[1], w6[0], selector); c1[1] = hc_byte_perm (w6[0], w5[3], selector); c1[0] = hc_byte_perm (w5[3], w5[2], selector); c0[3] = hc_byte_perm (w5[2], w5[1], selector); c0[2] = hc_byte_perm (w5[1], w5[0], selector); c0[1] = hc_byte_perm (w5[0], w4[3], selector); c0[0] = hc_byte_perm (w4[3], w4[2], selector); w7[3] = hc_byte_perm (w4[2], w4[1], selector); w7[2] = hc_byte_perm (w4[1], w4[0], selector); w7[1] = hc_byte_perm (w4[0], w3[3], selector); w7[0] = hc_byte_perm (w3[3], w3[2], selector); w6[3] = hc_byte_perm (w3[2], w3[1], selector); w6[2] = hc_byte_perm (w3[1], w3[0], selector); w6[1] = hc_byte_perm (w3[0], w2[3], selector); w6[0] = hc_byte_perm (w2[3], w2[2], selector); w5[3] = hc_byte_perm (w2[2], w2[1], selector); w5[2] = hc_byte_perm (w2[1], w2[0], selector); w5[1] = hc_byte_perm (w2[0], w1[3], selector); w5[0] = hc_byte_perm (w1[3], w1[2], selector); w4[3] = hc_byte_perm (w1[2], w1[1], selector); w4[2] = hc_byte_perm (w1[1], w1[0], selector); w4[1] = hc_byte_perm (w1[0], w0[3], selector); w4[0] = hc_byte_perm (w0[3], w0[2], selector); w3[3] = hc_byte_perm (w0[2], w0[1], selector); w3[2] = hc_byte_perm (w0[1], w0[0], selector); w3[1] = hc_byte_perm (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm ( 0, w7[3], selector); c3[1] = hc_byte_perm (w7[3], w7[2], selector); c3[0] = hc_byte_perm (w7[2], w7[1], selector); c2[3] = hc_byte_perm (w7[1], w7[0], selector); c2[2] = hc_byte_perm (w7[0], w6[3], selector); c2[1] = hc_byte_perm (w6[3], w6[2], selector); c2[0] = hc_byte_perm (w6[2], w6[1], selector); 
c1[3] = hc_byte_perm (w6[1], w6[0], selector); c1[2] = hc_byte_perm (w6[0], w5[3], selector); c1[1] = hc_byte_perm (w5[3], w5[2], selector); c1[0] = hc_byte_perm (w5[2], w5[1], selector); c0[3] = hc_byte_perm (w5[1], w5[0], selector); c0[2] = hc_byte_perm (w5[0], w4[3], selector); c0[1] = hc_byte_perm (w4[3], w4[2], selector); c0[0] = hc_byte_perm (w4[2], w4[1], selector); w7[3] = hc_byte_perm (w4[1], w4[0], selector); w7[2] = hc_byte_perm (w4[0], w3[3], selector); w7[1] = hc_byte_perm (w3[3], w3[2], selector); w7[0] = hc_byte_perm (w3[2], w3[1], selector); w6[3] = hc_byte_perm (w3[1], w3[0], selector); w6[2] = hc_byte_perm (w3[0], w2[3], selector); w6[1] = hc_byte_perm (w2[3], w2[2], selector); w6[0] = hc_byte_perm (w2[2], w2[1], selector); w5[3] = hc_byte_perm (w2[1], w2[0], selector); w5[2] = hc_byte_perm (w2[0], w1[3], selector); w5[1] = hc_byte_perm (w1[3], w1[2], selector); w5[0] = hc_byte_perm (w1[2], w1[1], selector); w4[3] = hc_byte_perm (w1[1], w1[0], selector); w4[2] = hc_byte_perm (w1[0], w0[3], selector); w4[1] = hc_byte_perm (w0[3], w0[2], selector); w4[0] = hc_byte_perm (w0[2], w0[1], selector); w3[3] = hc_byte_perm (w0[1], w0[0], selector); w3[2] = hc_byte_perm (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm ( 0, w7[3], selector); c3[2] = hc_byte_perm (w7[3], w7[2], selector); c3[1] = hc_byte_perm (w7[2], w7[1], selector); c3[0] = hc_byte_perm (w7[1], w7[0], selector); c2[3] = hc_byte_perm (w7[0], w6[3], selector); c2[2] = hc_byte_perm (w6[3], w6[2], selector); c2[1] = hc_byte_perm (w6[2], w6[1], selector); c2[0] = hc_byte_perm (w6[1], w6[0], selector); c1[3] = hc_byte_perm (w6[0], w5[3], selector); c1[2] = hc_byte_perm (w5[3], w5[2], selector); c1[1] = hc_byte_perm (w5[2], w5[1], selector); c1[0] = hc_byte_perm (w5[1], w5[0], selector); c0[3] = hc_byte_perm (w5[0], w4[3], selector); 
c0[2] = hc_byte_perm (w4[3], w4[2], selector); c0[1] = hc_byte_perm (w4[2], w4[1], selector); c0[0] = hc_byte_perm (w4[1], w4[0], selector); w7[3] = hc_byte_perm (w4[0], w3[3], selector); w7[2] = hc_byte_perm (w3[3], w3[2], selector); w7[1] = hc_byte_perm (w3[2], w3[1], selector); w7[0] = hc_byte_perm (w3[1], w3[0], selector); w6[3] = hc_byte_perm (w3[0], w2[3], selector); w6[2] = hc_byte_perm (w2[3], w2[2], selector); w6[1] = hc_byte_perm (w2[2], w2[1], selector); w6[0] = hc_byte_perm (w2[1], w2[0], selector); w5[3] = hc_byte_perm (w2[0], w1[3], selector); w5[2] = hc_byte_perm (w1[3], w1[2], selector); w5[1] = hc_byte_perm (w1[2], w1[1], selector); w5[0] = hc_byte_perm (w1[1], w1[0], selector); w4[3] = hc_byte_perm (w1[0], w0[3], selector); w4[2] = hc_byte_perm (w0[3], w0[2], selector); w4[1] = hc_byte_perm (w0[2], w0[1], selector); w4[0] = hc_byte_perm (w0[1], w0[0], selector); w3[3] = hc_byte_perm (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_byte_perm ( 0, w7[3], selector); c3[3] = hc_byte_perm (w7[3], w7[2], selector); c3[2] = hc_byte_perm (w7[2], w7[1], selector); c3[1] = hc_byte_perm (w7[1], w7[0], selector); c3[0] = hc_byte_perm (w7[0], w6[3], selector); c2[3] = hc_byte_perm (w6[3], w6[2], selector); c2[2] = hc_byte_perm (w6[2], w6[1], selector); c2[1] = hc_byte_perm (w6[1], w6[0], selector); c2[0] = hc_byte_perm (w6[0], w5[3], selector); c1[3] = hc_byte_perm (w5[3], w5[2], selector); c1[2] = hc_byte_perm (w5[2], w5[1], selector); c1[1] = hc_byte_perm (w5[1], w5[0], selector); c1[0] = hc_byte_perm (w5[0], w4[3], selector); c0[3] = hc_byte_perm (w4[3], w4[2], selector); c0[2] = hc_byte_perm (w4[2], w4[1], selector); c0[1] = hc_byte_perm (w4[1], w4[0], selector); c0[0] = hc_byte_perm (w4[0], w3[3], selector); w7[3] = hc_byte_perm (w3[3], w3[2], selector); w7[2] = hc_byte_perm (w3[2], w3[1], 
selector); w7[1] = hc_byte_perm (w3[1], w3[0], selector); w7[0] = hc_byte_perm (w3[0], w2[3], selector); w6[3] = hc_byte_perm (w2[3], w2[2], selector); w6[2] = hc_byte_perm (w2[2], w2[1], selector); w6[1] = hc_byte_perm (w2[1], w2[0], selector); w6[0] = hc_byte_perm (w2[0], w1[3], selector); w5[3] = hc_byte_perm (w1[3], w1[2], selector); w5[2] = hc_byte_perm (w1[2], w1[1], selector); w5[1] = hc_byte_perm (w1[1], w1[0], selector); w5[0] = hc_byte_perm (w1[0], w0[3], selector); w4[3] = hc_byte_perm (w0[3], w0[2], selector); w4[2] = hc_byte_perm (w0[2], w0[1], selector); w4[1] = hc_byte_perm (w0[1], w0[0], selector); w4[0] = hc_byte_perm (w0[0], 0, selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_byte_perm ( 0, w7[3], selector); c4[0] = hc_byte_perm (w7[3], w7[2], selector); c3[3] = hc_byte_perm (w7[2], w7[1], selector); c3[2] = hc_byte_perm (w7[1], w7[0], selector); c3[1] = hc_byte_perm (w7[0], w6[3], selector); c3[0] = hc_byte_perm (w6[3], w6[2], selector); c2[3] = hc_byte_perm (w6[2], w6[1], selector); c2[2] = hc_byte_perm (w6[1], w6[0], selector); c2[1] = hc_byte_perm (w6[0], w5[3], selector); c2[0] = hc_byte_perm (w5[3], w5[2], selector); c1[3] = hc_byte_perm (w5[2], w5[1], selector); c1[2] = hc_byte_perm (w5[1], w5[0], selector); c1[1] = hc_byte_perm (w5[0], w4[3], selector); c1[0] = hc_byte_perm (w4[3], w4[2], selector); c0[3] = hc_byte_perm (w4[2], w4[1], selector); c0[2] = hc_byte_perm (w4[1], w4[0], selector); c0[1] = hc_byte_perm (w4[0], w3[3], selector); c0[0] = hc_byte_perm (w3[3], w3[2], selector); w7[3] = hc_byte_perm (w3[2], w3[1], selector); w7[2] = hc_byte_perm (w3[1], w3[0], selector); w7[1] = hc_byte_perm (w3[0], w2[3], selector); w7[0] = hc_byte_perm (w2[3], w2[2], selector); w6[3] = hc_byte_perm (w2[2], w2[1], selector); w6[2] = hc_byte_perm (w2[1], w2[0], selector); w6[1] = 
hc_byte_perm (w2[0], w1[3], selector); w6[0] = hc_byte_perm (w1[3], w1[2], selector); w5[3] = hc_byte_perm (w1[2], w1[1], selector); w5[2] = hc_byte_perm (w1[1], w1[0], selector); w5[1] = hc_byte_perm (w1[0], w0[3], selector); w5[0] = hc_byte_perm (w0[3], w0[2], selector); w4[3] = hc_byte_perm (w0[2], w0[1], selector); w4[2] = hc_byte_perm (w0[1], w0[0], selector); w4[1] = hc_byte_perm (w0[0], 0, selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_byte_perm ( 0, w7[3], selector); c4[1] = hc_byte_perm (w7[3], w7[2], selector); c4[0] = hc_byte_perm (w7[2], w7[1], selector); c3[3] = hc_byte_perm (w7[1], w7[0], selector); c3[2] = hc_byte_perm (w7[0], w6[3], selector); c3[1] = hc_byte_perm (w6[3], w6[2], selector); c3[0] = hc_byte_perm (w6[2], w6[1], selector); c2[3] = hc_byte_perm (w6[1], w6[0], selector); c2[2] = hc_byte_perm (w6[0], w5[3], selector); c2[1] = hc_byte_perm (w5[3], w5[2], selector); c2[0] = hc_byte_perm (w5[2], w5[1], selector); c1[3] = hc_byte_perm (w5[1], w5[0], selector); c1[2] = hc_byte_perm (w5[0], w4[3], selector); c1[1] = hc_byte_perm (w4[3], w4[2], selector); c1[0] = hc_byte_perm (w4[2], w4[1], selector); c0[3] = hc_byte_perm (w4[1], w4[0], selector); c0[2] = hc_byte_perm (w4[0], w3[3], selector); c0[1] = hc_byte_perm (w3[3], w3[2], selector); c0[0] = hc_byte_perm (w3[2], w3[1], selector); w7[3] = hc_byte_perm (w3[1], w3[0], selector); w7[2] = hc_byte_perm (w3[0], w2[3], selector); w7[1] = hc_byte_perm (w2[3], w2[2], selector); w7[0] = hc_byte_perm (w2[2], w2[1], selector); w6[3] = hc_byte_perm (w2[1], w2[0], selector); w6[2] = hc_byte_perm (w2[0], w1[3], selector); w6[1] = hc_byte_perm (w1[3], w1[2], selector); w6[0] = hc_byte_perm (w1[2], w1[1], selector); w5[3] = hc_byte_perm (w1[1], w1[0], selector); w5[2] = hc_byte_perm (w1[0], w0[3], selector); w5[1] = hc_byte_perm 
(w0[3], w0[2], selector); w5[0] = hc_byte_perm (w0[2], w0[1], selector); w4[3] = hc_byte_perm (w0[1], w0[0], selector); w4[2] = hc_byte_perm (w0[0], 0, selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_byte_perm ( 0, w7[3], selector); c4[2] = hc_byte_perm (w7[3], w7[2], selector); c4[1] = hc_byte_perm (w7[2], w7[1], selector); c4[0] = hc_byte_perm (w7[1], w7[0], selector); c3[3] = hc_byte_perm (w7[0], w6[3], selector); c3[2] = hc_byte_perm (w6[3], w6[2], selector); c3[1] = hc_byte_perm (w6[2], w6[1], selector); c3[0] = hc_byte_perm (w6[1], w6[0], selector); c2[3] = hc_byte_perm (w6[0], w5[3], selector); c2[2] = hc_byte_perm (w5[3], w5[2], selector); c2[1] = hc_byte_perm (w5[2], w5[1], selector); c2[0] = hc_byte_perm (w5[1], w5[0], selector); c1[3] = hc_byte_perm (w5[0], w4[3], selector); c1[2] = hc_byte_perm (w4[3], w4[2], selector); c1[1] = hc_byte_perm (w4[2], w4[1], selector); c1[0] = hc_byte_perm (w4[1], w4[0], selector); c0[3] = hc_byte_perm (w4[0], w3[3], selector); c0[2] = hc_byte_perm (w3[3], w3[2], selector); c0[1] = hc_byte_perm (w3[2], w3[1], selector); c0[0] = hc_byte_perm (w3[1], w3[0], selector); w7[3] = hc_byte_perm (w3[0], w2[3], selector); w7[2] = hc_byte_perm (w2[3], w2[2], selector); w7[1] = hc_byte_perm (w2[2], w2[1], selector); w7[0] = hc_byte_perm (w2[1], w2[0], selector); w6[3] = hc_byte_perm (w2[0], w1[3], selector); w6[2] = hc_byte_perm (w1[3], w1[2], selector); w6[1] = hc_byte_perm (w1[2], w1[1], selector); w6[0] = hc_byte_perm (w1[1], w1[0], selector); w5[3] = hc_byte_perm (w1[0], w0[3], selector); w5[2] = hc_byte_perm (w0[3], w0[2], selector); w5[1] = hc_byte_perm (w0[2], w0[1], selector); w5[0] = hc_byte_perm (w0[1], w0[0], selector); w4[3] = hc_byte_perm (w0[0], 0, selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; 
w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_byte_perm ( 0, w7[3], selector); c4[3] = hc_byte_perm (w7[3], w7[2], selector); c4[2] = hc_byte_perm (w7[2], w7[1], selector); c4[1] = hc_byte_perm (w7[1], w7[0], selector); c4[0] = hc_byte_perm (w7[0], w6[3], selector); c3[3] = hc_byte_perm (w6[3], w6[2], selector); c3[2] = hc_byte_perm (w6[2], w6[1], selector); c3[1] = hc_byte_perm (w6[1], w6[0], selector); c3[0] = hc_byte_perm (w6[0], w5[3], selector); c2[3] = hc_byte_perm (w5[3], w5[2], selector); c2[2] = hc_byte_perm (w5[2], w5[1], selector); c2[1] = hc_byte_perm (w5[1], w5[0], selector); c2[0] = hc_byte_perm (w5[0], w4[3], selector); c1[3] = hc_byte_perm (w4[3], w4[2], selector); c1[2] = hc_byte_perm (w4[2], w4[1], selector); c1[1] = hc_byte_perm (w4[1], w4[0], selector); c1[0] = hc_byte_perm (w4[0], w3[3], selector); c0[3] = hc_byte_perm (w3[3], w3[2], selector); c0[2] = hc_byte_perm (w3[2], w3[1], selector); c0[1] = hc_byte_perm (w3[1], w3[0], selector); c0[0] = hc_byte_perm (w3[0], w2[3], selector); w7[3] = hc_byte_perm (w2[3], w2[2], selector); w7[2] = hc_byte_perm (w2[2], w2[1], selector); w7[1] = hc_byte_perm (w2[1], w2[0], selector); w7[0] = hc_byte_perm (w2[0], w1[3], selector); w6[3] = hc_byte_perm (w1[3], w1[2], selector); w6[2] = hc_byte_perm (w1[2], w1[1], selector); w6[1] = hc_byte_perm (w1[1], w1[0], selector); w6[0] = hc_byte_perm (w1[0], w0[3], selector); w5[3] = hc_byte_perm (w0[3], w0[2], selector); w5[2] = hc_byte_perm (w0[2], w0[1], selector); w5[1] = hc_byte_perm (w0[1], w0[0], selector); w5[0] = hc_byte_perm (w0[0], 0, selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_byte_perm ( 0, w7[3], selector); c5[0] = hc_byte_perm 
(w7[3], w7[2], selector); c4[3] = hc_byte_perm (w7[2], w7[1], selector); c4[2] = hc_byte_perm (w7[1], w7[0], selector); c4[1] = hc_byte_perm (w7[0], w6[3], selector); c4[0] = hc_byte_perm (w6[3], w6[2], selector); c3[3] = hc_byte_perm (w6[2], w6[1], selector); c3[2] = hc_byte_perm (w6[1], w6[0], selector); c3[1] = hc_byte_perm (w6[0], w5[3], selector); c3[0] = hc_byte_perm (w5[3], w5[2], selector); c2[3] = hc_byte_perm (w5[2], w5[1], selector); c2[2] = hc_byte_perm (w5[1], w5[0], selector); c2[1] = hc_byte_perm (w5[0], w4[3], selector); c2[0] = hc_byte_perm (w4[3], w4[2], selector); c1[3] = hc_byte_perm (w4[2], w4[1], selector); c1[2] = hc_byte_perm (w4[1], w4[0], selector); c1[1] = hc_byte_perm (w4[0], w3[3], selector); c1[0] = hc_byte_perm (w3[3], w3[2], selector); c0[3] = hc_byte_perm (w3[2], w3[1], selector); c0[2] = hc_byte_perm (w3[1], w3[0], selector); c0[1] = hc_byte_perm (w3[0], w2[3], selector); c0[0] = hc_byte_perm (w2[3], w2[2], selector); w7[3] = hc_byte_perm (w2[2], w2[1], selector); w7[2] = hc_byte_perm (w2[1], w2[0], selector); w7[1] = hc_byte_perm (w2[0], w1[3], selector); w7[0] = hc_byte_perm (w1[3], w1[2], selector); w6[3] = hc_byte_perm (w1[2], w1[1], selector); w6[2] = hc_byte_perm (w1[1], w1[0], selector); w6[1] = hc_byte_perm (w1[0], w0[3], selector); w6[0] = hc_byte_perm (w0[3], w0[2], selector); w5[3] = hc_byte_perm (w0[2], w0[1], selector); w5[2] = hc_byte_perm (w0[1], w0[0], selector); w5[1] = hc_byte_perm (w0[0], 0, selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_byte_perm ( 0, w7[3], selector); c5[1] = hc_byte_perm (w7[3], w7[2], selector); c5[0] = hc_byte_perm (w7[2], w7[1], selector); c4[3] = hc_byte_perm (w7[1], w7[0], selector); c4[2] = hc_byte_perm (w7[0], w6[3], selector); c4[1] = hc_byte_perm (w6[3], w6[2], 
selector); c4[0] = hc_byte_perm (w6[2], w6[1], selector); c3[3] = hc_byte_perm (w6[1], w6[0], selector); c3[2] = hc_byte_perm (w6[0], w5[3], selector); c3[1] = hc_byte_perm (w5[3], w5[2], selector); c3[0] = hc_byte_perm (w5[2], w5[1], selector); c2[3] = hc_byte_perm (w5[1], w5[0], selector); c2[2] = hc_byte_perm (w5[0], w4[3], selector); c2[1] = hc_byte_perm (w4[3], w4[2], selector); c2[0] = hc_byte_perm (w4[2], w4[1], selector); c1[3] = hc_byte_perm (w4[1], w4[0], selector); c1[2] = hc_byte_perm (w4[0], w3[3], selector); c1[1] = hc_byte_perm (w3[3], w3[2], selector); c1[0] = hc_byte_perm (w3[2], w3[1], selector); c0[3] = hc_byte_perm (w3[1], w3[0], selector); c0[2] = hc_byte_perm (w3[0], w2[3], selector); c0[1] = hc_byte_perm (w2[3], w2[2], selector); c0[0] = hc_byte_perm (w2[2], w2[1], selector); w7[3] = hc_byte_perm (w2[1], w2[0], selector); w7[2] = hc_byte_perm (w2[0], w1[3], selector); w7[1] = hc_byte_perm (w1[3], w1[2], selector); w7[0] = hc_byte_perm (w1[2], w1[1], selector); w6[3] = hc_byte_perm (w1[1], w1[0], selector); w6[2] = hc_byte_perm (w1[0], w0[3], selector); w6[1] = hc_byte_perm (w0[3], w0[2], selector); w6[0] = hc_byte_perm (w0[2], w0[1], selector); w5[3] = hc_byte_perm (w0[1], w0[0], selector); w5[2] = hc_byte_perm (w0[0], 0, selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_byte_perm ( 0, w7[3], selector); c5[2] = hc_byte_perm (w7[3], w7[2], selector); c5[1] = hc_byte_perm (w7[2], w7[1], selector); c5[0] = hc_byte_perm (w7[1], w7[0], selector); c4[3] = hc_byte_perm (w7[0], w6[3], selector); c4[2] = hc_byte_perm (w6[3], w6[2], selector); c4[1] = hc_byte_perm (w6[2], w6[1], selector); c4[0] = hc_byte_perm (w6[1], w6[0], selector); c3[3] = hc_byte_perm (w6[0], w5[3], selector); c3[2] = hc_byte_perm (w5[3], w5[2], 
selector); c3[1] = hc_byte_perm (w5[2], w5[1], selector); c3[0] = hc_byte_perm (w5[1], w5[0], selector); c2[3] = hc_byte_perm (w5[0], w4[3], selector); c2[2] = hc_byte_perm (w4[3], w4[2], selector); c2[1] = hc_byte_perm (w4[2], w4[1], selector); c2[0] = hc_byte_perm (w4[1], w4[0], selector); c1[3] = hc_byte_perm (w4[0], w3[3], selector); c1[2] = hc_byte_perm (w3[3], w3[2], selector); c1[1] = hc_byte_perm (w3[2], w3[1], selector); c1[0] = hc_byte_perm (w3[1], w3[0], selector); c0[3] = hc_byte_perm (w3[0], w2[3], selector); c0[2] = hc_byte_perm (w2[3], w2[2], selector); c0[1] = hc_byte_perm (w2[2], w2[1], selector); c0[0] = hc_byte_perm (w2[1], w2[0], selector); w7[3] = hc_byte_perm (w2[0], w1[3], selector); w7[2] = hc_byte_perm (w1[3], w1[2], selector); w7[1] = hc_byte_perm (w1[2], w1[1], selector); w7[0] = hc_byte_perm (w1[1], w1[0], selector); w6[3] = hc_byte_perm (w1[0], w0[3], selector); w6[2] = hc_byte_perm (w0[3], w0[2], selector); w6[1] = hc_byte_perm (w0[2], w0[1], selector); w6[0] = hc_byte_perm (w0[1], w0[0], selector); w5[3] = hc_byte_perm (w0[0], 0, selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_byte_perm ( 0, w7[3], selector); c5[3] = hc_byte_perm (w7[3], w7[2], selector); c5[2] = hc_byte_perm (w7[2], w7[1], selector); c5[1] = hc_byte_perm (w7[1], w7[0], selector); c5[0] = hc_byte_perm (w7[0], w6[3], selector); c4[3] = hc_byte_perm (w6[3], w6[2], selector); c4[2] = hc_byte_perm (w6[2], w6[1], selector); c4[1] = hc_byte_perm (w6[1], w6[0], selector); c4[0] = hc_byte_perm (w6[0], w5[3], selector); c3[3] = hc_byte_perm (w5[3], w5[2], selector); c3[2] = hc_byte_perm (w5[2], w5[1], selector); c3[1] = hc_byte_perm (w5[1], w5[0], selector); c3[0] = hc_byte_perm (w5[0], w4[3], selector); c2[3] = hc_byte_perm (w4[3], 
w4[2], selector); c2[2] = hc_byte_perm (w4[2], w4[1], selector); c2[1] = hc_byte_perm (w4[1], w4[0], selector); c2[0] = hc_byte_perm (w4[0], w3[3], selector); c1[3] = hc_byte_perm (w3[3], w3[2], selector); c1[2] = hc_byte_perm (w3[2], w3[1], selector); c1[1] = hc_byte_perm (w3[1], w3[0], selector); c1[0] = hc_byte_perm (w3[0], w2[3], selector); c0[3] = hc_byte_perm (w2[3], w2[2], selector); c0[2] = hc_byte_perm (w2[2], w2[1], selector); c0[1] = hc_byte_perm (w2[1], w2[0], selector); c0[0] = hc_byte_perm (w2[0], w1[3], selector); w7[3] = hc_byte_perm (w1[3], w1[2], selector); w7[2] = hc_byte_perm (w1[2], w1[1], selector); w7[1] = hc_byte_perm (w1[1], w1[0], selector); w7[0] = hc_byte_perm (w1[0], w0[3], selector); w6[3] = hc_byte_perm (w0[3], w0[2], selector); w6[2] = hc_byte_perm (w0[2], w0[1], selector); w6[1] = hc_byte_perm (w0[1], w0[0], selector); w6[0] = hc_byte_perm (w0[0], 0, selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_byte_perm ( 0, w7[3], selector); c6[0] = hc_byte_perm (w7[3], w7[2], selector); c5[3] = hc_byte_perm (w7[2], w7[1], selector); c5[2] = hc_byte_perm (w7[1], w7[0], selector); c5[1] = hc_byte_perm (w7[0], w6[3], selector); c5[0] = hc_byte_perm (w6[3], w6[2], selector); c4[3] = hc_byte_perm (w6[2], w6[1], selector); c4[2] = hc_byte_perm (w6[1], w6[0], selector); c4[1] = hc_byte_perm (w6[0], w5[3], selector); c4[0] = hc_byte_perm (w5[3], w5[2], selector); c3[3] = hc_byte_perm (w5[2], w5[1], selector); c3[2] = hc_byte_perm (w5[1], w5[0], selector); c3[1] = hc_byte_perm (w5[0], w4[3], selector); c3[0] = hc_byte_perm (w4[3], w4[2], selector); c2[3] = hc_byte_perm (w4[2], w4[1], selector); c2[2] = hc_byte_perm (w4[1], w4[0], selector); c2[1] = hc_byte_perm (w4[0], w3[3], selector); c2[0] = 
hc_byte_perm (w3[3], w3[2], selector); c1[3] = hc_byte_perm (w3[2], w3[1], selector); c1[2] = hc_byte_perm (w3[1], w3[0], selector); c1[1] = hc_byte_perm (w3[0], w2[3], selector); c1[0] = hc_byte_perm (w2[3], w2[2], selector); c0[3] = hc_byte_perm (w2[2], w2[1], selector); c0[2] = hc_byte_perm (w2[1], w2[0], selector); c0[1] = hc_byte_perm (w2[0], w1[3], selector); c0[0] = hc_byte_perm (w1[3], w1[2], selector); w7[3] = hc_byte_perm (w1[2], w1[1], selector); w7[2] = hc_byte_perm (w1[1], w1[0], selector); w7[1] = hc_byte_perm (w1[0], w0[3], selector); w7[0] = hc_byte_perm (w0[3], w0[2], selector); w6[3] = hc_byte_perm (w0[2], w0[1], selector); w6[2] = hc_byte_perm (w0[1], w0[0], selector); w6[1] = hc_byte_perm (w0[0], 0, selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_byte_perm ( 0, w7[3], selector); c6[1] = hc_byte_perm (w7[3], w7[2], selector); c6[0] = hc_byte_perm (w7[2], w7[1], selector); c5[3] = hc_byte_perm (w7[1], w7[0], selector); c5[2] = hc_byte_perm (w7[0], w6[3], selector); c5[1] = hc_byte_perm (w6[3], w6[2], selector); c5[0] = hc_byte_perm (w6[2], w6[1], selector); c4[3] = hc_byte_perm (w6[1], w6[0], selector); c4[2] = hc_byte_perm (w6[0], w5[3], selector); c4[1] = hc_byte_perm (w5[3], w5[2], selector); c4[0] = hc_byte_perm (w5[2], w5[1], selector); c3[3] = hc_byte_perm (w5[1], w5[0], selector); c3[2] = hc_byte_perm (w5[0], w4[3], selector); c3[1] = hc_byte_perm (w4[3], w4[2], selector); c3[0] = hc_byte_perm (w4[2], w4[1], selector); c2[3] = hc_byte_perm (w4[1], w4[0], selector); c2[2] = hc_byte_perm (w4[0], w3[3], selector); c2[1] = hc_byte_perm (w3[3], w3[2], selector); c2[0] = hc_byte_perm (w3[2], w3[1], selector); c1[3] = hc_byte_perm (w3[1], w3[0], selector); c1[2] = hc_byte_perm (w3[0], 
w2[3], selector); c1[1] = hc_byte_perm (w2[3], w2[2], selector); c1[0] = hc_byte_perm (w2[2], w2[1], selector); c0[3] = hc_byte_perm (w2[1], w2[0], selector); c0[2] = hc_byte_perm (w2[0], w1[3], selector); c0[1] = hc_byte_perm (w1[3], w1[2], selector); c0[0] = hc_byte_perm (w1[2], w1[1], selector); w7[3] = hc_byte_perm (w1[1], w1[0], selector); w7[2] = hc_byte_perm (w1[0], w0[3], selector); w7[1] = hc_byte_perm (w0[3], w0[2], selector); w7[0] = hc_byte_perm (w0[2], w0[1], selector); w6[3] = hc_byte_perm (w0[1], w0[0], selector); w6[2] = hc_byte_perm (w0[0], 0, selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_byte_perm ( 0, w7[3], selector); c6[2] = hc_byte_perm (w7[3], w7[2], selector); c6[1] = hc_byte_perm (w7[2], w7[1], selector); c6[0] = hc_byte_perm (w7[1], w7[0], selector); c5[3] = hc_byte_perm (w7[0], w6[3], selector); c5[2] = hc_byte_perm (w6[3], w6[2], selector); c5[1] = hc_byte_perm (w6[2], w6[1], selector); c5[0] = hc_byte_perm (w6[1], w6[0], selector); c4[3] = hc_byte_perm (w6[0], w5[3], selector); c4[2] = hc_byte_perm (w5[3], w5[2], selector); c4[1] = hc_byte_perm (w5[2], w5[1], selector); c4[0] = hc_byte_perm (w5[1], w5[0], selector); c3[3] = hc_byte_perm (w5[0], w4[3], selector); c3[2] = hc_byte_perm (w4[3], w4[2], selector); c3[1] = hc_byte_perm (w4[2], w4[1], selector); c3[0] = hc_byte_perm (w4[1], w4[0], selector); c2[3] = hc_byte_perm (w4[0], w3[3], selector); c2[2] = hc_byte_perm (w3[3], w3[2], selector); c2[1] = hc_byte_perm (w3[2], w3[1], selector); c2[0] = hc_byte_perm (w3[1], w3[0], selector); c1[3] = hc_byte_perm (w3[0], w2[3], selector); c1[2] = hc_byte_perm (w2[3], w2[2], selector); c1[1] = hc_byte_perm (w2[2], w2[1], selector); c1[0] = hc_byte_perm (w2[1], w2[0], 
selector); c0[3] = hc_byte_perm (w2[0], w1[3], selector); c0[2] = hc_byte_perm (w1[3], w1[2], selector); c0[1] = hc_byte_perm (w1[2], w1[1], selector); c0[0] = hc_byte_perm (w1[1], w1[0], selector); w7[3] = hc_byte_perm (w1[0], w0[3], selector); w7[2] = hc_byte_perm (w0[3], w0[2], selector); w7[1] = hc_byte_perm (w0[2], w0[1], selector); w7[0] = hc_byte_perm (w0[1], w0[0], selector); w6[3] = hc_byte_perm (w0[0], 0, selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_byte_perm ( 0, w7[3], selector); c6[3] = hc_byte_perm (w7[3], w7[2], selector); c6[2] = hc_byte_perm (w7[2], w7[1], selector); c6[1] = hc_byte_perm (w7[1], w7[0], selector); c6[0] = hc_byte_perm (w7[0], w6[3], selector); c5[3] = hc_byte_perm (w6[3], w6[2], selector); c5[2] = hc_byte_perm (w6[2], w6[1], selector); c5[1] = hc_byte_perm (w6[1], w6[0], selector); c5[0] = hc_byte_perm (w6[0], w5[3], selector); c4[3] = hc_byte_perm (w5[3], w5[2], selector); c4[2] = hc_byte_perm (w5[2], w5[1], selector); c4[1] = hc_byte_perm (w5[1], w5[0], selector); c4[0] = hc_byte_perm (w5[0], w4[3], selector); c3[3] = hc_byte_perm (w4[3], w4[2], selector); c3[2] = hc_byte_perm (w4[2], w4[1], selector); c3[1] = hc_byte_perm (w4[1], w4[0], selector); c3[0] = hc_byte_perm (w4[0], w3[3], selector); c2[3] = hc_byte_perm (w3[3], w3[2], selector); c2[2] = hc_byte_perm (w3[2], w3[1], selector); c2[1] = hc_byte_perm (w3[1], w3[0], selector); c2[0] = hc_byte_perm (w3[0], w2[3], selector); c1[3] = hc_byte_perm (w2[3], w2[2], selector); c1[2] = hc_byte_perm (w2[2], w2[1], selector); c1[1] = hc_byte_perm (w2[1], w2[0], selector); c1[0] = hc_byte_perm (w2[0], w1[3], selector); c0[3] = hc_byte_perm (w1[3], w1[2], selector); c0[2] = hc_byte_perm (w1[2], w1[1], 
selector); c0[1] = hc_byte_perm (w1[1], w1[0], selector); c0[0] = hc_byte_perm (w1[0], w0[3], selector); w7[3] = hc_byte_perm (w0[3], w0[2], selector); w7[2] = hc_byte_perm (w0[2], w0[1], selector); w7[1] = hc_byte_perm (w0[1], w0[0], selector); w7[0] = hc_byte_perm (w0[0], 0, selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_byte_perm ( 0, w7[3], selector); c7[0] = hc_byte_perm (w7[3], w7[2], selector); c6[3] = hc_byte_perm (w7[2], w7[1], selector); c6[2] = hc_byte_perm (w7[1], w7[0], selector); c6[1] = hc_byte_perm (w7[0], w6[3], selector); c6[0] = hc_byte_perm (w6[3], w6[2], selector); c5[3] = hc_byte_perm (w6[2], w6[1], selector); c5[2] = hc_byte_perm (w6[1], w6[0], selector); c5[1] = hc_byte_perm (w6[0], w5[3], selector); c5[0] = hc_byte_perm (w5[3], w5[2], selector); c4[3] = hc_byte_perm (w5[2], w5[1], selector); c4[2] = hc_byte_perm (w5[1], w5[0], selector); c4[1] = hc_byte_perm (w5[0], w4[3], selector); c4[0] = hc_byte_perm (w4[3], w4[2], selector); c3[3] = hc_byte_perm (w4[2], w4[1], selector); c3[2] = hc_byte_perm (w4[1], w4[0], selector); c3[1] = hc_byte_perm (w4[0], w3[3], selector); c3[0] = hc_byte_perm (w3[3], w3[2], selector); c2[3] = hc_byte_perm (w3[2], w3[1], selector); c2[2] = hc_byte_perm (w3[1], w3[0], selector); c2[1] = hc_byte_perm (w3[0], w2[3], selector); c2[0] = hc_byte_perm (w2[3], w2[2], selector); c1[3] = hc_byte_perm (w2[2], w2[1], selector); c1[2] = hc_byte_perm (w2[1], w2[0], selector); c1[1] = hc_byte_perm (w2[0], w1[3], selector); c1[0] = hc_byte_perm (w1[3], w1[2], selector); c0[3] = hc_byte_perm (w1[2], w1[1], selector); c0[2] = hc_byte_perm (w1[1], w1[0], selector); c0[1] = hc_byte_perm (w1[0], w0[3], selector); c0[0] = hc_byte_perm 
(w0[3], w0[2], selector); w7[3] = hc_byte_perm (w0[2], w0[1], selector); w7[2] = hc_byte_perm (w0[1], w0[0], selector); w7[1] = hc_byte_perm (w0[0], 0, selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_byte_perm ( 0, w7[3], selector); c7[1] = hc_byte_perm (w7[3], w7[2], selector); c7[0] = hc_byte_perm (w7[2], w7[1], selector); c6[3] = hc_byte_perm (w7[1], w7[0], selector); c6[2] = hc_byte_perm (w7[0], w6[3], selector); c6[1] = hc_byte_perm (w6[3], w6[2], selector); c6[0] = hc_byte_perm (w6[2], w6[1], selector); c5[3] = hc_byte_perm (w6[1], w6[0], selector); c5[2] = hc_byte_perm (w6[0], w5[3], selector); c5[1] = hc_byte_perm (w5[3], w5[2], selector); c5[0] = hc_byte_perm (w5[2], w5[1], selector); c4[3] = hc_byte_perm (w5[1], w5[0], selector); c4[2] = hc_byte_perm (w5[0], w4[3], selector); c4[1] = hc_byte_perm (w4[3], w4[2], selector); c4[0] = hc_byte_perm (w4[2], w4[1], selector); c3[3] = hc_byte_perm (w4[1], w4[0], selector); c3[2] = hc_byte_perm (w4[0], w3[3], selector); c3[1] = hc_byte_perm (w3[3], w3[2], selector); c3[0] = hc_byte_perm (w3[2], w3[1], selector); c2[3] = hc_byte_perm (w3[1], w3[0], selector); c2[2] = hc_byte_perm (w3[0], w2[3], selector); c2[1] = hc_byte_perm (w2[3], w2[2], selector); c2[0] = hc_byte_perm (w2[2], w2[1], selector); c1[3] = hc_byte_perm (w2[1], w2[0], selector); c1[2] = hc_byte_perm (w2[0], w1[3], selector); c1[1] = hc_byte_perm (w1[3], w1[2], selector); c1[0] = hc_byte_perm (w1[2], w1[1], selector); c0[3] = hc_byte_perm (w1[1], w1[0], selector); c0[2] = hc_byte_perm (w1[0], w0[3], selector); c0[1] = hc_byte_perm (w0[3], w0[2], selector); c0[0] = hc_byte_perm (w0[2], w0[1], selector); w7[3] = hc_byte_perm (w0[1], w0[0], selector); 
w7[2] = hc_byte_perm (w0[0], 0, selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_byte_perm ( 0, w7[3], selector); c7[2] = hc_byte_perm (w7[3], w7[2], selector); c7[1] = hc_byte_perm (w7[2], w7[1], selector); c7[0] = hc_byte_perm (w7[1], w7[0], selector); c6[3] = hc_byte_perm (w7[0], w6[3], selector); c6[2] = hc_byte_perm (w6[3], w6[2], selector); c6[1] = hc_byte_perm (w6[2], w6[1], selector); c6[0] = hc_byte_perm (w6[1], w6[0], selector); c5[3] = hc_byte_perm (w6[0], w5[3], selector); c5[2] = hc_byte_perm (w5[3], w5[2], selector); c5[1] = hc_byte_perm (w5[2], w5[1], selector); c5[0] = hc_byte_perm (w5[1], w5[0], selector); c4[3] = hc_byte_perm (w5[0], w4[3], selector); c4[2] = hc_byte_perm (w4[3], w4[2], selector); c4[1] = hc_byte_perm (w4[2], w4[1], selector); c4[0] = hc_byte_perm (w4[1], w4[0], selector); c3[3] = hc_byte_perm (w4[0], w3[3], selector); c3[2] = hc_byte_perm (w3[3], w3[2], selector); c3[1] = hc_byte_perm (w3[2], w3[1], selector); c3[0] = hc_byte_perm (w3[1], w3[0], selector); c2[3] = hc_byte_perm (w3[0], w2[3], selector); c2[2] = hc_byte_perm (w2[3], w2[2], selector); c2[1] = hc_byte_perm (w2[2], w2[1], selector); c2[0] = hc_byte_perm (w2[1], w2[0], selector); c1[3] = hc_byte_perm (w2[0], w1[3], selector); c1[2] = hc_byte_perm (w1[3], w1[2], selector); c1[1] = hc_byte_perm (w1[2], w1[1], selector); c1[0] = hc_byte_perm (w1[1], w1[0], selector); c0[3] = hc_byte_perm (w1[0], w0[3], selector); c0[2] = hc_byte_perm (w0[3], w0[2], selector); c0[1] = hc_byte_perm (w0[2], w0[1], selector); c0[0] = hc_byte_perm (w0[1], w0[0], selector); w7[3] = hc_byte_perm (w0[0], 0, selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; 
w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_1x64_le (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w[63] = hc_bytealign (w[62], w[63], offset); w[62] = hc_bytealign (w[61], w[62], offset); w[61] = hc_bytealign (w[60], w[61], offset); w[60] = hc_bytealign (w[59], w[60], offset); w[59] = hc_bytealign (w[58], w[59], offset); w[58] = hc_bytealign (w[57], w[58], offset); w[57] = hc_bytealign (w[56], w[57], offset); w[56] = hc_bytealign (w[55], w[56], offset); w[55] = hc_bytealign (w[54], w[55], offset); w[54] = hc_bytealign (w[53], w[54], offset); w[53] = hc_bytealign (w[52], w[53], offset); w[52] = hc_bytealign (w[51], w[52], offset); w[51] = hc_bytealign (w[50], w[51], offset); w[50] = hc_bytealign (w[49], w[50], offset); w[49] = hc_bytealign (w[48], w[49], offset); w[48] = hc_bytealign (w[47], w[48], offset); w[47] = hc_bytealign (w[46], w[47], offset); w[46] = hc_bytealign (w[45], w[46], offset); w[45] = hc_bytealign (w[44], w[45], offset); w[44] = hc_bytealign (w[43], w[44], offset); w[43] = hc_bytealign (w[42], w[43], offset); w[42] = hc_bytealign (w[41], w[42], offset); w[41] = hc_bytealign (w[40], w[41], offset); w[40] = hc_bytealign (w[39], w[40], offset); w[39] = hc_bytealign (w[38], w[39], offset); w[38] = hc_bytealign (w[37], w[38], offset); w[37] = hc_bytealign (w[36], w[37], offset); w[36] = hc_bytealign (w[35], w[36], offset); w[35] = hc_bytealign (w[34], w[35], offset); w[34] = hc_bytealign (w[33], w[34], offset); w[33] = hc_bytealign (w[32], w[33], offset); w[32] = hc_bytealign (w[31], w[32], offset); w[31] = hc_bytealign 
(w[30], w[31], offset); w[30] = hc_bytealign (w[29], w[30], offset); w[29] = hc_bytealign (w[28], w[29], offset); w[28] = hc_bytealign (w[27], w[28], offset); w[27] = hc_bytealign (w[26], w[27], offset); w[26] = hc_bytealign (w[25], w[26], offset); w[25] = hc_bytealign (w[24], w[25], offset); w[24] = hc_bytealign (w[23], w[24], offset); w[23] = hc_bytealign (w[22], w[23], offset); w[22] = hc_bytealign (w[21], w[22], offset); w[21] = hc_bytealign (w[20], w[21], offset); w[20] = hc_bytealign (w[19], w[20], offset); w[19] = hc_bytealign (w[18], w[19], offset); w[18] = hc_bytealign (w[17], w[18], offset); w[17] = hc_bytealign (w[16], w[17], offset); w[16] = hc_bytealign (w[15], w[16], offset); w[15] = hc_bytealign (w[14], w[15], offset); w[14] = hc_bytealign (w[13], w[14], offset); w[13] = hc_bytealign (w[12], w[13], offset); w[12] = hc_bytealign (w[11], w[12], offset); w[11] = hc_bytealign (w[10], w[11], offset); w[10] = hc_bytealign (w[ 9], w[10], offset); w[ 9] = hc_bytealign (w[ 8], w[ 9], offset); w[ 8] = hc_bytealign (w[ 7], w[ 8], offset); w[ 7] = hc_bytealign (w[ 6], w[ 7], offset); w[ 6] = hc_bytealign (w[ 5], w[ 6], offset); w[ 5] = hc_bytealign (w[ 4], w[ 5], offset); w[ 4] = hc_bytealign (w[ 3], w[ 4], offset); w[ 3] = hc_bytealign (w[ 2], w[ 3], offset); w[ 2] = hc_bytealign (w[ 1], w[ 2], offset); w[ 1] = hc_bytealign (w[ 0], w[ 1], offset); w[ 0] = hc_bytealign ( 0, w[ 0], offset); break; case 1: w[63] = hc_bytealign (w[61], w[62], offset); w[62] = hc_bytealign (w[60], w[61], offset); w[61] = hc_bytealign (w[59], w[60], offset); w[60] = hc_bytealign (w[58], w[59], offset); w[59] = hc_bytealign (w[57], w[58], offset); w[58] = hc_bytealign (w[56], w[57], offset); w[57] = hc_bytealign (w[55], w[56], offset); w[56] = hc_bytealign (w[54], w[55], offset); w[55] = hc_bytealign (w[53], w[54], offset); w[54] = hc_bytealign (w[52], w[53], offset); w[53] = hc_bytealign (w[51], w[52], offset); w[52] = hc_bytealign (w[50], w[51], offset); w[51] = hc_bytealign (w[49], 
w[50], offset); w[50] = hc_bytealign (w[48], w[49], offset); w[49] = hc_bytealign (w[47], w[48], offset); w[48] = hc_bytealign (w[46], w[47], offset); w[47] = hc_bytealign (w[45], w[46], offset); w[46] = hc_bytealign (w[44], w[45], offset); w[45] = hc_bytealign (w[43], w[44], offset); w[44] = hc_bytealign (w[42], w[43], offset); w[43] = hc_bytealign (w[41], w[42], offset); w[42] = hc_bytealign (w[40], w[41], offset); w[41] = hc_bytealign (w[39], w[40], offset); w[40] = hc_bytealign (w[38], w[39], offset); w[39] = hc_bytealign (w[37], w[38], offset); w[38] = hc_bytealign (w[36], w[37], offset); w[37] = hc_bytealign (w[35], w[36], offset); w[36] = hc_bytealign (w[34], w[35], offset); w[35] = hc_bytealign (w[33], w[34], offset); w[34] = hc_bytealign (w[32], w[33], offset); w[33] = hc_bytealign (w[31], w[32], offset); w[32] = hc_bytealign (w[30], w[31], offset); w[31] = hc_bytealign (w[29], w[30], offset); w[30] = hc_bytealign (w[28], w[29], offset); w[29] = hc_bytealign (w[27], w[28], offset); w[28] = hc_bytealign (w[26], w[27], offset); w[27] = hc_bytealign (w[25], w[26], offset); w[26] = hc_bytealign (w[24], w[25], offset); w[25] = hc_bytealign (w[23], w[24], offset); w[24] = hc_bytealign (w[22], w[23], offset); w[23] = hc_bytealign (w[21], w[22], offset); w[22] = hc_bytealign (w[20], w[21], offset); w[21] = hc_bytealign (w[19], w[20], offset); w[20] = hc_bytealign (w[18], w[19], offset); w[19] = hc_bytealign (w[17], w[18], offset); w[18] = hc_bytealign (w[16], w[17], offset); w[17] = hc_bytealign (w[15], w[16], offset); w[16] = hc_bytealign (w[14], w[15], offset); w[15] = hc_bytealign (w[13], w[14], offset); w[14] = hc_bytealign (w[12], w[13], offset); w[13] = hc_bytealign (w[11], w[12], offset); w[12] = hc_bytealign (w[10], w[11], offset); w[11] = hc_bytealign (w[ 9], w[10], offset); w[10] = hc_bytealign (w[ 8], w[ 9], offset); w[ 9] = hc_bytealign (w[ 7], w[ 8], offset); w[ 8] = hc_bytealign (w[ 6], w[ 7], offset); w[ 7] = hc_bytealign (w[ 5], w[ 6], offset); w[ 
6] = hc_bytealign (w[ 4], w[ 5], offset); w[ 5] = hc_bytealign (w[ 3], w[ 4], offset); w[ 4] = hc_bytealign (w[ 2], w[ 3], offset); w[ 3] = hc_bytealign (w[ 1], w[ 2], offset); w[ 2] = hc_bytealign (w[ 0], w[ 1], offset); w[ 1] = hc_bytealign ( 0, w[ 0], offset); w[ 0] = 0; break; case 2: w[63] = hc_bytealign (w[60], w[61], offset); w[62] = hc_bytealign (w[59], w[60], offset); w[61] = hc_bytealign (w[58], w[59], offset); w[60] = hc_bytealign (w[57], w[58], offset); w[59] = hc_bytealign (w[56], w[57], offset); w[58] = hc_bytealign (w[55], w[56], offset); w[57] = hc_bytealign (w[54], w[55], offset); w[56] = hc_bytealign (w[53], w[54], offset); w[55] = hc_bytealign (w[52], w[53], offset); w[54] = hc_bytealign (w[51], w[52], offset); w[53] = hc_bytealign (w[50], w[51], offset); w[52] = hc_bytealign (w[49], w[50], offset); w[51] = hc_bytealign (w[48], w[49], offset); w[50] = hc_bytealign (w[47], w[48], offset); w[49] = hc_bytealign (w[46], w[47], offset); w[48] = hc_bytealign (w[45], w[46], offset); w[47] = hc_bytealign (w[44], w[45], offset); w[46] = hc_bytealign (w[43], w[44], offset); w[45] = hc_bytealign (w[42], w[43], offset); w[44] = hc_bytealign (w[41], w[42], offset); w[43] = hc_bytealign (w[40], w[41], offset); w[42] = hc_bytealign (w[39], w[40], offset); w[41] = hc_bytealign (w[38], w[39], offset); w[40] = hc_bytealign (w[37], w[38], offset); w[39] = hc_bytealign (w[36], w[37], offset); w[38] = hc_bytealign (w[35], w[36], offset); w[37] = hc_bytealign (w[34], w[35], offset); w[36] = hc_bytealign (w[33], w[34], offset); w[35] = hc_bytealign (w[32], w[33], offset); w[34] = hc_bytealign (w[31], w[32], offset); w[33] = hc_bytealign (w[30], w[31], offset); w[32] = hc_bytealign (w[29], w[30], offset); w[31] = hc_bytealign (w[28], w[29], offset); w[30] = hc_bytealign (w[27], w[28], offset); w[29] = hc_bytealign (w[26], w[27], offset); w[28] = hc_bytealign (w[25], w[26], offset); w[27] = hc_bytealign (w[24], w[25], offset); w[26] = hc_bytealign (w[23], w[24], offset); 
w[25] = hc_bytealign (w[22], w[23], offset); w[24] = hc_bytealign (w[21], w[22], offset); w[23] = hc_bytealign (w[20], w[21], offset); w[22] = hc_bytealign (w[19], w[20], offset); w[21] = hc_bytealign (w[18], w[19], offset); w[20] = hc_bytealign (w[17], w[18], offset); w[19] = hc_bytealign (w[16], w[17], offset); w[18] = hc_bytealign (w[15], w[16], offset); w[17] = hc_bytealign (w[14], w[15], offset); w[16] = hc_bytealign (w[13], w[14], offset); w[15] = hc_bytealign (w[12], w[13], offset); w[14] = hc_bytealign (w[11], w[12], offset); w[13] = hc_bytealign (w[10], w[11], offset); w[12] = hc_bytealign (w[ 9], w[10], offset); w[11] = hc_bytealign (w[ 8], w[ 9], offset); w[10] = hc_bytealign (w[ 7], w[ 8], offset); w[ 9] = hc_bytealign (w[ 6], w[ 7], offset); w[ 8] = hc_bytealign (w[ 5], w[ 6], offset); w[ 7] = hc_bytealign (w[ 4], w[ 5], offset); w[ 6] = hc_bytealign (w[ 3], w[ 4], offset); w[ 5] = hc_bytealign (w[ 2], w[ 3], offset); w[ 4] = hc_bytealign (w[ 1], w[ 2], offset); w[ 3] = hc_bytealign (w[ 0], w[ 1], offset); w[ 2] = hc_bytealign ( 0, w[ 0], offset); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_bytealign (w[59], w[60], offset); w[62] = hc_bytealign (w[58], w[59], offset); w[61] = hc_bytealign (w[57], w[58], offset); w[60] = hc_bytealign (w[56], w[57], offset); w[59] = hc_bytealign (w[55], w[56], offset); w[58] = hc_bytealign (w[54], w[55], offset); w[57] = hc_bytealign (w[53], w[54], offset); w[56] = hc_bytealign (w[52], w[53], offset); w[55] = hc_bytealign (w[51], w[52], offset); w[54] = hc_bytealign (w[50], w[51], offset); w[53] = hc_bytealign (w[49], w[50], offset); w[52] = hc_bytealign (w[48], w[49], offset); w[51] = hc_bytealign (w[47], w[48], offset); w[50] = hc_bytealign (w[46], w[47], offset); w[49] = hc_bytealign (w[45], w[46], offset); w[48] = hc_bytealign (w[44], w[45], offset); w[47] = hc_bytealign (w[43], w[44], offset); w[46] = hc_bytealign (w[42], w[43], offset); w[45] = hc_bytealign (w[41], w[42], offset); w[44] = hc_bytealign (w[40], 
w[41], offset); w[43] = hc_bytealign (w[39], w[40], offset); w[42] = hc_bytealign (w[38], w[39], offset); w[41] = hc_bytealign (w[37], w[38], offset); w[40] = hc_bytealign (w[36], w[37], offset); w[39] = hc_bytealign (w[35], w[36], offset); w[38] = hc_bytealign (w[34], w[35], offset); w[37] = hc_bytealign (w[33], w[34], offset); w[36] = hc_bytealign (w[32], w[33], offset); w[35] = hc_bytealign (w[31], w[32], offset); w[34] = hc_bytealign (w[30], w[31], offset); w[33] = hc_bytealign (w[29], w[30], offset); w[32] = hc_bytealign (w[28], w[29], offset); w[31] = hc_bytealign (w[27], w[28], offset); w[30] = hc_bytealign (w[26], w[27], offset); w[29] = hc_bytealign (w[25], w[26], offset); w[28] = hc_bytealign (w[24], w[25], offset); w[27] = hc_bytealign (w[23], w[24], offset); w[26] = hc_bytealign (w[22], w[23], offset); w[25] = hc_bytealign (w[21], w[22], offset); w[24] = hc_bytealign (w[20], w[21], offset); w[23] = hc_bytealign (w[19], w[20], offset); w[22] = hc_bytealign (w[18], w[19], offset); w[21] = hc_bytealign (w[17], w[18], offset); w[20] = hc_bytealign (w[16], w[17], offset); w[19] = hc_bytealign (w[15], w[16], offset); w[18] = hc_bytealign (w[14], w[15], offset); w[17] = hc_bytealign (w[13], w[14], offset); w[16] = hc_bytealign (w[12], w[13], offset); w[15] = hc_bytealign (w[11], w[12], offset); w[14] = hc_bytealign (w[10], w[11], offset); w[13] = hc_bytealign (w[ 9], w[10], offset); w[12] = hc_bytealign (w[ 8], w[ 9], offset); w[11] = hc_bytealign (w[ 7], w[ 8], offset); w[10] = hc_bytealign (w[ 6], w[ 7], offset); w[ 9] = hc_bytealign (w[ 5], w[ 6], offset); w[ 8] = hc_bytealign (w[ 4], w[ 5], offset); w[ 7] = hc_bytealign (w[ 3], w[ 4], offset); w[ 6] = hc_bytealign (w[ 2], w[ 3], offset); w[ 5] = hc_bytealign (w[ 1], w[ 2], offset); w[ 4] = hc_bytealign (w[ 0], w[ 1], offset); w[ 3] = hc_bytealign ( 0, w[ 0], offset); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_bytealign (w[58], w[59], offset); w[62] = hc_bytealign (w[57], w[58], offset); 
w[61] = hc_bytealign (w[56], w[57], offset); w[60] = hc_bytealign (w[55], w[56], offset); w[59] = hc_bytealign (w[54], w[55], offset); w[58] = hc_bytealign (w[53], w[54], offset); w[57] = hc_bytealign (w[52], w[53], offset); w[56] = hc_bytealign (w[51], w[52], offset); w[55] = hc_bytealign (w[50], w[51], offset); w[54] = hc_bytealign (w[49], w[50], offset); w[53] = hc_bytealign (w[48], w[49], offset); w[52] = hc_bytealign (w[47], w[48], offset); w[51] = hc_bytealign (w[46], w[47], offset); w[50] = hc_bytealign (w[45], w[46], offset); w[49] = hc_bytealign (w[44], w[45], offset); w[48] = hc_bytealign (w[43], w[44], offset); w[47] = hc_bytealign (w[42], w[43], offset); w[46] = hc_bytealign (w[41], w[42], offset); w[45] = hc_bytealign (w[40], w[41], offset); w[44] = hc_bytealign (w[39], w[40], offset); w[43] = hc_bytealign (w[38], w[39], offset); w[42] = hc_bytealign (w[37], w[38], offset); w[41] = hc_bytealign (w[36], w[37], offset); w[40] = hc_bytealign (w[35], w[36], offset); w[39] = hc_bytealign (w[34], w[35], offset); w[38] = hc_bytealign (w[33], w[34], offset); w[37] = hc_bytealign (w[32], w[33], offset); w[36] = hc_bytealign (w[31], w[32], offset); w[35] = hc_bytealign (w[30], w[31], offset); w[34] = hc_bytealign (w[29], w[30], offset); w[33] = hc_bytealign (w[28], w[29], offset); w[32] = hc_bytealign (w[27], w[28], offset); w[31] = hc_bytealign (w[26], w[27], offset); w[30] = hc_bytealign (w[25], w[26], offset); w[29] = hc_bytealign (w[24], w[25], offset); w[28] = hc_bytealign (w[23], w[24], offset); w[27] = hc_bytealign (w[22], w[23], offset); w[26] = hc_bytealign (w[21], w[22], offset); w[25] = hc_bytealign (w[20], w[21], offset); w[24] = hc_bytealign (w[19], w[20], offset); w[23] = hc_bytealign (w[18], w[19], offset); w[22] = hc_bytealign (w[17], w[18], offset); w[21] = hc_bytealign (w[16], w[17], offset); w[20] = hc_bytealign (w[15], w[16], offset); w[19] = hc_bytealign (w[14], w[15], offset); w[18] = hc_bytealign (w[13], w[14], offset); w[17] = 
hc_bytealign (w[12], w[13], offset); w[16] = hc_bytealign (w[11], w[12], offset); w[15] = hc_bytealign (w[10], w[11], offset); w[14] = hc_bytealign (w[ 9], w[10], offset); w[13] = hc_bytealign (w[ 8], w[ 9], offset); w[12] = hc_bytealign (w[ 7], w[ 8], offset); w[11] = hc_bytealign (w[ 6], w[ 7], offset); w[10] = hc_bytealign (w[ 5], w[ 6], offset); w[ 9] = hc_bytealign (w[ 4], w[ 5], offset); w[ 8] = hc_bytealign (w[ 3], w[ 4], offset); w[ 7] = hc_bytealign (w[ 2], w[ 3], offset); w[ 6] = hc_bytealign (w[ 1], w[ 2], offset); w[ 5] = hc_bytealign (w[ 0], w[ 1], offset); w[ 4] = hc_bytealign ( 0, w[ 0], offset); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_bytealign (w[57], w[58], offset); w[62] = hc_bytealign (w[56], w[57], offset); w[61] = hc_bytealign (w[55], w[56], offset); w[60] = hc_bytealign (w[54], w[55], offset); w[59] = hc_bytealign (w[53], w[54], offset); w[58] = hc_bytealign (w[52], w[53], offset); w[57] = hc_bytealign (w[51], w[52], offset); w[56] = hc_bytealign (w[50], w[51], offset); w[55] = hc_bytealign (w[49], w[50], offset); w[54] = hc_bytealign (w[48], w[49], offset); w[53] = hc_bytealign (w[47], w[48], offset); w[52] = hc_bytealign (w[46], w[47], offset); w[51] = hc_bytealign (w[45], w[46], offset); w[50] = hc_bytealign (w[44], w[45], offset); w[49] = hc_bytealign (w[43], w[44], offset); w[48] = hc_bytealign (w[42], w[43], offset); w[47] = hc_bytealign (w[41], w[42], offset); w[46] = hc_bytealign (w[40], w[41], offset); w[45] = hc_bytealign (w[39], w[40], offset); w[44] = hc_bytealign (w[38], w[39], offset); w[43] = hc_bytealign (w[37], w[38], offset); w[42] = hc_bytealign (w[36], w[37], offset); w[41] = hc_bytealign (w[35], w[36], offset); w[40] = hc_bytealign (w[34], w[35], offset); w[39] = hc_bytealign (w[33], w[34], offset); w[38] = hc_bytealign (w[32], w[33], offset); w[37] = hc_bytealign (w[31], w[32], offset); w[36] = hc_bytealign (w[30], w[31], offset); w[35] = hc_bytealign (w[29], w[30], offset); w[34] = 
hc_bytealign (w[28], w[29], offset); w[33] = hc_bytealign (w[27], w[28], offset); w[32] = hc_bytealign (w[26], w[27], offset); w[31] = hc_bytealign (w[25], w[26], offset); w[30] = hc_bytealign (w[24], w[25], offset); w[29] = hc_bytealign (w[23], w[24], offset); w[28] = hc_bytealign (w[22], w[23], offset); w[27] = hc_bytealign (w[21], w[22], offset); w[26] = hc_bytealign (w[20], w[21], offset); w[25] = hc_bytealign (w[19], w[20], offset); w[24] = hc_bytealign (w[18], w[19], offset); w[23] = hc_bytealign (w[17], w[18], offset); w[22] = hc_bytealign (w[16], w[17], offset); w[21] = hc_bytealign (w[15], w[16], offset); w[20] = hc_bytealign (w[14], w[15], offset); w[19] = hc_bytealign (w[13], w[14], offset); w[18] = hc_bytealign (w[12], w[13], offset); w[17] = hc_bytealign (w[11], w[12], offset); w[16] = hc_bytealign (w[10], w[11], offset); w[15] = hc_bytealign (w[ 9], w[10], offset); w[14] = hc_bytealign (w[ 8], w[ 9], offset); w[13] = hc_bytealign (w[ 7], w[ 8], offset); w[12] = hc_bytealign (w[ 6], w[ 7], offset); w[11] = hc_bytealign (w[ 5], w[ 6], offset); w[10] = hc_bytealign (w[ 4], w[ 5], offset); w[ 9] = hc_bytealign (w[ 3], w[ 4], offset); w[ 8] = hc_bytealign (w[ 2], w[ 3], offset); w[ 7] = hc_bytealign (w[ 1], w[ 2], offset); w[ 6] = hc_bytealign (w[ 0], w[ 1], offset); w[ 5] = hc_bytealign ( 0, w[ 0], offset); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_bytealign (w[56], w[57], offset); w[62] = hc_bytealign (w[55], w[56], offset); w[61] = hc_bytealign (w[54], w[55], offset); w[60] = hc_bytealign (w[53], w[54], offset); w[59] = hc_bytealign (w[52], w[53], offset); w[58] = hc_bytealign (w[51], w[52], offset); w[57] = hc_bytealign (w[50], w[51], offset); w[56] = hc_bytealign (w[49], w[50], offset); w[55] = hc_bytealign (w[48], w[49], offset); w[54] = hc_bytealign (w[47], w[48], offset); w[53] = hc_bytealign (w[46], w[47], offset); w[52] = hc_bytealign (w[45], w[46], offset); w[51] = hc_bytealign (w[44], w[45], offset); w[50] 
= hc_bytealign (w[43], w[44], offset); w[49] = hc_bytealign (w[42], w[43], offset); w[48] = hc_bytealign (w[41], w[42], offset); w[47] = hc_bytealign (w[40], w[41], offset); w[46] = hc_bytealign (w[39], w[40], offset); w[45] = hc_bytealign (w[38], w[39], offset); w[44] = hc_bytealign (w[37], w[38], offset); w[43] = hc_bytealign (w[36], w[37], offset); w[42] = hc_bytealign (w[35], w[36], offset); w[41] = hc_bytealign (w[34], w[35], offset); w[40] = hc_bytealign (w[33], w[34], offset); w[39] = hc_bytealign (w[32], w[33], offset); w[38] = hc_bytealign (w[31], w[32], offset); w[37] = hc_bytealign (w[30], w[31], offset); w[36] = hc_bytealign (w[29], w[30], offset); w[35] = hc_bytealign (w[28], w[29], offset); w[34] = hc_bytealign (w[27], w[28], offset); w[33] = hc_bytealign (w[26], w[27], offset); w[32] = hc_bytealign (w[25], w[26], offset); w[31] = hc_bytealign (w[24], w[25], offset); w[30] = hc_bytealign (w[23], w[24], offset); w[29] = hc_bytealign (w[22], w[23], offset); w[28] = hc_bytealign (w[21], w[22], offset); w[27] = hc_bytealign (w[20], w[21], offset); w[26] = hc_bytealign (w[19], w[20], offset); w[25] = hc_bytealign (w[18], w[19], offset); w[24] = hc_bytealign (w[17], w[18], offset); w[23] = hc_bytealign (w[16], w[17], offset); w[22] = hc_bytealign (w[15], w[16], offset); w[21] = hc_bytealign (w[14], w[15], offset); w[20] = hc_bytealign (w[13], w[14], offset); w[19] = hc_bytealign (w[12], w[13], offset); w[18] = hc_bytealign (w[11], w[12], offset); w[17] = hc_bytealign (w[10], w[11], offset); w[16] = hc_bytealign (w[ 9], w[10], offset); w[15] = hc_bytealign (w[ 8], w[ 9], offset); w[14] = hc_bytealign (w[ 7], w[ 8], offset); w[13] = hc_bytealign (w[ 6], w[ 7], offset); w[12] = hc_bytealign (w[ 5], w[ 6], offset); w[11] = hc_bytealign (w[ 4], w[ 5], offset); w[10] = hc_bytealign (w[ 3], w[ 4], offset); w[ 9] = hc_bytealign (w[ 2], w[ 3], offset); w[ 8] = hc_bytealign (w[ 1], w[ 2], offset); w[ 7] = hc_bytealign (w[ 0], w[ 1], offset); w[ 6] = hc_bytealign ( 0, 
w[ 0], offset); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_bytealign (w[55], w[56], offset); w[62] = hc_bytealign (w[54], w[55], offset); w[61] = hc_bytealign (w[53], w[54], offset); w[60] = hc_bytealign (w[52], w[53], offset); w[59] = hc_bytealign (w[51], w[52], offset); w[58] = hc_bytealign (w[50], w[51], offset); w[57] = hc_bytealign (w[49], w[50], offset); w[56] = hc_bytealign (w[48], w[49], offset); w[55] = hc_bytealign (w[47], w[48], offset); w[54] = hc_bytealign (w[46], w[47], offset); w[53] = hc_bytealign (w[45], w[46], offset); w[52] = hc_bytealign (w[44], w[45], offset); w[51] = hc_bytealign (w[43], w[44], offset); w[50] = hc_bytealign (w[42], w[43], offset); w[49] = hc_bytealign (w[41], w[42], offset); w[48] = hc_bytealign (w[40], w[41], offset); w[47] = hc_bytealign (w[39], w[40], offset); w[46] = hc_bytealign (w[38], w[39], offset); w[45] = hc_bytealign (w[37], w[38], offset); w[44] = hc_bytealign (w[36], w[37], offset); w[43] = hc_bytealign (w[35], w[36], offset); w[42] = hc_bytealign (w[34], w[35], offset); w[41] = hc_bytealign (w[33], w[34], offset); w[40] = hc_bytealign (w[32], w[33], offset); w[39] = hc_bytealign (w[31], w[32], offset); w[38] = hc_bytealign (w[30], w[31], offset); w[37] = hc_bytealign (w[29], w[30], offset); w[36] = hc_bytealign (w[28], w[29], offset); w[35] = hc_bytealign (w[27], w[28], offset); w[34] = hc_bytealign (w[26], w[27], offset); w[33] = hc_bytealign (w[25], w[26], offset); w[32] = hc_bytealign (w[24], w[25], offset); w[31] = hc_bytealign (w[23], w[24], offset); w[30] = hc_bytealign (w[22], w[23], offset); w[29] = hc_bytealign (w[21], w[22], offset); w[28] = hc_bytealign (w[20], w[21], offset); w[27] = hc_bytealign (w[19], w[20], offset); w[26] = hc_bytealign (w[18], w[19], offset); w[25] = hc_bytealign (w[17], w[18], offset); w[24] = hc_bytealign (w[16], w[17], offset); w[23] = hc_bytealign (w[15], w[16], offset); w[22] = hc_bytealign (w[14], w[15], offset); w[21] = 
hc_bytealign (w[13], w[14], offset); w[20] = hc_bytealign (w[12], w[13], offset); w[19] = hc_bytealign (w[11], w[12], offset); w[18] = hc_bytealign (w[10], w[11], offset); w[17] = hc_bytealign (w[ 9], w[10], offset); w[16] = hc_bytealign (w[ 8], w[ 9], offset); w[15] = hc_bytealign (w[ 7], w[ 8], offset); w[14] = hc_bytealign (w[ 6], w[ 7], offset); w[13] = hc_bytealign (w[ 5], w[ 6], offset); w[12] = hc_bytealign (w[ 4], w[ 5], offset); w[11] = hc_bytealign (w[ 3], w[ 4], offset); w[10] = hc_bytealign (w[ 2], w[ 3], offset); w[ 9] = hc_bytealign (w[ 1], w[ 2], offset); w[ 8] = hc_bytealign (w[ 0], w[ 1], offset); w[ 7] = hc_bytealign ( 0, w[ 0], offset); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_bytealign (w[54], w[55], offset); w[62] = hc_bytealign (w[53], w[54], offset); w[61] = hc_bytealign (w[52], w[53], offset); w[60] = hc_bytealign (w[51], w[52], offset); w[59] = hc_bytealign (w[50], w[51], offset); w[58] = hc_bytealign (w[49], w[50], offset); w[57] = hc_bytealign (w[48], w[49], offset); w[56] = hc_bytealign (w[47], w[48], offset); w[55] = hc_bytealign (w[46], w[47], offset); w[54] = hc_bytealign (w[45], w[46], offset); w[53] = hc_bytealign (w[44], w[45], offset); w[52] = hc_bytealign (w[43], w[44], offset); w[51] = hc_bytealign (w[42], w[43], offset); w[50] = hc_bytealign (w[41], w[42], offset); w[49] = hc_bytealign (w[40], w[41], offset); w[48] = hc_bytealign (w[39], w[40], offset); w[47] = hc_bytealign (w[38], w[39], offset); w[46] = hc_bytealign (w[37], w[38], offset); w[45] = hc_bytealign (w[36], w[37], offset); w[44] = hc_bytealign (w[35], w[36], offset); w[43] = hc_bytealign (w[34], w[35], offset); w[42] = hc_bytealign (w[33], w[34], offset); w[41] = hc_bytealign (w[32], w[33], offset); w[40] = hc_bytealign (w[31], w[32], offset); w[39] = hc_bytealign (w[30], w[31], offset); w[38] = hc_bytealign (w[29], w[30], offset); w[37] = hc_bytealign (w[28], w[29], offset); w[36] = hc_bytealign (w[27], 
w[28], offset); w[35] = hc_bytealign (w[26], w[27], offset); w[34] = hc_bytealign (w[25], w[26], offset); w[33] = hc_bytealign (w[24], w[25], offset); w[32] = hc_bytealign (w[23], w[24], offset); w[31] = hc_bytealign (w[22], w[23], offset); w[30] = hc_bytealign (w[21], w[22], offset); w[29] = hc_bytealign (w[20], w[21], offset); w[28] = hc_bytealign (w[19], w[20], offset); w[27] = hc_bytealign (w[18], w[19], offset); w[26] = hc_bytealign (w[17], w[18], offset); w[25] = hc_bytealign (w[16], w[17], offset); w[24] = hc_bytealign (w[15], w[16], offset); w[23] = hc_bytealign (w[14], w[15], offset); w[22] = hc_bytealign (w[13], w[14], offset); w[21] = hc_bytealign (w[12], w[13], offset); w[20] = hc_bytealign (w[11], w[12], offset); w[19] = hc_bytealign (w[10], w[11], offset); w[18] = hc_bytealign (w[ 9], w[10], offset); w[17] = hc_bytealign (w[ 8], w[ 9], offset); w[16] = hc_bytealign (w[ 7], w[ 8], offset); w[15] = hc_bytealign (w[ 6], w[ 7], offset); w[14] = hc_bytealign (w[ 5], w[ 6], offset); w[13] = hc_bytealign (w[ 4], w[ 5], offset); w[12] = hc_bytealign (w[ 3], w[ 4], offset); w[11] = hc_bytealign (w[ 2], w[ 3], offset); w[10] = hc_bytealign (w[ 1], w[ 2], offset); w[ 9] = hc_bytealign (w[ 0], w[ 1], offset); w[ 8] = hc_bytealign ( 0, w[ 0], offset); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_bytealign (w[53], w[54], offset); w[62] = hc_bytealign (w[52], w[53], offset); w[61] = hc_bytealign (w[51], w[52], offset); w[60] = hc_bytealign (w[50], w[51], offset); w[59] = hc_bytealign (w[49], w[50], offset); w[58] = hc_bytealign (w[48], w[49], offset); w[57] = hc_bytealign (w[47], w[48], offset); w[56] = hc_bytealign (w[46], w[47], offset); w[55] = hc_bytealign (w[45], w[46], offset); w[54] = hc_bytealign (w[44], w[45], offset); w[53] = hc_bytealign (w[43], w[44], offset); w[52] = hc_bytealign (w[42], w[43], offset); w[51] = hc_bytealign (w[41], w[42], offset); w[50] = hc_bytealign (w[40], w[41], 
offset); w[49] = hc_bytealign (w[39], w[40], offset); w[48] = hc_bytealign (w[38], w[39], offset); w[47] = hc_bytealign (w[37], w[38], offset); w[46] = hc_bytealign (w[36], w[37], offset); w[45] = hc_bytealign (w[35], w[36], offset); w[44] = hc_bytealign (w[34], w[35], offset); w[43] = hc_bytealign (w[33], w[34], offset); w[42] = hc_bytealign (w[32], w[33], offset); w[41] = hc_bytealign (w[31], w[32], offset); w[40] = hc_bytealign (w[30], w[31], offset); w[39] = hc_bytealign (w[29], w[30], offset); w[38] = hc_bytealign (w[28], w[29], offset); w[37] = hc_bytealign (w[27], w[28], offset); w[36] = hc_bytealign (w[26], w[27], offset); w[35] = hc_bytealign (w[25], w[26], offset); w[34] = hc_bytealign (w[24], w[25], offset); w[33] = hc_bytealign (w[23], w[24], offset); w[32] = hc_bytealign (w[22], w[23], offset); w[31] = hc_bytealign (w[21], w[22], offset); w[30] = hc_bytealign (w[20], w[21], offset); w[29] = hc_bytealign (w[19], w[20], offset); w[28] = hc_bytealign (w[18], w[19], offset); w[27] = hc_bytealign (w[17], w[18], offset); w[26] = hc_bytealign (w[16], w[17], offset); w[25] = hc_bytealign (w[15], w[16], offset); w[24] = hc_bytealign (w[14], w[15], offset); w[23] = hc_bytealign (w[13], w[14], offset); w[22] = hc_bytealign (w[12], w[13], offset); w[21] = hc_bytealign (w[11], w[12], offset); w[20] = hc_bytealign (w[10], w[11], offset); w[19] = hc_bytealign (w[ 9], w[10], offset); w[18] = hc_bytealign (w[ 8], w[ 9], offset); w[17] = hc_bytealign (w[ 7], w[ 8], offset); w[16] = hc_bytealign (w[ 6], w[ 7], offset); w[15] = hc_bytealign (w[ 5], w[ 6], offset); w[14] = hc_bytealign (w[ 4], w[ 5], offset); w[13] = hc_bytealign (w[ 3], w[ 4], offset); w[12] = hc_bytealign (w[ 2], w[ 3], offset); w[11] = hc_bytealign (w[ 1], w[ 2], offset); w[10] = hc_bytealign (w[ 0], w[ 1], offset); w[ 9] = hc_bytealign ( 0, w[ 0], offset); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_bytealign (w[52], 
w[53], offset); w[62] = hc_bytealign (w[51], w[52], offset); w[61] = hc_bytealign (w[50], w[51], offset); w[60] = hc_bytealign (w[49], w[50], offset); w[59] = hc_bytealign (w[48], w[49], offset); w[58] = hc_bytealign (w[47], w[48], offset); w[57] = hc_bytealign (w[46], w[47], offset); w[56] = hc_bytealign (w[45], w[46], offset); w[55] = hc_bytealign (w[44], w[45], offset); w[54] = hc_bytealign (w[43], w[44], offset); w[53] = hc_bytealign (w[42], w[43], offset); w[52] = hc_bytealign (w[41], w[42], offset); w[51] = hc_bytealign (w[40], w[41], offset); w[50] = hc_bytealign (w[39], w[40], offset); w[49] = hc_bytealign (w[38], w[39], offset); w[48] = hc_bytealign (w[37], w[38], offset); w[47] = hc_bytealign (w[36], w[37], offset); w[46] = hc_bytealign (w[35], w[36], offset); w[45] = hc_bytealign (w[34], w[35], offset); w[44] = hc_bytealign (w[33], w[34], offset); w[43] = hc_bytealign (w[32], w[33], offset); w[42] = hc_bytealign (w[31], w[32], offset); w[41] = hc_bytealign (w[30], w[31], offset); w[40] = hc_bytealign (w[29], w[30], offset); w[39] = hc_bytealign (w[28], w[29], offset); w[38] = hc_bytealign (w[27], w[28], offset); w[37] = hc_bytealign (w[26], w[27], offset); w[36] = hc_bytealign (w[25], w[26], offset); w[35] = hc_bytealign (w[24], w[25], offset); w[34] = hc_bytealign (w[23], w[24], offset); w[33] = hc_bytealign (w[22], w[23], offset); w[32] = hc_bytealign (w[21], w[22], offset); w[31] = hc_bytealign (w[20], w[21], offset); w[30] = hc_bytealign (w[19], w[20], offset); w[29] = hc_bytealign (w[18], w[19], offset); w[28] = hc_bytealign (w[17], w[18], offset); w[27] = hc_bytealign (w[16], w[17], offset); w[26] = hc_bytealign (w[15], w[16], offset); w[25] = hc_bytealign (w[14], w[15], offset); w[24] = hc_bytealign (w[13], w[14], offset); w[23] = hc_bytealign (w[12], w[13], offset); w[22] = hc_bytealign (w[11], w[12], offset); w[21] = hc_bytealign (w[10], w[11], offset); w[20] = hc_bytealign (w[ 9], w[10], offset); w[19] = hc_bytealign (w[ 8], w[ 9], offset); 
w[18] = hc_bytealign (w[ 7], w[ 8], offset); w[17] = hc_bytealign (w[ 6], w[ 7], offset); w[16] = hc_bytealign (w[ 5], w[ 6], offset); w[15] = hc_bytealign (w[ 4], w[ 5], offset); w[14] = hc_bytealign (w[ 3], w[ 4], offset); w[13] = hc_bytealign (w[ 2], w[ 3], offset); w[12] = hc_bytealign (w[ 1], w[ 2], offset); w[11] = hc_bytealign (w[ 0], w[ 1], offset); w[10] = hc_bytealign ( 0, w[ 0], offset); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_bytealign (w[51], w[52], offset); w[62] = hc_bytealign (w[50], w[51], offset); w[61] = hc_bytealign (w[49], w[50], offset); w[60] = hc_bytealign (w[48], w[49], offset); w[59] = hc_bytealign (w[47], w[48], offset); w[58] = hc_bytealign (w[46], w[47], offset); w[57] = hc_bytealign (w[45], w[46], offset); w[56] = hc_bytealign (w[44], w[45], offset); w[55] = hc_bytealign (w[43], w[44], offset); w[54] = hc_bytealign (w[42], w[43], offset); w[53] = hc_bytealign (w[41], w[42], offset); w[52] = hc_bytealign (w[40], w[41], offset); w[51] = hc_bytealign (w[39], w[40], offset); w[50] = hc_bytealign (w[38], w[39], offset); w[49] = hc_bytealign (w[37], w[38], offset); w[48] = hc_bytealign (w[36], w[37], offset); w[47] = hc_bytealign (w[35], w[36], offset); w[46] = hc_bytealign (w[34], w[35], offset); w[45] = hc_bytealign (w[33], w[34], offset); w[44] = hc_bytealign (w[32], w[33], offset); w[43] = hc_bytealign (w[31], w[32], offset); w[42] = hc_bytealign (w[30], w[31], offset); w[41] = hc_bytealign (w[29], w[30], offset); w[40] = hc_bytealign (w[28], w[29], offset); w[39] = hc_bytealign (w[27], w[28], offset); w[38] = hc_bytealign (w[26], w[27], offset); w[37] = hc_bytealign (w[25], w[26], offset); w[36] = hc_bytealign (w[24], w[25], offset); w[35] = hc_bytealign (w[23], w[24], offset); w[34] = hc_bytealign (w[22], w[23], offset); w[33] = hc_bytealign (w[21], w[22], offset); w[32] = hc_bytealign (w[20], w[21], offset); w[31] = hc_bytealign (w[19], 
w[20], offset); w[30] = hc_bytealign (w[18], w[19], offset); w[29] = hc_bytealign (w[17], w[18], offset); w[28] = hc_bytealign (w[16], w[17], offset); w[27] = hc_bytealign (w[15], w[16], offset); w[26] = hc_bytealign (w[14], w[15], offset); w[25] = hc_bytealign (w[13], w[14], offset); w[24] = hc_bytealign (w[12], w[13], offset); w[23] = hc_bytealign (w[11], w[12], offset); w[22] = hc_bytealign (w[10], w[11], offset); w[21] = hc_bytealign (w[ 9], w[10], offset); w[20] = hc_bytealign (w[ 8], w[ 9], offset); w[19] = hc_bytealign (w[ 7], w[ 8], offset); w[18] = hc_bytealign (w[ 6], w[ 7], offset); w[17] = hc_bytealign (w[ 5], w[ 6], offset); w[16] = hc_bytealign (w[ 4], w[ 5], offset); w[15] = hc_bytealign (w[ 3], w[ 4], offset); w[14] = hc_bytealign (w[ 2], w[ 3], offset); w[13] = hc_bytealign (w[ 1], w[ 2], offset); w[12] = hc_bytealign (w[ 0], w[ 1], offset); w[11] = hc_bytealign ( 0, w[ 0], offset); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_bytealign (w[50], w[51], offset); w[62] = hc_bytealign (w[49], w[50], offset); w[61] = hc_bytealign (w[48], w[49], offset); w[60] = hc_bytealign (w[47], w[48], offset); w[59] = hc_bytealign (w[46], w[47], offset); w[58] = hc_bytealign (w[45], w[46], offset); w[57] = hc_bytealign (w[44], w[45], offset); w[56] = hc_bytealign (w[43], w[44], offset); w[55] = hc_bytealign (w[42], w[43], offset); w[54] = hc_bytealign (w[41], w[42], offset); w[53] = hc_bytealign (w[40], w[41], offset); w[52] = hc_bytealign (w[39], w[40], offset); w[51] = hc_bytealign (w[38], w[39], offset); w[50] = hc_bytealign (w[37], w[38], offset); w[49] = hc_bytealign (w[36], w[37], offset); w[48] = hc_bytealign (w[35], w[36], offset); w[47] = hc_bytealign (w[34], w[35], offset); w[46] = hc_bytealign (w[33], w[34], offset); w[45] = hc_bytealign (w[32], w[33], offset); w[44] = hc_bytealign (w[31], w[32], offset); w[43] = hc_bytealign (w[30], w[31], offset); 
w[42] = hc_bytealign (w[29], w[30], offset); w[41] = hc_bytealign (w[28], w[29], offset); w[40] = hc_bytealign (w[27], w[28], offset); w[39] = hc_bytealign (w[26], w[27], offset); w[38] = hc_bytealign (w[25], w[26], offset); w[37] = hc_bytealign (w[24], w[25], offset); w[36] = hc_bytealign (w[23], w[24], offset); w[35] = hc_bytealign (w[22], w[23], offset); w[34] = hc_bytealign (w[21], w[22], offset); w[33] = hc_bytealign (w[20], w[21], offset); w[32] = hc_bytealign (w[19], w[20], offset); w[31] = hc_bytealign (w[18], w[19], offset); w[30] = hc_bytealign (w[17], w[18], offset); w[29] = hc_bytealign (w[16], w[17], offset); w[28] = hc_bytealign (w[15], w[16], offset); w[27] = hc_bytealign (w[14], w[15], offset); w[26] = hc_bytealign (w[13], w[14], offset); w[25] = hc_bytealign (w[12], w[13], offset); w[24] = hc_bytealign (w[11], w[12], offset); w[23] = hc_bytealign (w[10], w[11], offset); w[22] = hc_bytealign (w[ 9], w[10], offset); w[21] = hc_bytealign (w[ 8], w[ 9], offset); w[20] = hc_bytealign (w[ 7], w[ 8], offset); w[19] = hc_bytealign (w[ 6], w[ 7], offset); w[18] = hc_bytealign (w[ 5], w[ 6], offset); w[17] = hc_bytealign (w[ 4], w[ 5], offset); w[16] = hc_bytealign (w[ 3], w[ 4], offset); w[15] = hc_bytealign (w[ 2], w[ 3], offset); w[14] = hc_bytealign (w[ 1], w[ 2], offset); w[13] = hc_bytealign (w[ 0], w[ 1], offset); w[12] = hc_bytealign ( 0, w[ 0], offset); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_bytealign (w[49], w[50], offset); w[62] = hc_bytealign (w[48], w[49], offset); w[61] = hc_bytealign (w[47], w[48], offset); w[60] = hc_bytealign (w[46], w[47], offset); w[59] = hc_bytealign (w[45], w[46], offset); w[58] = hc_bytealign (w[44], w[45], offset); w[57] = hc_bytealign (w[43], w[44], offset); w[56] = hc_bytealign (w[42], w[43], offset); w[55] = hc_bytealign (w[41], w[42], offset); w[54] = hc_bytealign (w[40], w[41], offset); w[53] = 
hc_bytealign (w[39], w[40], offset); w[52] = hc_bytealign (w[38], w[39], offset); w[51] = hc_bytealign (w[37], w[38], offset); w[50] = hc_bytealign (w[36], w[37], offset); w[49] = hc_bytealign (w[35], w[36], offset); w[48] = hc_bytealign (w[34], w[35], offset); w[47] = hc_bytealign (w[33], w[34], offset); w[46] = hc_bytealign (w[32], w[33], offset); w[45] = hc_bytealign (w[31], w[32], offset); w[44] = hc_bytealign (w[30], w[31], offset); w[43] = hc_bytealign (w[29], w[30], offset); w[42] = hc_bytealign (w[28], w[29], offset); w[41] = hc_bytealign (w[27], w[28], offset); w[40] = hc_bytealign (w[26], w[27], offset); w[39] = hc_bytealign (w[25], w[26], offset); w[38] = hc_bytealign (w[24], w[25], offset); w[37] = hc_bytealign (w[23], w[24], offset); w[36] = hc_bytealign (w[22], w[23], offset); w[35] = hc_bytealign (w[21], w[22], offset); w[34] = hc_bytealign (w[20], w[21], offset); w[33] = hc_bytealign (w[19], w[20], offset); w[32] = hc_bytealign (w[18], w[19], offset); w[31] = hc_bytealign (w[17], w[18], offset); w[30] = hc_bytealign (w[16], w[17], offset); w[29] = hc_bytealign (w[15], w[16], offset); w[28] = hc_bytealign (w[14], w[15], offset); w[27] = hc_bytealign (w[13], w[14], offset); w[26] = hc_bytealign (w[12], w[13], offset); w[25] = hc_bytealign (w[11], w[12], offset); w[24] = hc_bytealign (w[10], w[11], offset); w[23] = hc_bytealign (w[ 9], w[10], offset); w[22] = hc_bytealign (w[ 8], w[ 9], offset); w[21] = hc_bytealign (w[ 7], w[ 8], offset); w[20] = hc_bytealign (w[ 6], w[ 7], offset); w[19] = hc_bytealign (w[ 5], w[ 6], offset); w[18] = hc_bytealign (w[ 4], w[ 5], offset); w[17] = hc_bytealign (w[ 3], w[ 4], offset); w[16] = hc_bytealign (w[ 2], w[ 3], offset); w[15] = hc_bytealign (w[ 1], w[ 2], offset); w[14] = hc_bytealign (w[ 0], w[ 1], offset); w[13] = hc_bytealign ( 0, w[ 0], offset); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] 
= hc_bytealign (w[48], w[49], offset); w[62] = hc_bytealign (w[47], w[48], offset); w[61] = hc_bytealign (w[46], w[47], offset); w[60] = hc_bytealign (w[45], w[46], offset); w[59] = hc_bytealign (w[44], w[45], offset); w[58] = hc_bytealign (w[43], w[44], offset); w[57] = hc_bytealign (w[42], w[43], offset); w[56] = hc_bytealign (w[41], w[42], offset); w[55] = hc_bytealign (w[40], w[41], offset); w[54] = hc_bytealign (w[39], w[40], offset); w[53] = hc_bytealign (w[38], w[39], offset); w[52] = hc_bytealign (w[37], w[38], offset); w[51] = hc_bytealign (w[36], w[37], offset); w[50] = hc_bytealign (w[35], w[36], offset); w[49] = hc_bytealign (w[34], w[35], offset); w[48] = hc_bytealign (w[33], w[34], offset); w[47] = hc_bytealign (w[32], w[33], offset); w[46] = hc_bytealign (w[31], w[32], offset); w[45] = hc_bytealign (w[30], w[31], offset); w[44] = hc_bytealign (w[29], w[30], offset); w[43] = hc_bytealign (w[28], w[29], offset); w[42] = hc_bytealign (w[27], w[28], offset); w[41] = hc_bytealign (w[26], w[27], offset); w[40] = hc_bytealign (w[25], w[26], offset); w[39] = hc_bytealign (w[24], w[25], offset); w[38] = hc_bytealign (w[23], w[24], offset); w[37] = hc_bytealign (w[22], w[23], offset); w[36] = hc_bytealign (w[21], w[22], offset); w[35] = hc_bytealign (w[20], w[21], offset); w[34] = hc_bytealign (w[19], w[20], offset); w[33] = hc_bytealign (w[18], w[19], offset); w[32] = hc_bytealign (w[17], w[18], offset); w[31] = hc_bytealign (w[16], w[17], offset); w[30] = hc_bytealign (w[15], w[16], offset); w[29] = hc_bytealign (w[14], w[15], offset); w[28] = hc_bytealign (w[13], w[14], offset); w[27] = hc_bytealign (w[12], w[13], offset); w[26] = hc_bytealign (w[11], w[12], offset); w[25] = hc_bytealign (w[10], w[11], offset); w[24] = hc_bytealign (w[ 9], w[10], offset); w[23] = hc_bytealign (w[ 8], w[ 9], offset); w[22] = hc_bytealign (w[ 7], w[ 8], offset); w[21] = hc_bytealign (w[ 6], w[ 7], offset); w[20] = hc_bytealign (w[ 5], w[ 6], offset); w[19] = hc_bytealign (w[ 
4], w[ 5], offset); w[18] = hc_bytealign (w[ 3], w[ 4], offset); w[17] = hc_bytealign (w[ 2], w[ 3], offset); w[16] = hc_bytealign (w[ 1], w[ 2], offset); w[15] = hc_bytealign (w[ 0], w[ 1], offset); w[14] = hc_bytealign ( 0, w[ 0], offset); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_bytealign (w[47], w[48], offset); w[62] = hc_bytealign (w[46], w[47], offset); w[61] = hc_bytealign (w[45], w[46], offset); w[60] = hc_bytealign (w[44], w[45], offset); w[59] = hc_bytealign (w[43], w[44], offset); w[58] = hc_bytealign (w[42], w[43], offset); w[57] = hc_bytealign (w[41], w[42], offset); w[56] = hc_bytealign (w[40], w[41], offset); w[55] = hc_bytealign (w[39], w[40], offset); w[54] = hc_bytealign (w[38], w[39], offset); w[53] = hc_bytealign (w[37], w[38], offset); w[52] = hc_bytealign (w[36], w[37], offset); w[51] = hc_bytealign (w[35], w[36], offset); w[50] = hc_bytealign (w[34], w[35], offset); w[49] = hc_bytealign (w[33], w[34], offset); w[48] = hc_bytealign (w[32], w[33], offset); w[47] = hc_bytealign (w[31], w[32], offset); w[46] = hc_bytealign (w[30], w[31], offset); w[45] = hc_bytealign (w[29], w[30], offset); w[44] = hc_bytealign (w[28], w[29], offset); w[43] = hc_bytealign (w[27], w[28], offset); w[42] = hc_bytealign (w[26], w[27], offset); w[41] = hc_bytealign (w[25], w[26], offset); w[40] = hc_bytealign (w[24], w[25], offset); w[39] = hc_bytealign (w[23], w[24], offset); w[38] = hc_bytealign (w[22], w[23], offset); w[37] = hc_bytealign (w[21], w[22], offset); w[36] = hc_bytealign (w[20], w[21], offset); w[35] = hc_bytealign (w[19], w[20], offset); w[34] = hc_bytealign (w[18], w[19], offset); w[33] = hc_bytealign (w[17], w[18], offset); w[32] = hc_bytealign (w[16], w[17], offset); w[31] = hc_bytealign (w[15], w[16], offset); w[30] = hc_bytealign (w[14], w[15], offset); w[29] = hc_bytealign (w[13], w[14], offset); w[28] = 
hc_bytealign (w[12], w[13], offset); w[27] = hc_bytealign (w[11], w[12], offset); w[26] = hc_bytealign (w[10], w[11], offset); w[25] = hc_bytealign (w[ 9], w[10], offset); w[24] = hc_bytealign (w[ 8], w[ 9], offset); w[23] = hc_bytealign (w[ 7], w[ 8], offset); w[22] = hc_bytealign (w[ 6], w[ 7], offset); w[21] = hc_bytealign (w[ 5], w[ 6], offset); w[20] = hc_bytealign (w[ 4], w[ 5], offset); w[19] = hc_bytealign (w[ 3], w[ 4], offset); w[18] = hc_bytealign (w[ 2], w[ 3], offset); w[17] = hc_bytealign (w[ 1], w[ 2], offset); w[16] = hc_bytealign (w[ 0], w[ 1], offset); w[15] = hc_bytealign ( 0, w[ 0], offset); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_bytealign (w[46], w[47], offset); w[62] = hc_bytealign (w[45], w[46], offset); w[61] = hc_bytealign (w[44], w[45], offset); w[60] = hc_bytealign (w[43], w[44], offset); w[59] = hc_bytealign (w[42], w[43], offset); w[58] = hc_bytealign (w[41], w[42], offset); w[57] = hc_bytealign (w[40], w[41], offset); w[56] = hc_bytealign (w[39], w[40], offset); w[55] = hc_bytealign (w[38], w[39], offset); w[54] = hc_bytealign (w[37], w[38], offset); w[53] = hc_bytealign (w[36], w[37], offset); w[52] = hc_bytealign (w[35], w[36], offset); w[51] = hc_bytealign (w[34], w[35], offset); w[50] = hc_bytealign (w[33], w[34], offset); w[49] = hc_bytealign (w[32], w[33], offset); w[48] = hc_bytealign (w[31], w[32], offset); w[47] = hc_bytealign (w[30], w[31], offset); w[46] = hc_bytealign (w[29], w[30], offset); w[45] = hc_bytealign (w[28], w[29], offset); w[44] = hc_bytealign (w[27], w[28], offset); w[43] = hc_bytealign (w[26], w[27], offset); w[42] = hc_bytealign (w[25], w[26], offset); w[41] = hc_bytealign (w[24], w[25], offset); w[40] = hc_bytealign (w[23], w[24], offset); w[39] = hc_bytealign (w[22], w[23], offset); w[38] = hc_bytealign (w[21], w[22], offset); w[37] = hc_bytealign (w[20], 
w[21], offset); w[36] = hc_bytealign (w[19], w[20], offset); w[35] = hc_bytealign (w[18], w[19], offset); w[34] = hc_bytealign (w[17], w[18], offset); w[33] = hc_bytealign (w[16], w[17], offset); w[32] = hc_bytealign (w[15], w[16], offset); w[31] = hc_bytealign (w[14], w[15], offset); w[30] = hc_bytealign (w[13], w[14], offset); w[29] = hc_bytealign (w[12], w[13], offset); w[28] = hc_bytealign (w[11], w[12], offset); w[27] = hc_bytealign (w[10], w[11], offset); w[26] = hc_bytealign (w[ 9], w[10], offset); w[25] = hc_bytealign (w[ 8], w[ 9], offset); w[24] = hc_bytealign (w[ 7], w[ 8], offset); w[23] = hc_bytealign (w[ 6], w[ 7], offset); w[22] = hc_bytealign (w[ 5], w[ 6], offset); w[21] = hc_bytealign (w[ 4], w[ 5], offset); w[20] = hc_bytealign (w[ 3], w[ 4], offset); w[19] = hc_bytealign (w[ 2], w[ 3], offset); w[18] = hc_bytealign (w[ 1], w[ 2], offset); w[17] = hc_bytealign (w[ 0], w[ 1], offset); w[16] = hc_bytealign ( 0, w[ 0], offset); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_bytealign (w[45], w[46], offset); w[62] = hc_bytealign (w[44], w[45], offset); w[61] = hc_bytealign (w[43], w[44], offset); w[60] = hc_bytealign (w[42], w[43], offset); w[59] = hc_bytealign (w[41], w[42], offset); w[58] = hc_bytealign (w[40], w[41], offset); w[57] = hc_bytealign (w[39], w[40], offset); w[56] = hc_bytealign (w[38], w[39], offset); w[55] = hc_bytealign (w[37], w[38], offset); w[54] = hc_bytealign (w[36], w[37], offset); w[53] = hc_bytealign (w[35], w[36], offset); w[52] = hc_bytealign (w[34], w[35], offset); w[51] = hc_bytealign (w[33], w[34], offset); w[50] = hc_bytealign (w[32], w[33], offset); w[49] = hc_bytealign (w[31], w[32], offset); w[48] = hc_bytealign (w[30], w[31], offset); w[47] = hc_bytealign (w[29], w[30], offset); w[46] = hc_bytealign (w[28], w[29], offset); w[45] = hc_bytealign (w[27], w[28], 
offset); w[44] = hc_bytealign (w[26], w[27], offset); w[43] = hc_bytealign (w[25], w[26], offset); w[42] = hc_bytealign (w[24], w[25], offset); w[41] = hc_bytealign (w[23], w[24], offset); w[40] = hc_bytealign (w[22], w[23], offset); w[39] = hc_bytealign (w[21], w[22], offset); w[38] = hc_bytealign (w[20], w[21], offset); w[37] = hc_bytealign (w[19], w[20], offset); w[36] = hc_bytealign (w[18], w[19], offset); w[35] = hc_bytealign (w[17], w[18], offset); w[34] = hc_bytealign (w[16], w[17], offset); w[33] = hc_bytealign (w[15], w[16], offset); w[32] = hc_bytealign (w[14], w[15], offset); w[31] = hc_bytealign (w[13], w[14], offset); w[30] = hc_bytealign (w[12], w[13], offset); w[29] = hc_bytealign (w[11], w[12], offset); w[28] = hc_bytealign (w[10], w[11], offset); w[27] = hc_bytealign (w[ 9], w[10], offset); w[26] = hc_bytealign (w[ 8], w[ 9], offset); w[25] = hc_bytealign (w[ 7], w[ 8], offset); w[24] = hc_bytealign (w[ 6], w[ 7], offset); w[23] = hc_bytealign (w[ 5], w[ 6], offset); w[22] = hc_bytealign (w[ 4], w[ 5], offset); w[21] = hc_bytealign (w[ 3], w[ 4], offset); w[20] = hc_bytealign (w[ 2], w[ 3], offset); w[19] = hc_bytealign (w[ 1], w[ 2], offset); w[18] = hc_bytealign (w[ 0], w[ 1], offset); w[17] = hc_bytealign ( 0, w[ 0], offset); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_bytealign (w[44], w[45], offset); w[62] = hc_bytealign (w[43], w[44], offset); w[61] = hc_bytealign (w[42], w[43], offset); w[60] = hc_bytealign (w[41], w[42], offset); w[59] = hc_bytealign (w[40], w[41], offset); w[58] = hc_bytealign (w[39], w[40], offset); w[57] = hc_bytealign (w[38], w[39], offset); w[56] = hc_bytealign (w[37], w[38], offset); w[55] = hc_bytealign (w[36], w[37], offset); w[54] = hc_bytealign (w[35], w[36], offset); w[53] = hc_bytealign (w[34], w[35], offset); w[52] = hc_bytealign (w[33], w[34], 
offset); w[51] = hc_bytealign (w[32], w[33], offset); w[50] = hc_bytealign (w[31], w[32], offset); w[49] = hc_bytealign (w[30], w[31], offset); w[48] = hc_bytealign (w[29], w[30], offset); w[47] = hc_bytealign (w[28], w[29], offset); w[46] = hc_bytealign (w[27], w[28], offset); w[45] = hc_bytealign (w[26], w[27], offset); w[44] = hc_bytealign (w[25], w[26], offset); w[43] = hc_bytealign (w[24], w[25], offset); w[42] = hc_bytealign (w[23], w[24], offset); w[41] = hc_bytealign (w[22], w[23], offset); w[40] = hc_bytealign (w[21], w[22], offset); w[39] = hc_bytealign (w[20], w[21], offset); w[38] = hc_bytealign (w[19], w[20], offset); w[37] = hc_bytealign (w[18], w[19], offset); w[36] = hc_bytealign (w[17], w[18], offset); w[35] = hc_bytealign (w[16], w[17], offset); w[34] = hc_bytealign (w[15], w[16], offset); w[33] = hc_bytealign (w[14], w[15], offset); w[32] = hc_bytealign (w[13], w[14], offset); w[31] = hc_bytealign (w[12], w[13], offset); w[30] = hc_bytealign (w[11], w[12], offset); w[29] = hc_bytealign (w[10], w[11], offset); w[28] = hc_bytealign (w[ 9], w[10], offset); w[27] = hc_bytealign (w[ 8], w[ 9], offset); w[26] = hc_bytealign (w[ 7], w[ 8], offset); w[25] = hc_bytealign (w[ 6], w[ 7], offset); w[24] = hc_bytealign (w[ 5], w[ 6], offset); w[23] = hc_bytealign (w[ 4], w[ 5], offset); w[22] = hc_bytealign (w[ 3], w[ 4], offset); w[21] = hc_bytealign (w[ 2], w[ 3], offset); w[20] = hc_bytealign (w[ 1], w[ 2], offset); w[19] = hc_bytealign (w[ 0], w[ 1], offset); w[18] = hc_bytealign ( 0, w[ 0], offset); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_bytealign (w[43], w[44], offset); w[62] = hc_bytealign (w[42], w[43], offset); w[61] = hc_bytealign (w[41], w[42], offset); w[60] = hc_bytealign (w[40], w[41], offset); w[59] = hc_bytealign (w[39], w[40], offset); w[58] = hc_bytealign 
(w[38], w[39], offset); w[57] = hc_bytealign (w[37], w[38], offset); w[56] = hc_bytealign (w[36], w[37], offset); w[55] = hc_bytealign (w[35], w[36], offset); w[54] = hc_bytealign (w[34], w[35], offset); w[53] = hc_bytealign (w[33], w[34], offset); w[52] = hc_bytealign (w[32], w[33], offset); w[51] = hc_bytealign (w[31], w[32], offset); w[50] = hc_bytealign (w[30], w[31], offset); w[49] = hc_bytealign (w[29], w[30], offset); w[48] = hc_bytealign (w[28], w[29], offset); w[47] = hc_bytealign (w[27], w[28], offset); w[46] = hc_bytealign (w[26], w[27], offset); w[45] = hc_bytealign (w[25], w[26], offset); w[44] = hc_bytealign (w[24], w[25], offset); w[43] = hc_bytealign (w[23], w[24], offset); w[42] = hc_bytealign (w[22], w[23], offset); w[41] = hc_bytealign (w[21], w[22], offset); w[40] = hc_bytealign (w[20], w[21], offset); w[39] = hc_bytealign (w[19], w[20], offset); w[38] = hc_bytealign (w[18], w[19], offset); w[37] = hc_bytealign (w[17], w[18], offset); w[36] = hc_bytealign (w[16], w[17], offset); w[35] = hc_bytealign (w[15], w[16], offset); w[34] = hc_bytealign (w[14], w[15], offset); w[33] = hc_bytealign (w[13], w[14], offset); w[32] = hc_bytealign (w[12], w[13], offset); w[31] = hc_bytealign (w[11], w[12], offset); w[30] = hc_bytealign (w[10], w[11], offset); w[29] = hc_bytealign (w[ 9], w[10], offset); w[28] = hc_bytealign (w[ 8], w[ 9], offset); w[27] = hc_bytealign (w[ 7], w[ 8], offset); w[26] = hc_bytealign (w[ 6], w[ 7], offset); w[25] = hc_bytealign (w[ 5], w[ 6], offset); w[24] = hc_bytealign (w[ 4], w[ 5], offset); w[23] = hc_bytealign (w[ 3], w[ 4], offset); w[22] = hc_bytealign (w[ 2], w[ 3], offset); w[21] = hc_bytealign (w[ 1], w[ 2], offset); w[20] = hc_bytealign (w[ 0], w[ 1], offset); w[19] = hc_bytealign ( 0, w[ 0], offset); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 
20: w[63] = hc_bytealign (w[42], w[43], offset); w[62] = hc_bytealign (w[41], w[42], offset); w[61] = hc_bytealign (w[40], w[41], offset); w[60] = hc_bytealign (w[39], w[40], offset); w[59] = hc_bytealign (w[38], w[39], offset); w[58] = hc_bytealign (w[37], w[38], offset); w[57] = hc_bytealign (w[36], w[37], offset); w[56] = hc_bytealign (w[35], w[36], offset); w[55] = hc_bytealign (w[34], w[35], offset); w[54] = hc_bytealign (w[33], w[34], offset); w[53] = hc_bytealign (w[32], w[33], offset); w[52] = hc_bytealign (w[31], w[32], offset); w[51] = hc_bytealign (w[30], w[31], offset); w[50] = hc_bytealign (w[29], w[30], offset); w[49] = hc_bytealign (w[28], w[29], offset); w[48] = hc_bytealign (w[27], w[28], offset); w[47] = hc_bytealign (w[26], w[27], offset); w[46] = hc_bytealign (w[25], w[26], offset); w[45] = hc_bytealign (w[24], w[25], offset); w[44] = hc_bytealign (w[23], w[24], offset); w[43] = hc_bytealign (w[22], w[23], offset); w[42] = hc_bytealign (w[21], w[22], offset); w[41] = hc_bytealign (w[20], w[21], offset); w[40] = hc_bytealign (w[19], w[20], offset); w[39] = hc_bytealign (w[18], w[19], offset); w[38] = hc_bytealign (w[17], w[18], offset); w[37] = hc_bytealign (w[16], w[17], offset); w[36] = hc_bytealign (w[15], w[16], offset); w[35] = hc_bytealign (w[14], w[15], offset); w[34] = hc_bytealign (w[13], w[14], offset); w[33] = hc_bytealign (w[12], w[13], offset); w[32] = hc_bytealign (w[11], w[12], offset); w[31] = hc_bytealign (w[10], w[11], offset); w[30] = hc_bytealign (w[ 9], w[10], offset); w[29] = hc_bytealign (w[ 8], w[ 9], offset); w[28] = hc_bytealign (w[ 7], w[ 8], offset); w[27] = hc_bytealign (w[ 6], w[ 7], offset); w[26] = hc_bytealign (w[ 5], w[ 6], offset); w[25] = hc_bytealign (w[ 4], w[ 5], offset); w[24] = hc_bytealign (w[ 3], w[ 4], offset); w[23] = hc_bytealign (w[ 2], w[ 3], offset); w[22] = hc_bytealign (w[ 1], w[ 2], offset); w[21] = hc_bytealign (w[ 0], w[ 1], offset); w[20] = hc_bytealign ( 0, w[ 0], offset); w[19] = 0; w[18] = 
0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_bytealign (w[41], w[42], offset); w[62] = hc_bytealign (w[40], w[41], offset); w[61] = hc_bytealign (w[39], w[40], offset); w[60] = hc_bytealign (w[38], w[39], offset); w[59] = hc_bytealign (w[37], w[38], offset); w[58] = hc_bytealign (w[36], w[37], offset); w[57] = hc_bytealign (w[35], w[36], offset); w[56] = hc_bytealign (w[34], w[35], offset); w[55] = hc_bytealign (w[33], w[34], offset); w[54] = hc_bytealign (w[32], w[33], offset); w[53] = hc_bytealign (w[31], w[32], offset); w[52] = hc_bytealign (w[30], w[31], offset); w[51] = hc_bytealign (w[29], w[30], offset); w[50] = hc_bytealign (w[28], w[29], offset); w[49] = hc_bytealign (w[27], w[28], offset); w[48] = hc_bytealign (w[26], w[27], offset); w[47] = hc_bytealign (w[25], w[26], offset); w[46] = hc_bytealign (w[24], w[25], offset); w[45] = hc_bytealign (w[23], w[24], offset); w[44] = hc_bytealign (w[22], w[23], offset); w[43] = hc_bytealign (w[21], w[22], offset); w[42] = hc_bytealign (w[20], w[21], offset); w[41] = hc_bytealign (w[19], w[20], offset); w[40] = hc_bytealign (w[18], w[19], offset); w[39] = hc_bytealign (w[17], w[18], offset); w[38] = hc_bytealign (w[16], w[17], offset); w[37] = hc_bytealign (w[15], w[16], offset); w[36] = hc_bytealign (w[14], w[15], offset); w[35] = hc_bytealign (w[13], w[14], offset); w[34] = hc_bytealign (w[12], w[13], offset); w[33] = hc_bytealign (w[11], w[12], offset); w[32] = hc_bytealign (w[10], w[11], offset); w[31] = hc_bytealign (w[ 9], w[10], offset); w[30] = hc_bytealign (w[ 8], w[ 9], offset); w[29] = hc_bytealign (w[ 7], w[ 8], offset); w[28] = hc_bytealign (w[ 6], w[ 7], offset); w[27] = hc_bytealign (w[ 5], w[ 6], offset); w[26] = hc_bytealign (w[ 4], w[ 5], offset); w[25] = hc_bytealign (w[ 3], w[ 4], offset); w[24] = hc_bytealign (w[ 
2], w[ 3], offset); w[23] = hc_bytealign (w[ 1], w[ 2], offset); w[22] = hc_bytealign (w[ 0], w[ 1], offset); w[21] = hc_bytealign ( 0, w[ 0], offset); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_bytealign (w[40], w[41], offset); w[62] = hc_bytealign (w[39], w[40], offset); w[61] = hc_bytealign (w[38], w[39], offset); w[60] = hc_bytealign (w[37], w[38], offset); w[59] = hc_bytealign (w[36], w[37], offset); w[58] = hc_bytealign (w[35], w[36], offset); w[57] = hc_bytealign (w[34], w[35], offset); w[56] = hc_bytealign (w[33], w[34], offset); w[55] = hc_bytealign (w[32], w[33], offset); w[54] = hc_bytealign (w[31], w[32], offset); w[53] = hc_bytealign (w[30], w[31], offset); w[52] = hc_bytealign (w[29], w[30], offset); w[51] = hc_bytealign (w[28], w[29], offset); w[50] = hc_bytealign (w[27], w[28], offset); w[49] = hc_bytealign (w[26], w[27], offset); w[48] = hc_bytealign (w[25], w[26], offset); w[47] = hc_bytealign (w[24], w[25], offset); w[46] = hc_bytealign (w[23], w[24], offset); w[45] = hc_bytealign (w[22], w[23], offset); w[44] = hc_bytealign (w[21], w[22], offset); w[43] = hc_bytealign (w[20], w[21], offset); w[42] = hc_bytealign (w[19], w[20], offset); w[41] = hc_bytealign (w[18], w[19], offset); w[40] = hc_bytealign (w[17], w[18], offset); w[39] = hc_bytealign (w[16], w[17], offset); w[38] = hc_bytealign (w[15], w[16], offset); w[37] = hc_bytealign (w[14], w[15], offset); w[36] = hc_bytealign (w[13], w[14], offset); w[35] = hc_bytealign (w[12], w[13], offset); w[34] = hc_bytealign (w[11], w[12], offset); w[33] = hc_bytealign (w[10], w[11], offset); w[32] = hc_bytealign (w[ 9], w[10], offset); w[31] = hc_bytealign (w[ 8], w[ 9], offset); w[30] = hc_bytealign (w[ 7], w[ 8], offset); w[29] = hc_bytealign (w[ 6], w[ 7], offset); w[28] = hc_bytealign (w[ 
5], w[ 6], offset); w[27] = hc_bytealign (w[ 4], w[ 5], offset); w[26] = hc_bytealign (w[ 3], w[ 4], offset); w[25] = hc_bytealign (w[ 2], w[ 3], offset); w[24] = hc_bytealign (w[ 1], w[ 2], offset); w[23] = hc_bytealign (w[ 0], w[ 1], offset); w[22] = hc_bytealign ( 0, w[ 0], offset); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_bytealign (w[39], w[40], offset); w[62] = hc_bytealign (w[38], w[39], offset); w[61] = hc_bytealign (w[37], w[38], offset); w[60] = hc_bytealign (w[36], w[37], offset); w[59] = hc_bytealign (w[35], w[36], offset); w[58] = hc_bytealign (w[34], w[35], offset); w[57] = hc_bytealign (w[33], w[34], offset); w[56] = hc_bytealign (w[32], w[33], offset); w[55] = hc_bytealign (w[31], w[32], offset); w[54] = hc_bytealign (w[30], w[31], offset); w[53] = hc_bytealign (w[29], w[30], offset); w[52] = hc_bytealign (w[28], w[29], offset); w[51] = hc_bytealign (w[27], w[28], offset); w[50] = hc_bytealign (w[26], w[27], offset); w[49] = hc_bytealign (w[25], w[26], offset); w[48] = hc_bytealign (w[24], w[25], offset); w[47] = hc_bytealign (w[23], w[24], offset); w[46] = hc_bytealign (w[22], w[23], offset); w[45] = hc_bytealign (w[21], w[22], offset); w[44] = hc_bytealign (w[20], w[21], offset); w[43] = hc_bytealign (w[19], w[20], offset); w[42] = hc_bytealign (w[18], w[19], offset); w[41] = hc_bytealign (w[17], w[18], offset); w[40] = hc_bytealign (w[16], w[17], offset); w[39] = hc_bytealign (w[15], w[16], offset); w[38] = hc_bytealign (w[14], w[15], offset); w[37] = hc_bytealign (w[13], w[14], offset); w[36] = hc_bytealign (w[12], w[13], offset); w[35] = hc_bytealign (w[11], w[12], offset); w[34] = hc_bytealign (w[10], w[11], offset); w[33] = hc_bytealign (w[ 9], w[10], offset); w[32] = hc_bytealign (w[ 8], w[ 9], offset); w[31] = 
hc_bytealign (w[ 7], w[ 8], offset); w[30] = hc_bytealign (w[ 6], w[ 7], offset); w[29] = hc_bytealign (w[ 5], w[ 6], offset); w[28] = hc_bytealign (w[ 4], w[ 5], offset); w[27] = hc_bytealign (w[ 3], w[ 4], offset); w[26] = hc_bytealign (w[ 2], w[ 3], offset); w[25] = hc_bytealign (w[ 1], w[ 2], offset); w[24] = hc_bytealign (w[ 0], w[ 1], offset); w[23] = hc_bytealign ( 0, w[ 0], offset); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_bytealign (w[38], w[39], offset); w[62] = hc_bytealign (w[37], w[38], offset); w[61] = hc_bytealign (w[36], w[37], offset); w[60] = hc_bytealign (w[35], w[36], offset); w[59] = hc_bytealign (w[34], w[35], offset); w[58] = hc_bytealign (w[33], w[34], offset); w[57] = hc_bytealign (w[32], w[33], offset); w[56] = hc_bytealign (w[31], w[32], offset); w[55] = hc_bytealign (w[30], w[31], offset); w[54] = hc_bytealign (w[29], w[30], offset); w[53] = hc_bytealign (w[28], w[29], offset); w[52] = hc_bytealign (w[27], w[28], offset); w[51] = hc_bytealign (w[26], w[27], offset); w[50] = hc_bytealign (w[25], w[26], offset); w[49] = hc_bytealign (w[24], w[25], offset); w[48] = hc_bytealign (w[23], w[24], offset); w[47] = hc_bytealign (w[22], w[23], offset); w[46] = hc_bytealign (w[21], w[22], offset); w[45] = hc_bytealign (w[20], w[21], offset); w[44] = hc_bytealign (w[19], w[20], offset); w[43] = hc_bytealign (w[18], w[19], offset); w[42] = hc_bytealign (w[17], w[18], offset); w[41] = hc_bytealign (w[16], w[17], offset); w[40] = hc_bytealign (w[15], w[16], offset); w[39] = hc_bytealign (w[14], w[15], offset); w[38] = hc_bytealign (w[13], w[14], offset); w[37] = hc_bytealign (w[12], w[13], offset); w[36] = hc_bytealign (w[11], w[12], offset); w[35] = hc_bytealign (w[10], w[11], offset); w[34] = hc_bytealign (w[ 9], 
w[10], offset); w[33] = hc_bytealign (w[ 8], w[ 9], offset); w[32] = hc_bytealign (w[ 7], w[ 8], offset); w[31] = hc_bytealign (w[ 6], w[ 7], offset); w[30] = hc_bytealign (w[ 5], w[ 6], offset); w[29] = hc_bytealign (w[ 4], w[ 5], offset); w[28] = hc_bytealign (w[ 3], w[ 4], offset); w[27] = hc_bytealign (w[ 2], w[ 3], offset); w[26] = hc_bytealign (w[ 1], w[ 2], offset); w[25] = hc_bytealign (w[ 0], w[ 1], offset); w[24] = hc_bytealign ( 0, w[ 0], offset); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_bytealign (w[37], w[38], offset); w[62] = hc_bytealign (w[36], w[37], offset); w[61] = hc_bytealign (w[35], w[36], offset); w[60] = hc_bytealign (w[34], w[35], offset); w[59] = hc_bytealign (w[33], w[34], offset); w[58] = hc_bytealign (w[32], w[33], offset); w[57] = hc_bytealign (w[31], w[32], offset); w[56] = hc_bytealign (w[30], w[31], offset); w[55] = hc_bytealign (w[29], w[30], offset); w[54] = hc_bytealign (w[28], w[29], offset); w[53] = hc_bytealign (w[27], w[28], offset); w[52] = hc_bytealign (w[26], w[27], offset); w[51] = hc_bytealign (w[25], w[26], offset); w[50] = hc_bytealign (w[24], w[25], offset); w[49] = hc_bytealign (w[23], w[24], offset); w[48] = hc_bytealign (w[22], w[23], offset); w[47] = hc_bytealign (w[21], w[22], offset); w[46] = hc_bytealign (w[20], w[21], offset); w[45] = hc_bytealign (w[19], w[20], offset); w[44] = hc_bytealign (w[18], w[19], offset); w[43] = hc_bytealign (w[17], w[18], offset); w[42] = hc_bytealign (w[16], w[17], offset); w[41] = hc_bytealign (w[15], w[16], offset); w[40] = hc_bytealign (w[14], w[15], offset); w[39] = hc_bytealign (w[13], w[14], offset); w[38] = hc_bytealign (w[12], w[13], offset); w[37] = hc_bytealign (w[11], w[12], offset); w[36] = hc_bytealign (w[10], w[11], 
offset); w[35] = hc_bytealign (w[ 9], w[10], offset); w[34] = hc_bytealign (w[ 8], w[ 9], offset); w[33] = hc_bytealign (w[ 7], w[ 8], offset); w[32] = hc_bytealign (w[ 6], w[ 7], offset); w[31] = hc_bytealign (w[ 5], w[ 6], offset); w[30] = hc_bytealign (w[ 4], w[ 5], offset); w[29] = hc_bytealign (w[ 3], w[ 4], offset); w[28] = hc_bytealign (w[ 2], w[ 3], offset); w[27] = hc_bytealign (w[ 1], w[ 2], offset); w[26] = hc_bytealign (w[ 0], w[ 1], offset); w[25] = hc_bytealign ( 0, w[ 0], offset); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_bytealign (w[36], w[37], offset); w[62] = hc_bytealign (w[35], w[36], offset); w[61] = hc_bytealign (w[34], w[35], offset); w[60] = hc_bytealign (w[33], w[34], offset); w[59] = hc_bytealign (w[32], w[33], offset); w[58] = hc_bytealign (w[31], w[32], offset); w[57] = hc_bytealign (w[30], w[31], offset); w[56] = hc_bytealign (w[29], w[30], offset); w[55] = hc_bytealign (w[28], w[29], offset); w[54] = hc_bytealign (w[27], w[28], offset); w[53] = hc_bytealign (w[26], w[27], offset); w[52] = hc_bytealign (w[25], w[26], offset); w[51] = hc_bytealign (w[24], w[25], offset); w[50] = hc_bytealign (w[23], w[24], offset); w[49] = hc_bytealign (w[22], w[23], offset); w[48] = hc_bytealign (w[21], w[22], offset); w[47] = hc_bytealign (w[20], w[21], offset); w[46] = hc_bytealign (w[19], w[20], offset); w[45] = hc_bytealign (w[18], w[19], offset); w[44] = hc_bytealign (w[17], w[18], offset); w[43] = hc_bytealign (w[16], w[17], offset); w[42] = hc_bytealign (w[15], w[16], offset); w[41] = hc_bytealign (w[14], w[15], offset); w[40] = hc_bytealign (w[13], w[14], offset); w[39] = hc_bytealign (w[12], w[13], offset); w[38] = hc_bytealign (w[11], w[12], offset); w[37] = hc_bytealign (w[10], w[11], 
offset); w[36] = hc_bytealign (w[ 9], w[10], offset); w[35] = hc_bytealign (w[ 8], w[ 9], offset); w[34] = hc_bytealign (w[ 7], w[ 8], offset); w[33] = hc_bytealign (w[ 6], w[ 7], offset); w[32] = hc_bytealign (w[ 5], w[ 6], offset); w[31] = hc_bytealign (w[ 4], w[ 5], offset); w[30] = hc_bytealign (w[ 3], w[ 4], offset); w[29] = hc_bytealign (w[ 2], w[ 3], offset); w[28] = hc_bytealign (w[ 1], w[ 2], offset); w[27] = hc_bytealign (w[ 0], w[ 1], offset); w[26] = hc_bytealign ( 0, w[ 0], offset); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_bytealign (w[35], w[36], offset); w[62] = hc_bytealign (w[34], w[35], offset); w[61] = hc_bytealign (w[33], w[34], offset); w[60] = hc_bytealign (w[32], w[33], offset); w[59] = hc_bytealign (w[31], w[32], offset); w[58] = hc_bytealign (w[30], w[31], offset); w[57] = hc_bytealign (w[29], w[30], offset); w[56] = hc_bytealign (w[28], w[29], offset); w[55] = hc_bytealign (w[27], w[28], offset); w[54] = hc_bytealign (w[26], w[27], offset); w[53] = hc_bytealign (w[25], w[26], offset); w[52] = hc_bytealign (w[24], w[25], offset); w[51] = hc_bytealign (w[23], w[24], offset); w[50] = hc_bytealign (w[22], w[23], offset); w[49] = hc_bytealign (w[21], w[22], offset); w[48] = hc_bytealign (w[20], w[21], offset); w[47] = hc_bytealign (w[19], w[20], offset); w[46] = hc_bytealign (w[18], w[19], offset); w[45] = hc_bytealign (w[17], w[18], offset); w[44] = hc_bytealign (w[16], w[17], offset); w[43] = hc_bytealign (w[15], w[16], offset); w[42] = hc_bytealign (w[14], w[15], offset); w[41] = hc_bytealign (w[13], w[14], offset); w[40] = hc_bytealign (w[12], w[13], offset); w[39] = hc_bytealign (w[11], w[12], offset); w[38] = hc_bytealign (w[10], w[11], offset); w[37] = hc_bytealign (w[ 
9], w[10], offset); w[36] = hc_bytealign (w[ 8], w[ 9], offset); w[35] = hc_bytealign (w[ 7], w[ 8], offset); w[34] = hc_bytealign (w[ 6], w[ 7], offset); w[33] = hc_bytealign (w[ 5], w[ 6], offset); w[32] = hc_bytealign (w[ 4], w[ 5], offset); w[31] = hc_bytealign (w[ 3], w[ 4], offset); w[30] = hc_bytealign (w[ 2], w[ 3], offset); w[29] = hc_bytealign (w[ 1], w[ 2], offset); w[28] = hc_bytealign (w[ 0], w[ 1], offset); w[27] = hc_bytealign ( 0, w[ 0], offset); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_bytealign (w[34], w[35], offset); w[62] = hc_bytealign (w[33], w[34], offset); w[61] = hc_bytealign (w[32], w[33], offset); w[60] = hc_bytealign (w[31], w[32], offset); w[59] = hc_bytealign (w[30], w[31], offset); w[58] = hc_bytealign (w[29], w[30], offset); w[57] = hc_bytealign (w[28], w[29], offset); w[56] = hc_bytealign (w[27], w[28], offset); w[55] = hc_bytealign (w[26], w[27], offset); w[54] = hc_bytealign (w[25], w[26], offset); w[53] = hc_bytealign (w[24], w[25], offset); w[52] = hc_bytealign (w[23], w[24], offset); w[51] = hc_bytealign (w[22], w[23], offset); w[50] = hc_bytealign (w[21], w[22], offset); w[49] = hc_bytealign (w[20], w[21], offset); w[48] = hc_bytealign (w[19], w[20], offset); w[47] = hc_bytealign (w[18], w[19], offset); w[46] = hc_bytealign (w[17], w[18], offset); w[45] = hc_bytealign (w[16], w[17], offset); w[44] = hc_bytealign (w[15], w[16], offset); w[43] = hc_bytealign (w[14], w[15], offset); w[42] = hc_bytealign (w[13], w[14], offset); w[41] = hc_bytealign (w[12], w[13], offset); w[40] = hc_bytealign (w[11], w[12], offset); w[39] = hc_bytealign (w[10], w[11], offset); w[38] = hc_bytealign (w[ 9], w[10], offset); w[37] = hc_bytealign (w[ 8], w[ 9], offset); 
w[36] = hc_bytealign (w[ 7], w[ 8], offset); w[35] = hc_bytealign (w[ 6], w[ 7], offset); w[34] = hc_bytealign (w[ 5], w[ 6], offset); w[33] = hc_bytealign (w[ 4], w[ 5], offset); w[32] = hc_bytealign (w[ 3], w[ 4], offset); w[31] = hc_bytealign (w[ 2], w[ 3], offset); w[30] = hc_bytealign (w[ 1], w[ 2], offset); w[29] = hc_bytealign (w[ 0], w[ 1], offset); w[28] = hc_bytealign ( 0, w[ 0], offset); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_bytealign (w[33], w[34], offset); w[62] = hc_bytealign (w[32], w[33], offset); w[61] = hc_bytealign (w[31], w[32], offset); w[60] = hc_bytealign (w[30], w[31], offset); w[59] = hc_bytealign (w[29], w[30], offset); w[58] = hc_bytealign (w[28], w[29], offset); w[57] = hc_bytealign (w[27], w[28], offset); w[56] = hc_bytealign (w[26], w[27], offset); w[55] = hc_bytealign (w[25], w[26], offset); w[54] = hc_bytealign (w[24], w[25], offset); w[53] = hc_bytealign (w[23], w[24], offset); w[52] = hc_bytealign (w[22], w[23], offset); w[51] = hc_bytealign (w[21], w[22], offset); w[50] = hc_bytealign (w[20], w[21], offset); w[49] = hc_bytealign (w[19], w[20], offset); w[48] = hc_bytealign (w[18], w[19], offset); w[47] = hc_bytealign (w[17], w[18], offset); w[46] = hc_bytealign (w[16], w[17], offset); w[45] = hc_bytealign (w[15], w[16], offset); w[44] = hc_bytealign (w[14], w[15], offset); w[43] = hc_bytealign (w[13], w[14], offset); w[42] = hc_bytealign (w[12], w[13], offset); w[41] = hc_bytealign (w[11], w[12], offset); w[40] = hc_bytealign (w[10], w[11], offset); w[39] = hc_bytealign (w[ 9], w[10], offset); w[38] = hc_bytealign (w[ 8], w[ 9], offset); w[37] = hc_bytealign (w[ 7], w[ 8], offset); w[36] = hc_bytealign (w[ 6], w[ 7], offset); w[35] = 
hc_bytealign (w[ 5], w[ 6], offset); w[34] = hc_bytealign (w[ 4], w[ 5], offset); w[33] = hc_bytealign (w[ 3], w[ 4], offset); w[32] = hc_bytealign (w[ 2], w[ 3], offset); w[31] = hc_bytealign (w[ 1], w[ 2], offset); w[30] = hc_bytealign (w[ 0], w[ 1], offset); w[29] = hc_bytealign ( 0, w[ 0], offset); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_bytealign (w[32], w[33], offset); w[62] = hc_bytealign (w[31], w[32], offset); w[61] = hc_bytealign (w[30], w[31], offset); w[60] = hc_bytealign (w[29], w[30], offset); w[59] = hc_bytealign (w[28], w[29], offset); w[58] = hc_bytealign (w[27], w[28], offset); w[57] = hc_bytealign (w[26], w[27], offset); w[56] = hc_bytealign (w[25], w[26], offset); w[55] = hc_bytealign (w[24], w[25], offset); w[54] = hc_bytealign (w[23], w[24], offset); w[53] = hc_bytealign (w[22], w[23], offset); w[52] = hc_bytealign (w[21], w[22], offset); w[51] = hc_bytealign (w[20], w[21], offset); w[50] = hc_bytealign (w[19], w[20], offset); w[49] = hc_bytealign (w[18], w[19], offset); w[48] = hc_bytealign (w[17], w[18], offset); w[47] = hc_bytealign (w[16], w[17], offset); w[46] = hc_bytealign (w[15], w[16], offset); w[45] = hc_bytealign (w[14], w[15], offset); w[44] = hc_bytealign (w[13], w[14], offset); w[43] = hc_bytealign (w[12], w[13], offset); w[42] = hc_bytealign (w[11], w[12], offset); w[41] = hc_bytealign (w[10], w[11], offset); w[40] = hc_bytealign (w[ 9], w[10], offset); w[39] = hc_bytealign (w[ 8], w[ 9], offset); w[38] = hc_bytealign (w[ 7], w[ 8], offset); w[37] = hc_bytealign (w[ 6], w[ 7], offset); w[36] = hc_bytealign (w[ 5], w[ 6], offset); w[35] = hc_bytealign (w[ 4], w[ 5], offset); w[34] = hc_bytealign (w[ 3], w[ 4], offset); w[33] = 
hc_bytealign (w[ 2], w[ 3], offset); w[32] = hc_bytealign (w[ 1], w[ 2], offset); w[31] = hc_bytealign (w[ 0], w[ 1], offset); w[30] = hc_bytealign ( 0, w[ 0], offset); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_bytealign (w[31], w[32], offset); w[62] = hc_bytealign (w[30], w[31], offset); w[61] = hc_bytealign (w[29], w[30], offset); w[60] = hc_bytealign (w[28], w[29], offset); w[59] = hc_bytealign (w[27], w[28], offset); w[58] = hc_bytealign (w[26], w[27], offset); w[57] = hc_bytealign (w[25], w[26], offset); w[56] = hc_bytealign (w[24], w[25], offset); w[55] = hc_bytealign (w[23], w[24], offset); w[54] = hc_bytealign (w[22], w[23], offset); w[53] = hc_bytealign (w[21], w[22], offset); w[52] = hc_bytealign (w[20], w[21], offset); w[51] = hc_bytealign (w[19], w[20], offset); w[50] = hc_bytealign (w[18], w[19], offset); w[49] = hc_bytealign (w[17], w[18], offset); w[48] = hc_bytealign (w[16], w[17], offset); w[47] = hc_bytealign (w[15], w[16], offset); w[46] = hc_bytealign (w[14], w[15], offset); w[45] = hc_bytealign (w[13], w[14], offset); w[44] = hc_bytealign (w[12], w[13], offset); w[43] = hc_bytealign (w[11], w[12], offset); w[42] = hc_bytealign (w[10], w[11], offset); w[41] = hc_bytealign (w[ 9], w[10], offset); w[40] = hc_bytealign (w[ 8], w[ 9], offset); w[39] = hc_bytealign (w[ 7], w[ 8], offset); w[38] = hc_bytealign (w[ 6], w[ 7], offset); w[37] = hc_bytealign (w[ 5], w[ 6], offset); w[36] = hc_bytealign (w[ 4], w[ 5], offset); w[35] = hc_bytealign (w[ 3], w[ 4], offset); w[34] = hc_bytealign (w[ 2], w[ 3], offset); w[33] = hc_bytealign (w[ 1], w[ 2], offset); w[32] = hc_bytealign (w[ 0], w[ 1], offset); w[31] = hc_bytealign ( 0, w[ 0], offset); 
w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_bytealign (w[30], w[31], offset); w[62] = hc_bytealign (w[29], w[30], offset); w[61] = hc_bytealign (w[28], w[29], offset); w[60] = hc_bytealign (w[27], w[28], offset); w[59] = hc_bytealign (w[26], w[27], offset); w[58] = hc_bytealign (w[25], w[26], offset); w[57] = hc_bytealign (w[24], w[25], offset); w[56] = hc_bytealign (w[23], w[24], offset); w[55] = hc_bytealign (w[22], w[23], offset); w[54] = hc_bytealign (w[21], w[22], offset); w[53] = hc_bytealign (w[20], w[21], offset); w[52] = hc_bytealign (w[19], w[20], offset); w[51] = hc_bytealign (w[18], w[19], offset); w[50] = hc_bytealign (w[17], w[18], offset); w[49] = hc_bytealign (w[16], w[17], offset); w[48] = hc_bytealign (w[15], w[16], offset); w[47] = hc_bytealign (w[14], w[15], offset); w[46] = hc_bytealign (w[13], w[14], offset); w[45] = hc_bytealign (w[12], w[13], offset); w[44] = hc_bytealign (w[11], w[12], offset); w[43] = hc_bytealign (w[10], w[11], offset); w[42] = hc_bytealign (w[ 9], w[10], offset); w[41] = hc_bytealign (w[ 8], w[ 9], offset); w[40] = hc_bytealign (w[ 7], w[ 8], offset); w[39] = hc_bytealign (w[ 6], w[ 7], offset); w[38] = hc_bytealign (w[ 5], w[ 6], offset); w[37] = hc_bytealign (w[ 4], w[ 5], offset); w[36] = hc_bytealign (w[ 3], w[ 4], offset); w[35] = hc_bytealign (w[ 2], w[ 3], offset); w[34] = hc_bytealign (w[ 1], w[ 2], offset); w[33] = hc_bytealign (w[ 0], w[ 1], offset); w[32] = hc_bytealign ( 0, w[ 0], offset); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 
0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_bytealign (w[29], w[30], offset); w[62] = hc_bytealign (w[28], w[29], offset); w[61] = hc_bytealign (w[27], w[28], offset); w[60] = hc_bytealign (w[26], w[27], offset); w[59] = hc_bytealign (w[25], w[26], offset); w[58] = hc_bytealign (w[24], w[25], offset); w[57] = hc_bytealign (w[23], w[24], offset); w[56] = hc_bytealign (w[22], w[23], offset); w[55] = hc_bytealign (w[21], w[22], offset); w[54] = hc_bytealign (w[20], w[21], offset); w[53] = hc_bytealign (w[19], w[20], offset); w[52] = hc_bytealign (w[18], w[19], offset); w[51] = hc_bytealign (w[17], w[18], offset); w[50] = hc_bytealign (w[16], w[17], offset); w[49] = hc_bytealign (w[15], w[16], offset); w[48] = hc_bytealign (w[14], w[15], offset); w[47] = hc_bytealign (w[13], w[14], offset); w[46] = hc_bytealign (w[12], w[13], offset); w[45] = hc_bytealign (w[11], w[12], offset); w[44] = hc_bytealign (w[10], w[11], offset); w[43] = hc_bytealign (w[ 9], w[10], offset); w[42] = hc_bytealign (w[ 8], w[ 9], offset); w[41] = hc_bytealign (w[ 7], w[ 8], offset); w[40] = hc_bytealign (w[ 6], w[ 7], offset); w[39] = hc_bytealign (w[ 5], w[ 6], offset); w[38] = hc_bytealign (w[ 4], w[ 5], offset); w[37] = hc_bytealign (w[ 3], w[ 4], offset); w[36] = hc_bytealign (w[ 2], w[ 3], offset); w[35] = hc_bytealign (w[ 1], w[ 2], offset); w[34] = hc_bytealign (w[ 0], w[ 1], offset); w[33] = hc_bytealign ( 0, w[ 0], offset); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_bytealign (w[28], w[29], offset); w[62] = hc_bytealign 
(w[27], w[28], offset); w[61] = hc_bytealign (w[26], w[27], offset); w[60] = hc_bytealign (w[25], w[26], offset); w[59] = hc_bytealign (w[24], w[25], offset); w[58] = hc_bytealign (w[23], w[24], offset); w[57] = hc_bytealign (w[22], w[23], offset); w[56] = hc_bytealign (w[21], w[22], offset); w[55] = hc_bytealign (w[20], w[21], offset); w[54] = hc_bytealign (w[19], w[20], offset); w[53] = hc_bytealign (w[18], w[19], offset); w[52] = hc_bytealign (w[17], w[18], offset); w[51] = hc_bytealign (w[16], w[17], offset); w[50] = hc_bytealign (w[15], w[16], offset); w[49] = hc_bytealign (w[14], w[15], offset); w[48] = hc_bytealign (w[13], w[14], offset); w[47] = hc_bytealign (w[12], w[13], offset); w[46] = hc_bytealign (w[11], w[12], offset); w[45] = hc_bytealign (w[10], w[11], offset); w[44] = hc_bytealign (w[ 9], w[10], offset); w[43] = hc_bytealign (w[ 8], w[ 9], offset); w[42] = hc_bytealign (w[ 7], w[ 8], offset); w[41] = hc_bytealign (w[ 6], w[ 7], offset); w[40] = hc_bytealign (w[ 5], w[ 6], offset); w[39] = hc_bytealign (w[ 4], w[ 5], offset); w[38] = hc_bytealign (w[ 3], w[ 4], offset); w[37] = hc_bytealign (w[ 2], w[ 3], offset); w[36] = hc_bytealign (w[ 1], w[ 2], offset); w[35] = hc_bytealign (w[ 0], w[ 1], offset); w[34] = hc_bytealign ( 0, w[ 0], offset); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_bytealign (w[27], w[28], offset); w[62] = hc_bytealign (w[26], w[27], offset); w[61] = hc_bytealign (w[25], w[26], offset); w[60] = hc_bytealign (w[24], w[25], offset); w[59] = hc_bytealign (w[23], w[24], offset); w[58] = hc_bytealign (w[22], w[23], offset); w[57] = hc_bytealign (w[21], w[22], offset); w[56] = 
hc_bytealign (w[20], w[21], offset); w[55] = hc_bytealign (w[19], w[20], offset); w[54] = hc_bytealign (w[18], w[19], offset); w[53] = hc_bytealign (w[17], w[18], offset); w[52] = hc_bytealign (w[16], w[17], offset); w[51] = hc_bytealign (w[15], w[16], offset); w[50] = hc_bytealign (w[14], w[15], offset); w[49] = hc_bytealign (w[13], w[14], offset); w[48] = hc_bytealign (w[12], w[13], offset); w[47] = hc_bytealign (w[11], w[12], offset); w[46] = hc_bytealign (w[10], w[11], offset); w[45] = hc_bytealign (w[ 9], w[10], offset); w[44] = hc_bytealign (w[ 8], w[ 9], offset); w[43] = hc_bytealign (w[ 7], w[ 8], offset); w[42] = hc_bytealign (w[ 6], w[ 7], offset); w[41] = hc_bytealign (w[ 5], w[ 6], offset); w[40] = hc_bytealign (w[ 4], w[ 5], offset); w[39] = hc_bytealign (w[ 3], w[ 4], offset); w[38] = hc_bytealign (w[ 2], w[ 3], offset); w[37] = hc_bytealign (w[ 1], w[ 2], offset); w[36] = hc_bytealign (w[ 0], w[ 1], offset); w[35] = hc_bytealign ( 0, w[ 0], offset); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_bytealign (w[26], w[27], offset); w[62] = hc_bytealign (w[25], w[26], offset); w[61] = hc_bytealign (w[24], w[25], offset); w[60] = hc_bytealign (w[23], w[24], offset); w[59] = hc_bytealign (w[22], w[23], offset); w[58] = hc_bytealign (w[21], w[22], offset); w[57] = hc_bytealign (w[20], w[21], offset); w[56] = hc_bytealign (w[19], w[20], offset); w[55] = hc_bytealign (w[18], w[19], offset); w[54] = hc_bytealign (w[17], w[18], offset); w[53] = hc_bytealign (w[16], w[17], offset); w[52] = hc_bytealign (w[15], w[16], offset); w[51] = hc_bytealign (w[14], w[15], offset); w[50] = hc_bytealign (w[13], 
w[14], offset); w[49] = hc_bytealign (w[12], w[13], offset); w[48] = hc_bytealign (w[11], w[12], offset); w[47] = hc_bytealign (w[10], w[11], offset); w[46] = hc_bytealign (w[ 9], w[10], offset); w[45] = hc_bytealign (w[ 8], w[ 9], offset); w[44] = hc_bytealign (w[ 7], w[ 8], offset); w[43] = hc_bytealign (w[ 6], w[ 7], offset); w[42] = hc_bytealign (w[ 5], w[ 6], offset); w[41] = hc_bytealign (w[ 4], w[ 5], offset); w[40] = hc_bytealign (w[ 3], w[ 4], offset); w[39] = hc_bytealign (w[ 2], w[ 3], offset); w[38] = hc_bytealign (w[ 1], w[ 2], offset); w[37] = hc_bytealign (w[ 0], w[ 1], offset); w[36] = hc_bytealign ( 0, w[ 0], offset); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_bytealign (w[25], w[26], offset); w[62] = hc_bytealign (w[24], w[25], offset); w[61] = hc_bytealign (w[23], w[24], offset); w[60] = hc_bytealign (w[22], w[23], offset); w[59] = hc_bytealign (w[21], w[22], offset); w[58] = hc_bytealign (w[20], w[21], offset); w[57] = hc_bytealign (w[19], w[20], offset); w[56] = hc_bytealign (w[18], w[19], offset); w[55] = hc_bytealign (w[17], w[18], offset); w[54] = hc_bytealign (w[16], w[17], offset); w[53] = hc_bytealign (w[15], w[16], offset); w[52] = hc_bytealign (w[14], w[15], offset); w[51] = hc_bytealign (w[13], w[14], offset); w[50] = hc_bytealign (w[12], w[13], offset); w[49] = hc_bytealign (w[11], w[12], offset); w[48] = hc_bytealign (w[10], w[11], offset); w[47] = hc_bytealign (w[ 9], w[10], offset); w[46] = hc_bytealign (w[ 8], w[ 9], offset); w[45] = hc_bytealign (w[ 7], w[ 8], offset); w[44] = hc_bytealign (w[ 6], w[ 7], offset); w[43] = hc_bytealign (w[ 5], w[ 6], offset); 
w[42] = hc_bytealign (w[ 4], w[ 5], offset); w[41] = hc_bytealign (w[ 3], w[ 4], offset); w[40] = hc_bytealign (w[ 2], w[ 3], offset); w[39] = hc_bytealign (w[ 1], w[ 2], offset); w[38] = hc_bytealign (w[ 0], w[ 1], offset); w[37] = hc_bytealign ( 0, w[ 0], offset); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_bytealign (w[24], w[25], offset); w[62] = hc_bytealign (w[23], w[24], offset); w[61] = hc_bytealign (w[22], w[23], offset); w[60] = hc_bytealign (w[21], w[22], offset); w[59] = hc_bytealign (w[20], w[21], offset); w[58] = hc_bytealign (w[19], w[20], offset); w[57] = hc_bytealign (w[18], w[19], offset); w[56] = hc_bytealign (w[17], w[18], offset); w[55] = hc_bytealign (w[16], w[17], offset); w[54] = hc_bytealign (w[15], w[16], offset); w[53] = hc_bytealign (w[14], w[15], offset); w[52] = hc_bytealign (w[13], w[14], offset); w[51] = hc_bytealign (w[12], w[13], offset); w[50] = hc_bytealign (w[11], w[12], offset); w[49] = hc_bytealign (w[10], w[11], offset); w[48] = hc_bytealign (w[ 9], w[10], offset); w[47] = hc_bytealign (w[ 8], w[ 9], offset); w[46] = hc_bytealign (w[ 7], w[ 8], offset); w[45] = hc_bytealign (w[ 6], w[ 7], offset); w[44] = hc_bytealign (w[ 5], w[ 6], offset); w[43] = hc_bytealign (w[ 4], w[ 5], offset); w[42] = hc_bytealign (w[ 3], w[ 4], offset); w[41] = hc_bytealign (w[ 2], w[ 3], offset); w[40] = hc_bytealign (w[ 1], w[ 2], offset); w[39] = hc_bytealign (w[ 0], w[ 1], offset); w[38] = hc_bytealign ( 0, w[ 0], offset); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; 
w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_bytealign (w[23], w[24], offset); w[62] = hc_bytealign (w[22], w[23], offset); w[61] = hc_bytealign (w[21], w[22], offset); w[60] = hc_bytealign (w[20], w[21], offset); w[59] = hc_bytealign (w[19], w[20], offset); w[58] = hc_bytealign (w[18], w[19], offset); w[57] = hc_bytealign (w[17], w[18], offset); w[56] = hc_bytealign (w[16], w[17], offset); w[55] = hc_bytealign (w[15], w[16], offset); w[54] = hc_bytealign (w[14], w[15], offset); w[53] = hc_bytealign (w[13], w[14], offset); w[52] = hc_bytealign (w[12], w[13], offset); w[51] = hc_bytealign (w[11], w[12], offset); w[50] = hc_bytealign (w[10], w[11], offset); w[49] = hc_bytealign (w[ 9], w[10], offset); w[48] = hc_bytealign (w[ 8], w[ 9], offset); w[47] = hc_bytealign (w[ 7], w[ 8], offset); w[46] = hc_bytealign (w[ 6], w[ 7], offset); w[45] = hc_bytealign (w[ 5], w[ 6], offset); w[44] = hc_bytealign (w[ 4], w[ 5], offset); w[43] = hc_bytealign (w[ 3], w[ 4], offset); w[42] = hc_bytealign (w[ 2], w[ 3], offset); w[41] = hc_bytealign (w[ 1], w[ 2], offset); w[40] = hc_bytealign (w[ 0], w[ 1], offset); w[39] = hc_bytealign ( 0, w[ 0], offset); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_bytealign (w[22], w[23], offset); w[62] = hc_bytealign (w[21], w[22], offset); w[61] = hc_bytealign (w[20], w[21], offset); w[60] 
= hc_bytealign (w[19], w[20], offset); w[59] = hc_bytealign (w[18], w[19], offset); w[58] = hc_bytealign (w[17], w[18], offset); w[57] = hc_bytealign (w[16], w[17], offset); w[56] = hc_bytealign (w[15], w[16], offset); w[55] = hc_bytealign (w[14], w[15], offset); w[54] = hc_bytealign (w[13], w[14], offset); w[53] = hc_bytealign (w[12], w[13], offset); w[52] = hc_bytealign (w[11], w[12], offset); w[51] = hc_bytealign (w[10], w[11], offset); w[50] = hc_bytealign (w[ 9], w[10], offset); w[49] = hc_bytealign (w[ 8], w[ 9], offset); w[48] = hc_bytealign (w[ 7], w[ 8], offset); w[47] = hc_bytealign (w[ 6], w[ 7], offset); w[46] = hc_bytealign (w[ 5], w[ 6], offset); w[45] = hc_bytealign (w[ 4], w[ 5], offset); w[44] = hc_bytealign (w[ 3], w[ 4], offset); w[43] = hc_bytealign (w[ 2], w[ 3], offset); w[42] = hc_bytealign (w[ 1], w[ 2], offset); w[41] = hc_bytealign (w[ 0], w[ 1], offset); w[40] = hc_bytealign ( 0, w[ 0], offset); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_bytealign (w[21], w[22], offset); w[62] = hc_bytealign (w[20], w[21], offset); w[61] = hc_bytealign (w[19], w[20], offset); w[60] = hc_bytealign (w[18], w[19], offset); w[59] = hc_bytealign (w[17], w[18], offset); w[58] = hc_bytealign (w[16], w[17], offset); w[57] = hc_bytealign (w[15], w[16], offset); w[56] = hc_bytealign (w[14], w[15], offset); w[55] = hc_bytealign (w[13], w[14], offset); w[54] = hc_bytealign (w[12], w[13], offset); w[53] = hc_bytealign (w[11], w[12], offset); w[52] = hc_bytealign (w[10], w[11], offset); w[51] = hc_bytealign (w[ 9], w[10], offset); w[50] = hc_bytealign 
(w[ 8], w[ 9], offset); w[49] = hc_bytealign (w[ 7], w[ 8], offset); w[48] = hc_bytealign (w[ 6], w[ 7], offset); w[47] = hc_bytealign (w[ 5], w[ 6], offset); w[46] = hc_bytealign (w[ 4], w[ 5], offset); w[45] = hc_bytealign (w[ 3], w[ 4], offset); w[44] = hc_bytealign (w[ 2], w[ 3], offset); w[43] = hc_bytealign (w[ 1], w[ 2], offset); w[42] = hc_bytealign (w[ 0], w[ 1], offset); w[41] = hc_bytealign ( 0, w[ 0], offset); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_bytealign (w[20], w[21], offset); w[62] = hc_bytealign (w[19], w[20], offset); w[61] = hc_bytealign (w[18], w[19], offset); w[60] = hc_bytealign (w[17], w[18], offset); w[59] = hc_bytealign (w[16], w[17], offset); w[58] = hc_bytealign (w[15], w[16], offset); w[57] = hc_bytealign (w[14], w[15], offset); w[56] = hc_bytealign (w[13], w[14], offset); w[55] = hc_bytealign (w[12], w[13], offset); w[54] = hc_bytealign (w[11], w[12], offset); w[53] = hc_bytealign (w[10], w[11], offset); w[52] = hc_bytealign (w[ 9], w[10], offset); w[51] = hc_bytealign (w[ 8], w[ 9], offset); w[50] = hc_bytealign (w[ 7], w[ 8], offset); w[49] = hc_bytealign (w[ 6], w[ 7], offset); w[48] = hc_bytealign (w[ 5], w[ 6], offset); w[47] = hc_bytealign (w[ 4], w[ 5], offset); w[46] = hc_bytealign (w[ 3], w[ 4], offset); w[45] = hc_bytealign (w[ 2], w[ 3], offset); w[44] = hc_bytealign (w[ 1], w[ 2], offset); w[43] = hc_bytealign (w[ 0], w[ 1], offset); w[42] = hc_bytealign ( 0, w[ 0], offset); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 
0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_bytealign (w[19], w[20], offset); w[62] = hc_bytealign (w[18], w[19], offset); w[61] = hc_bytealign (w[17], w[18], offset); w[60] = hc_bytealign (w[16], w[17], offset); w[59] = hc_bytealign (w[15], w[16], offset); w[58] = hc_bytealign (w[14], w[15], offset); w[57] = hc_bytealign (w[13], w[14], offset); w[56] = hc_bytealign (w[12], w[13], offset); w[55] = hc_bytealign (w[11], w[12], offset); w[54] = hc_bytealign (w[10], w[11], offset); w[53] = hc_bytealign (w[ 9], w[10], offset); w[52] = hc_bytealign (w[ 8], w[ 9], offset); w[51] = hc_bytealign (w[ 7], w[ 8], offset); w[50] = hc_bytealign (w[ 6], w[ 7], offset); w[49] = hc_bytealign (w[ 5], w[ 6], offset); w[48] = hc_bytealign (w[ 4], w[ 5], offset); w[47] = hc_bytealign (w[ 3], w[ 4], offset); w[46] = hc_bytealign (w[ 2], w[ 3], offset); w[45] = hc_bytealign (w[ 1], w[ 2], offset); w[44] = hc_bytealign (w[ 0], w[ 1], offset); w[43] = hc_bytealign ( 0, w[ 0], offset); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_bytealign (w[18], w[19], offset); w[62] = hc_bytealign (w[17], w[18], offset); w[61] = hc_bytealign (w[16], w[17], offset); w[60] = hc_bytealign (w[15], w[16], offset); w[59] = hc_bytealign (w[14], 
w[15], offset); w[58] = hc_bytealign (w[13], w[14], offset); w[57] = hc_bytealign (w[12], w[13], offset); w[56] = hc_bytealign (w[11], w[12], offset); w[55] = hc_bytealign (w[10], w[11], offset); w[54] = hc_bytealign (w[ 9], w[10], offset); w[53] = hc_bytealign (w[ 8], w[ 9], offset); w[52] = hc_bytealign (w[ 7], w[ 8], offset); w[51] = hc_bytealign (w[ 6], w[ 7], offset); w[50] = hc_bytealign (w[ 5], w[ 6], offset); w[49] = hc_bytealign (w[ 4], w[ 5], offset); w[48] = hc_bytealign (w[ 3], w[ 4], offset); w[47] = hc_bytealign (w[ 2], w[ 3], offset); w[46] = hc_bytealign (w[ 1], w[ 2], offset); w[45] = hc_bytealign (w[ 0], w[ 1], offset); w[44] = hc_bytealign ( 0, w[ 0], offset); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_bytealign (w[17], w[18], offset); w[62] = hc_bytealign (w[16], w[17], offset); w[61] = hc_bytealign (w[15], w[16], offset); w[60] = hc_bytealign (w[14], w[15], offset); w[59] = hc_bytealign (w[13], w[14], offset); w[58] = hc_bytealign (w[12], w[13], offset); w[57] = hc_bytealign (w[11], w[12], offset); w[56] = hc_bytealign (w[10], w[11], offset); w[55] = hc_bytealign (w[ 9], w[10], offset); w[54] = hc_bytealign (w[ 8], w[ 9], offset); w[53] = hc_bytealign (w[ 7], w[ 8], offset); w[52] = hc_bytealign (w[ 6], w[ 7], offset); w[51] = hc_bytealign (w[ 5], w[ 6], offset); w[50] = hc_bytealign (w[ 4], w[ 5], offset); w[49] = hc_bytealign (w[ 3], w[ 4], offset); w[48] = hc_bytealign (w[ 2], w[ 3], offset); w[47] = hc_bytealign (w[ 1], w[ 2], offset); w[46] = hc_bytealign (w[ 0], w[ 1], offset); 
w[45] = hc_bytealign ( 0, w[ 0], offset); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_bytealign (w[16], w[17], offset); w[62] = hc_bytealign (w[15], w[16], offset); w[61] = hc_bytealign (w[14], w[15], offset); w[60] = hc_bytealign (w[13], w[14], offset); w[59] = hc_bytealign (w[12], w[13], offset); w[58] = hc_bytealign (w[11], w[12], offset); w[57] = hc_bytealign (w[10], w[11], offset); w[56] = hc_bytealign (w[ 9], w[10], offset); w[55] = hc_bytealign (w[ 8], w[ 9], offset); w[54] = hc_bytealign (w[ 7], w[ 8], offset); w[53] = hc_bytealign (w[ 6], w[ 7], offset); w[52] = hc_bytealign (w[ 5], w[ 6], offset); w[51] = hc_bytealign (w[ 4], w[ 5], offset); w[50] = hc_bytealign (w[ 3], w[ 4], offset); w[49] = hc_bytealign (w[ 2], w[ 3], offset); w[48] = hc_bytealign (w[ 1], w[ 2], offset); w[47] = hc_bytealign (w[ 0], w[ 1], offset); w[46] = hc_bytealign ( 0, w[ 0], offset); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_bytealign (w[15], w[16], offset); w[62] = hc_bytealign (w[14], w[15], offset); w[61] = hc_bytealign 
(w[13], w[14], offset); w[60] = hc_bytealign (w[12], w[13], offset); w[59] = hc_bytealign (w[11], w[12], offset); w[58] = hc_bytealign (w[10], w[11], offset); w[57] = hc_bytealign (w[ 9], w[10], offset); w[56] = hc_bytealign (w[ 8], w[ 9], offset); w[55] = hc_bytealign (w[ 7], w[ 8], offset); w[54] = hc_bytealign (w[ 6], w[ 7], offset); w[53] = hc_bytealign (w[ 5], w[ 6], offset); w[52] = hc_bytealign (w[ 4], w[ 5], offset); w[51] = hc_bytealign (w[ 3], w[ 4], offset); w[50] = hc_bytealign (w[ 2], w[ 3], offset); w[49] = hc_bytealign (w[ 1], w[ 2], offset); w[48] = hc_bytealign (w[ 0], w[ 1], offset); w[47] = hc_bytealign ( 0, w[ 0], offset); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_bytealign (w[14], w[15], offset); w[62] = hc_bytealign (w[13], w[14], offset); w[61] = hc_bytealign (w[12], w[13], offset); w[60] = hc_bytealign (w[11], w[12], offset); w[59] = hc_bytealign (w[10], w[11], offset); w[58] = hc_bytealign (w[ 9], w[10], offset); w[57] = hc_bytealign (w[ 8], w[ 9], offset); w[56] = hc_bytealign (w[ 7], w[ 8], offset); w[55] = hc_bytealign (w[ 6], w[ 7], offset); w[54] = hc_bytealign (w[ 5], w[ 6], offset); w[53] = hc_bytealign (w[ 4], w[ 5], offset); w[52] = hc_bytealign (w[ 3], w[ 4], offset); w[51] = hc_bytealign (w[ 2], w[ 3], offset); w[50] = hc_bytealign (w[ 1], w[ 2], offset); w[49] = hc_bytealign (w[ 0], w[ 1], offset); w[48] = hc_bytealign ( 0, w[ 0], offset); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; 
w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_bytealign (w[13], w[14], offset); w[62] = hc_bytealign (w[12], w[13], offset); w[61] = hc_bytealign (w[11], w[12], offset); w[60] = hc_bytealign (w[10], w[11], offset); w[59] = hc_bytealign (w[ 9], w[10], offset); w[58] = hc_bytealign (w[ 8], w[ 9], offset); w[57] = hc_bytealign (w[ 7], w[ 8], offset); w[56] = hc_bytealign (w[ 6], w[ 7], offset); w[55] = hc_bytealign (w[ 5], w[ 6], offset); w[54] = hc_bytealign (w[ 4], w[ 5], offset); w[53] = hc_bytealign (w[ 3], w[ 4], offset); w[52] = hc_bytealign (w[ 2], w[ 3], offset); w[51] = hc_bytealign (w[ 1], w[ 2], offset); w[50] = hc_bytealign (w[ 0], w[ 1], offset); w[49] = hc_bytealign ( 0, w[ 0], offset); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_bytealign (w[12], w[13], offset); w[62] = hc_bytealign (w[11], w[12], offset); w[61] = hc_bytealign (w[10], w[11], offset); w[60] = hc_bytealign (w[ 9], w[10], offset); w[59] = hc_bytealign (w[ 8], w[ 9], offset); w[58] = hc_bytealign (w[ 7], w[ 8], offset); w[57] = hc_bytealign (w[ 6], w[ 7], offset); w[56] = 
hc_bytealign (w[ 5], w[ 6], offset); w[55] = hc_bytealign (w[ 4], w[ 5], offset); w[54] = hc_bytealign (w[ 3], w[ 4], offset); w[53] = hc_bytealign (w[ 2], w[ 3], offset); w[52] = hc_bytealign (w[ 1], w[ 2], offset); w[51] = hc_bytealign (w[ 0], w[ 1], offset); w[50] = hc_bytealign ( 0, w[ 0], offset); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_bytealign (w[11], w[12], offset); w[62] = hc_bytealign (w[10], w[11], offset); w[61] = hc_bytealign (w[ 9], w[10], offset); w[60] = hc_bytealign (w[ 8], w[ 9], offset); w[59] = hc_bytealign (w[ 7], w[ 8], offset); w[58] = hc_bytealign (w[ 6], w[ 7], offset); w[57] = hc_bytealign (w[ 5], w[ 6], offset); w[56] = hc_bytealign (w[ 4], w[ 5], offset); w[55] = hc_bytealign (w[ 3], w[ 4], offset); w[54] = hc_bytealign (w[ 2], w[ 3], offset); w[53] = hc_bytealign (w[ 1], w[ 2], offset); w[52] = hc_bytealign (w[ 0], w[ 1], offset); w[51] = hc_bytealign ( 0, w[ 0], offset); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 
0; w[ 0] = 0; break; case 52: w[63] = hc_bytealign (w[10], w[11], offset); w[62] = hc_bytealign (w[ 9], w[10], offset); w[61] = hc_bytealign (w[ 8], w[ 9], offset); w[60] = hc_bytealign (w[ 7], w[ 8], offset); w[59] = hc_bytealign (w[ 6], w[ 7], offset); w[58] = hc_bytealign (w[ 5], w[ 6], offset); w[57] = hc_bytealign (w[ 4], w[ 5], offset); w[56] = hc_bytealign (w[ 3], w[ 4], offset); w[55] = hc_bytealign (w[ 2], w[ 3], offset); w[54] = hc_bytealign (w[ 1], w[ 2], offset); w[53] = hc_bytealign (w[ 0], w[ 1], offset); w[52] = hc_bytealign ( 0, w[ 0], offset); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_bytealign (w[ 9], w[10], offset); w[62] = hc_bytealign (w[ 8], w[ 9], offset); w[61] = hc_bytealign (w[ 7], w[ 8], offset); w[60] = hc_bytealign (w[ 6], w[ 7], offset); w[59] = hc_bytealign (w[ 5], w[ 6], offset); w[58] = hc_bytealign (w[ 4], w[ 5], offset); w[57] = hc_bytealign (w[ 3], w[ 4], offset); w[56] = hc_bytealign (w[ 2], w[ 3], offset); w[55] = hc_bytealign (w[ 1], w[ 2], offset); w[54] = hc_bytealign (w[ 0], w[ 1], offset); w[53] = hc_bytealign ( 0, w[ 0], offset); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; 
w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_bytealign (w[ 8], w[ 9], offset); w[62] = hc_bytealign (w[ 7], w[ 8], offset); w[61] = hc_bytealign (w[ 6], w[ 7], offset); w[60] = hc_bytealign (w[ 5], w[ 6], offset); w[59] = hc_bytealign (w[ 4], w[ 5], offset); w[58] = hc_bytealign (w[ 3], w[ 4], offset); w[57] = hc_bytealign (w[ 2], w[ 3], offset); w[56] = hc_bytealign (w[ 1], w[ 2], offset); w[55] = hc_bytealign (w[ 0], w[ 1], offset); w[54] = hc_bytealign ( 0, w[ 0], offset); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_bytealign (w[ 7], w[ 8], offset); w[62] = hc_bytealign (w[ 6], w[ 7], offset); w[61] = hc_bytealign (w[ 5], w[ 6], offset); w[60] = hc_bytealign (w[ 4], w[ 5], offset); w[59] = hc_bytealign (w[ 3], w[ 4], offset); w[58] = hc_bytealign (w[ 2], w[ 3], offset); w[57] = hc_bytealign (w[ 1], w[ 2], offset); w[56] = hc_bytealign (w[ 0], w[ 1], offset); w[55] = hc_bytealign ( 0, w[ 0], offset); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 
0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_bytealign (w[ 6], w[ 7], offset); w[62] = hc_bytealign (w[ 5], w[ 6], offset); w[61] = hc_bytealign (w[ 4], w[ 5], offset); w[60] = hc_bytealign (w[ 3], w[ 4], offset); w[59] = hc_bytealign (w[ 2], w[ 3], offset); w[58] = hc_bytealign (w[ 1], w[ 2], offset); w[57] = hc_bytealign (w[ 0], w[ 1], offset); w[56] = hc_bytealign ( 0, w[ 0], offset); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_bytealign (w[ 5], w[ 6], offset); w[62] = hc_bytealign (w[ 4], w[ 5], offset); w[61] = hc_bytealign (w[ 3], w[ 4], offset); w[60] = hc_bytealign (w[ 2], w[ 3], offset); w[59] = hc_bytealign (w[ 1], w[ 2], offset); w[58] = hc_bytealign (w[ 0], w[ 1], offset); w[57] = hc_bytealign ( 0, w[ 0], offset); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 
0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_bytealign (w[ 4], w[ 5], offset); w[62] = hc_bytealign (w[ 3], w[ 4], offset); w[61] = hc_bytealign (w[ 2], w[ 3], offset); w[60] = hc_bytealign (w[ 1], w[ 2], offset); w[59] = hc_bytealign (w[ 0], w[ 1], offset); w[58] = hc_bytealign ( 0, w[ 0], offset); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_bytealign (w[ 3], w[ 4], offset); w[62] = hc_bytealign (w[ 2], w[ 3], offset); w[61] = hc_bytealign (w[ 1], w[ 2], offset); w[60] = hc_bytealign (w[ 0], w[ 1], offset); w[59] = hc_bytealign ( 0, w[ 0], offset); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; 
w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_bytealign (w[ 2], w[ 3], offset); w[62] = hc_bytealign (w[ 1], w[ 2], offset); w[61] = hc_bytealign (w[ 0], w[ 1], offset); w[60] = hc_bytealign ( 0, w[ 0], offset); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_bytealign (w[ 1], w[ 2], offset); w[62] = hc_bytealign (w[ 0], w[ 1], offset); w[61] = hc_bytealign ( 0, w[ 0], offset); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_bytealign (w[ 0], w[ 1], offset); w[62] = hc_bytealign ( 0, w[ 0], offset); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; 
w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_bytealign ( 0, w[ 0], offset); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: w[63] = hc_byte_perm (w[62], w[63], selector); w[62] = hc_byte_perm (w[61], w[62], selector); w[61] = hc_byte_perm (w[60], w[61], selector); w[60] = hc_byte_perm (w[59], w[60], selector); w[59] = hc_byte_perm (w[58], w[59], selector); w[58] = hc_byte_perm (w[57], w[58], selector); w[57] = 
hc_byte_perm (w[56], w[57], selector); w[56] = hc_byte_perm (w[55], w[56], selector); w[55] = hc_byte_perm (w[54], w[55], selector); w[54] = hc_byte_perm (w[53], w[54], selector); w[53] = hc_byte_perm (w[52], w[53], selector); w[52] = hc_byte_perm (w[51], w[52], selector); w[51] = hc_byte_perm (w[50], w[51], selector); w[50] = hc_byte_perm (w[49], w[50], selector); w[49] = hc_byte_perm (w[48], w[49], selector); w[48] = hc_byte_perm (w[47], w[48], selector); w[47] = hc_byte_perm (w[46], w[47], selector); w[46] = hc_byte_perm (w[45], w[46], selector); w[45] = hc_byte_perm (w[44], w[45], selector); w[44] = hc_byte_perm (w[43], w[44], selector); w[43] = hc_byte_perm (w[42], w[43], selector); w[42] = hc_byte_perm (w[41], w[42], selector); w[41] = hc_byte_perm (w[40], w[41], selector); w[40] = hc_byte_perm (w[39], w[40], selector); w[39] = hc_byte_perm (w[38], w[39], selector); w[38] = hc_byte_perm (w[37], w[38], selector); w[37] = hc_byte_perm (w[36], w[37], selector); w[36] = hc_byte_perm (w[35], w[36], selector); w[35] = hc_byte_perm (w[34], w[35], selector); w[34] = hc_byte_perm (w[33], w[34], selector); w[33] = hc_byte_perm (w[32], w[33], selector); w[32] = hc_byte_perm (w[31], w[32], selector); w[31] = hc_byte_perm (w[30], w[31], selector); w[30] = hc_byte_perm (w[29], w[30], selector); w[29] = hc_byte_perm (w[28], w[29], selector); w[28] = hc_byte_perm (w[27], w[28], selector); w[27] = hc_byte_perm (w[26], w[27], selector); w[26] = hc_byte_perm (w[25], w[26], selector); w[25] = hc_byte_perm (w[24], w[25], selector); w[24] = hc_byte_perm (w[23], w[24], selector); w[23] = hc_byte_perm (w[22], w[23], selector); w[22] = hc_byte_perm (w[21], w[22], selector); w[21] = hc_byte_perm (w[20], w[21], selector); w[20] = hc_byte_perm (w[19], w[20], selector); w[19] = hc_byte_perm (w[18], w[19], selector); w[18] = hc_byte_perm (w[17], w[18], selector); w[17] = hc_byte_perm (w[16], w[17], selector); w[16] = hc_byte_perm (w[15], w[16], selector); w[15] = hc_byte_perm (w[14], 
w[15], selector); w[14] = hc_byte_perm (w[13], w[14], selector); w[13] = hc_byte_perm (w[12], w[13], selector); w[12] = hc_byte_perm (w[11], w[12], selector); w[11] = hc_byte_perm (w[10], w[11], selector); w[10] = hc_byte_perm (w[ 9], w[10], selector); w[ 9] = hc_byte_perm (w[ 8], w[ 9], selector); w[ 8] = hc_byte_perm (w[ 7], w[ 8], selector); w[ 7] = hc_byte_perm (w[ 6], w[ 7], selector); w[ 6] = hc_byte_perm (w[ 5], w[ 6], selector); w[ 5] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 4] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 3] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 2] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 1] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 0] = hc_byte_perm ( 0, w[ 0], selector); break; case 1: w[63] = hc_byte_perm (w[61], w[62], selector); w[62] = hc_byte_perm (w[60], w[61], selector); w[61] = hc_byte_perm (w[59], w[60], selector); w[60] = hc_byte_perm (w[58], w[59], selector); w[59] = hc_byte_perm (w[57], w[58], selector); w[58] = hc_byte_perm (w[56], w[57], selector); w[57] = hc_byte_perm (w[55], w[56], selector); w[56] = hc_byte_perm (w[54], w[55], selector); w[55] = hc_byte_perm (w[53], w[54], selector); w[54] = hc_byte_perm (w[52], w[53], selector); w[53] = hc_byte_perm (w[51], w[52], selector); w[52] = hc_byte_perm (w[50], w[51], selector); w[51] = hc_byte_perm (w[49], w[50], selector); w[50] = hc_byte_perm (w[48], w[49], selector); w[49] = hc_byte_perm (w[47], w[48], selector); w[48] = hc_byte_perm (w[46], w[47], selector); w[47] = hc_byte_perm (w[45], w[46], selector); w[46] = hc_byte_perm (w[44], w[45], selector); w[45] = hc_byte_perm (w[43], w[44], selector); w[44] = hc_byte_perm (w[42], w[43], selector); w[43] = hc_byte_perm (w[41], w[42], selector); w[42] = hc_byte_perm (w[40], w[41], selector); w[41] = hc_byte_perm (w[39], w[40], selector); w[40] = hc_byte_perm (w[38], w[39], selector); w[39] = hc_byte_perm (w[37], w[38], selector); w[38] = hc_byte_perm (w[36], w[37], selector); w[37] = hc_byte_perm (w[35], w[36], 
selector); w[36] = hc_byte_perm (w[34], w[35], selector); w[35] = hc_byte_perm (w[33], w[34], selector); w[34] = hc_byte_perm (w[32], w[33], selector); w[33] = hc_byte_perm (w[31], w[32], selector); w[32] = hc_byte_perm (w[30], w[31], selector); w[31] = hc_byte_perm (w[29], w[30], selector); w[30] = hc_byte_perm (w[28], w[29], selector); w[29] = hc_byte_perm (w[27], w[28], selector); w[28] = hc_byte_perm (w[26], w[27], selector); w[27] = hc_byte_perm (w[25], w[26], selector); w[26] = hc_byte_perm (w[24], w[25], selector); w[25] = hc_byte_perm (w[23], w[24], selector); w[24] = hc_byte_perm (w[22], w[23], selector); w[23] = hc_byte_perm (w[21], w[22], selector); w[22] = hc_byte_perm (w[20], w[21], selector); w[21] = hc_byte_perm (w[19], w[20], selector); w[20] = hc_byte_perm (w[18], w[19], selector); w[19] = hc_byte_perm (w[17], w[18], selector); w[18] = hc_byte_perm (w[16], w[17], selector); w[17] = hc_byte_perm (w[15], w[16], selector); w[16] = hc_byte_perm (w[14], w[15], selector); w[15] = hc_byte_perm (w[13], w[14], selector); w[14] = hc_byte_perm (w[12], w[13], selector); w[13] = hc_byte_perm (w[11], w[12], selector); w[12] = hc_byte_perm (w[10], w[11], selector); w[11] = hc_byte_perm (w[ 9], w[10], selector); w[10] = hc_byte_perm (w[ 8], w[ 9], selector); w[ 9] = hc_byte_perm (w[ 7], w[ 8], selector); w[ 8] = hc_byte_perm (w[ 6], w[ 7], selector); w[ 7] = hc_byte_perm (w[ 5], w[ 6], selector); w[ 6] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 5] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 4] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 3] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 2] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 1] = hc_byte_perm ( 0, w[ 0], selector); w[ 0] = 0; break; case 2: w[63] = hc_byte_perm (w[60], w[61], selector); w[62] = hc_byte_perm (w[59], w[60], selector); w[61] = hc_byte_perm (w[58], w[59], selector); w[60] = hc_byte_perm (w[57], w[58], selector); w[59] = hc_byte_perm (w[56], w[57], selector); w[58] = hc_byte_perm (w[55], w[56], 
selector); w[57] = hc_byte_perm (w[54], w[55], selector); w[56] = hc_byte_perm (w[53], w[54], selector); w[55] = hc_byte_perm (w[52], w[53], selector); w[54] = hc_byte_perm (w[51], w[52], selector); w[53] = hc_byte_perm (w[50], w[51], selector); w[52] = hc_byte_perm (w[49], w[50], selector); w[51] = hc_byte_perm (w[48], w[49], selector); w[50] = hc_byte_perm (w[47], w[48], selector); w[49] = hc_byte_perm (w[46], w[47], selector); w[48] = hc_byte_perm (w[45], w[46], selector); w[47] = hc_byte_perm (w[44], w[45], selector); w[46] = hc_byte_perm (w[43], w[44], selector); w[45] = hc_byte_perm (w[42], w[43], selector); w[44] = hc_byte_perm (w[41], w[42], selector); w[43] = hc_byte_perm (w[40], w[41], selector); w[42] = hc_byte_perm (w[39], w[40], selector); w[41] = hc_byte_perm (w[38], w[39], selector); w[40] = hc_byte_perm (w[37], w[38], selector); w[39] = hc_byte_perm (w[36], w[37], selector); w[38] = hc_byte_perm (w[35], w[36], selector); w[37] = hc_byte_perm (w[34], w[35], selector); w[36] = hc_byte_perm (w[33], w[34], selector); w[35] = hc_byte_perm (w[32], w[33], selector); w[34] = hc_byte_perm (w[31], w[32], selector); w[33] = hc_byte_perm (w[30], w[31], selector); w[32] = hc_byte_perm (w[29], w[30], selector); w[31] = hc_byte_perm (w[28], w[29], selector); w[30] = hc_byte_perm (w[27], w[28], selector); w[29] = hc_byte_perm (w[26], w[27], selector); w[28] = hc_byte_perm (w[25], w[26], selector); w[27] = hc_byte_perm (w[24], w[25], selector); w[26] = hc_byte_perm (w[23], w[24], selector); w[25] = hc_byte_perm (w[22], w[23], selector); w[24] = hc_byte_perm (w[21], w[22], selector); w[23] = hc_byte_perm (w[20], w[21], selector); w[22] = hc_byte_perm (w[19], w[20], selector); w[21] = hc_byte_perm (w[18], w[19], selector); w[20] = hc_byte_perm (w[17], w[18], selector); w[19] = hc_byte_perm (w[16], w[17], selector); w[18] = hc_byte_perm (w[15], w[16], selector); w[17] = hc_byte_perm (w[14], w[15], selector); w[16] = hc_byte_perm (w[13], w[14], selector); w[15] = 
hc_byte_perm (w[12], w[13], selector); w[14] = hc_byte_perm (w[11], w[12], selector); w[13] = hc_byte_perm (w[10], w[11], selector); w[12] = hc_byte_perm (w[ 9], w[10], selector); w[11] = hc_byte_perm (w[ 8], w[ 9], selector); w[10] = hc_byte_perm (w[ 7], w[ 8], selector); w[ 9] = hc_byte_perm (w[ 6], w[ 7], selector); w[ 8] = hc_byte_perm (w[ 5], w[ 6], selector); w[ 7] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 6] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 5] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 4] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 3] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 2] = hc_byte_perm ( 0, w[ 0], selector); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_byte_perm (w[59], w[60], selector); w[62] = hc_byte_perm (w[58], w[59], selector); w[61] = hc_byte_perm (w[57], w[58], selector); w[60] = hc_byte_perm (w[56], w[57], selector); w[59] = hc_byte_perm (w[55], w[56], selector); w[58] = hc_byte_perm (w[54], w[55], selector); w[57] = hc_byte_perm (w[53], w[54], selector); w[56] = hc_byte_perm (w[52], w[53], selector); w[55] = hc_byte_perm (w[51], w[52], selector); w[54] = hc_byte_perm (w[50], w[51], selector); w[53] = hc_byte_perm (w[49], w[50], selector); w[52] = hc_byte_perm (w[48], w[49], selector); w[51] = hc_byte_perm (w[47], w[48], selector); w[50] = hc_byte_perm (w[46], w[47], selector); w[49] = hc_byte_perm (w[45], w[46], selector); w[48] = hc_byte_perm (w[44], w[45], selector); w[47] = hc_byte_perm (w[43], w[44], selector); w[46] = hc_byte_perm (w[42], w[43], selector); w[45] = hc_byte_perm (w[41], w[42], selector); w[44] = hc_byte_perm (w[40], w[41], selector); w[43] = hc_byte_perm (w[39], w[40], selector); w[42] = hc_byte_perm (w[38], w[39], selector); w[41] = hc_byte_perm (w[37], w[38], selector); w[40] = hc_byte_perm (w[36], w[37], selector); w[39] = hc_byte_perm (w[35], w[36], selector); w[38] = hc_byte_perm (w[34], w[35], selector); w[37] = hc_byte_perm (w[33], w[34], selector); w[36] = hc_byte_perm (w[32], w[33], selector); 
w[35] = hc_byte_perm (w[31], w[32], selector); w[34] = hc_byte_perm (w[30], w[31], selector); w[33] = hc_byte_perm (w[29], w[30], selector); w[32] = hc_byte_perm (w[28], w[29], selector); w[31] = hc_byte_perm (w[27], w[28], selector); w[30] = hc_byte_perm (w[26], w[27], selector); w[29] = hc_byte_perm (w[25], w[26], selector); w[28] = hc_byte_perm (w[24], w[25], selector); w[27] = hc_byte_perm (w[23], w[24], selector); w[26] = hc_byte_perm (w[22], w[23], selector); w[25] = hc_byte_perm (w[21], w[22], selector); w[24] = hc_byte_perm (w[20], w[21], selector); w[23] = hc_byte_perm (w[19], w[20], selector); w[22] = hc_byte_perm (w[18], w[19], selector); w[21] = hc_byte_perm (w[17], w[18], selector); w[20] = hc_byte_perm (w[16], w[17], selector); w[19] = hc_byte_perm (w[15], w[16], selector); w[18] = hc_byte_perm (w[14], w[15], selector); w[17] = hc_byte_perm (w[13], w[14], selector); w[16] = hc_byte_perm (w[12], w[13], selector); w[15] = hc_byte_perm (w[11], w[12], selector); w[14] = hc_byte_perm (w[10], w[11], selector); w[13] = hc_byte_perm (w[ 9], w[10], selector); w[12] = hc_byte_perm (w[ 8], w[ 9], selector); w[11] = hc_byte_perm (w[ 7], w[ 8], selector); w[10] = hc_byte_perm (w[ 6], w[ 7], selector); w[ 9] = hc_byte_perm (w[ 5], w[ 6], selector); w[ 8] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 7] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 6] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 5] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 4] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 3] = hc_byte_perm ( 0, w[ 0], selector); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_byte_perm (w[58], w[59], selector); w[62] = hc_byte_perm (w[57], w[58], selector); w[61] = hc_byte_perm (w[56], w[57], selector); w[60] = hc_byte_perm (w[55], w[56], selector); w[59] = hc_byte_perm (w[54], w[55], selector); w[58] = hc_byte_perm (w[53], w[54], selector); w[57] = hc_byte_perm (w[52], w[53], selector); w[56] = hc_byte_perm (w[51], w[52], selector); w[55] = hc_byte_perm 
(w[50], w[51], selector); w[54] = hc_byte_perm (w[49], w[50], selector); w[53] = hc_byte_perm (w[48], w[49], selector); w[52] = hc_byte_perm (w[47], w[48], selector); w[51] = hc_byte_perm (w[46], w[47], selector); w[50] = hc_byte_perm (w[45], w[46], selector); w[49] = hc_byte_perm (w[44], w[45], selector); w[48] = hc_byte_perm (w[43], w[44], selector); w[47] = hc_byte_perm (w[42], w[43], selector); w[46] = hc_byte_perm (w[41], w[42], selector); w[45] = hc_byte_perm (w[40], w[41], selector); w[44] = hc_byte_perm (w[39], w[40], selector); w[43] = hc_byte_perm (w[38], w[39], selector); w[42] = hc_byte_perm (w[37], w[38], selector); w[41] = hc_byte_perm (w[36], w[37], selector); w[40] = hc_byte_perm (w[35], w[36], selector); w[39] = hc_byte_perm (w[34], w[35], selector); w[38] = hc_byte_perm (w[33], w[34], selector); w[37] = hc_byte_perm (w[32], w[33], selector); w[36] = hc_byte_perm (w[31], w[32], selector); w[35] = hc_byte_perm (w[30], w[31], selector); w[34] = hc_byte_perm (w[29], w[30], selector); w[33] = hc_byte_perm (w[28], w[29], selector); w[32] = hc_byte_perm (w[27], w[28], selector); w[31] = hc_byte_perm (w[26], w[27], selector); w[30] = hc_byte_perm (w[25], w[26], selector); w[29] = hc_byte_perm (w[24], w[25], selector); w[28] = hc_byte_perm (w[23], w[24], selector); w[27] = hc_byte_perm (w[22], w[23], selector); w[26] = hc_byte_perm (w[21], w[22], selector); w[25] = hc_byte_perm (w[20], w[21], selector); w[24] = hc_byte_perm (w[19], w[20], selector); w[23] = hc_byte_perm (w[18], w[19], selector); w[22] = hc_byte_perm (w[17], w[18], selector); w[21] = hc_byte_perm (w[16], w[17], selector); w[20] = hc_byte_perm (w[15], w[16], selector); w[19] = hc_byte_perm (w[14], w[15], selector); w[18] = hc_byte_perm (w[13], w[14], selector); w[17] = hc_byte_perm (w[12], w[13], selector); w[16] = hc_byte_perm (w[11], w[12], selector); w[15] = hc_byte_perm (w[10], w[11], selector); w[14] = hc_byte_perm (w[ 9], w[10], selector); w[13] = hc_byte_perm (w[ 8], w[ 9], selector); 
w[12] = hc_byte_perm (w[ 7], w[ 8], selector); w[11] = hc_byte_perm (w[ 6], w[ 7], selector); w[10] = hc_byte_perm (w[ 5], w[ 6], selector); w[ 9] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 8] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 7] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 6] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 5] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 4] = hc_byte_perm ( 0, w[ 0], selector); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_byte_perm (w[57], w[58], selector); w[62] = hc_byte_perm (w[56], w[57], selector); w[61] = hc_byte_perm (w[55], w[56], selector); w[60] = hc_byte_perm (w[54], w[55], selector); w[59] = hc_byte_perm (w[53], w[54], selector); w[58] = hc_byte_perm (w[52], w[53], selector); w[57] = hc_byte_perm (w[51], w[52], selector); w[56] = hc_byte_perm (w[50], w[51], selector); w[55] = hc_byte_perm (w[49], w[50], selector); w[54] = hc_byte_perm (w[48], w[49], selector); w[53] = hc_byte_perm (w[47], w[48], selector); w[52] = hc_byte_perm (w[46], w[47], selector); w[51] = hc_byte_perm (w[45], w[46], selector); w[50] = hc_byte_perm (w[44], w[45], selector); w[49] = hc_byte_perm (w[43], w[44], selector); w[48] = hc_byte_perm (w[42], w[43], selector); w[47] = hc_byte_perm (w[41], w[42], selector); w[46] = hc_byte_perm (w[40], w[41], selector); w[45] = hc_byte_perm (w[39], w[40], selector); w[44] = hc_byte_perm (w[38], w[39], selector); w[43] = hc_byte_perm (w[37], w[38], selector); w[42] = hc_byte_perm (w[36], w[37], selector); w[41] = hc_byte_perm (w[35], w[36], selector); w[40] = hc_byte_perm (w[34], w[35], selector); w[39] = hc_byte_perm (w[33], w[34], selector); w[38] = hc_byte_perm (w[32], w[33], selector); w[37] = hc_byte_perm (w[31], w[32], selector); w[36] = hc_byte_perm (w[30], w[31], selector); w[35] = hc_byte_perm (w[29], w[30], selector); w[34] = hc_byte_perm (w[28], w[29], selector); w[33] = hc_byte_perm (w[27], w[28], selector); w[32] = hc_byte_perm (w[26], w[27], selector); w[31] = 
hc_byte_perm (w[25], w[26], selector); w[30] = hc_byte_perm (w[24], w[25], selector); w[29] = hc_byte_perm (w[23], w[24], selector); w[28] = hc_byte_perm (w[22], w[23], selector); w[27] = hc_byte_perm (w[21], w[22], selector); w[26] = hc_byte_perm (w[20], w[21], selector); w[25] = hc_byte_perm (w[19], w[20], selector); w[24] = hc_byte_perm (w[18], w[19], selector); w[23] = hc_byte_perm (w[17], w[18], selector); w[22] = hc_byte_perm (w[16], w[17], selector); w[21] = hc_byte_perm (w[15], w[16], selector); w[20] = hc_byte_perm (w[14], w[15], selector); w[19] = hc_byte_perm (w[13], w[14], selector); w[18] = hc_byte_perm (w[12], w[13], selector); w[17] = hc_byte_perm (w[11], w[12], selector); w[16] = hc_byte_perm (w[10], w[11], selector); w[15] = hc_byte_perm (w[ 9], w[10], selector); w[14] = hc_byte_perm (w[ 8], w[ 9], selector); w[13] = hc_byte_perm (w[ 7], w[ 8], selector); w[12] = hc_byte_perm (w[ 6], w[ 7], selector); w[11] = hc_byte_perm (w[ 5], w[ 6], selector); w[10] = hc_byte_perm (w[ 4], w[ 5], selector); w[ 9] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 8] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 7] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 6] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 5] = hc_byte_perm ( 0, w[ 0], selector); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_byte_perm (w[56], w[57], selector); w[62] = hc_byte_perm (w[55], w[56], selector); w[61] = hc_byte_perm (w[54], w[55], selector); w[60] = hc_byte_perm (w[53], w[54], selector); w[59] = hc_byte_perm (w[52], w[53], selector); w[58] = hc_byte_perm (w[51], w[52], selector); w[57] = hc_byte_perm (w[50], w[51], selector); w[56] = hc_byte_perm (w[49], w[50], selector); w[55] = hc_byte_perm (w[48], w[49], selector); w[54] = hc_byte_perm (w[47], w[48], selector); w[53] = hc_byte_perm (w[46], w[47], selector); w[52] = hc_byte_perm (w[45], w[46], selector); w[51] = hc_byte_perm (w[44], w[45], selector); w[50] = hc_byte_perm (w[43], w[44], selector); w[49] = 
hc_byte_perm (w[42], w[43], selector); w[48] = hc_byte_perm (w[41], w[42], selector); w[47] = hc_byte_perm (w[40], w[41], selector); w[46] = hc_byte_perm (w[39], w[40], selector); w[45] = hc_byte_perm (w[38], w[39], selector); w[44] = hc_byte_perm (w[37], w[38], selector); w[43] = hc_byte_perm (w[36], w[37], selector); w[42] = hc_byte_perm (w[35], w[36], selector); w[41] = hc_byte_perm (w[34], w[35], selector); w[40] = hc_byte_perm (w[33], w[34], selector); w[39] = hc_byte_perm (w[32], w[33], selector); w[38] = hc_byte_perm (w[31], w[32], selector); w[37] = hc_byte_perm (w[30], w[31], selector); w[36] = hc_byte_perm (w[29], w[30], selector); w[35] = hc_byte_perm (w[28], w[29], selector); w[34] = hc_byte_perm (w[27], w[28], selector); w[33] = hc_byte_perm (w[26], w[27], selector); w[32] = hc_byte_perm (w[25], w[26], selector); w[31] = hc_byte_perm (w[24], w[25], selector); w[30] = hc_byte_perm (w[23], w[24], selector); w[29] = hc_byte_perm (w[22], w[23], selector); w[28] = hc_byte_perm (w[21], w[22], selector); w[27] = hc_byte_perm (w[20], w[21], selector); w[26] = hc_byte_perm (w[19], w[20], selector); w[25] = hc_byte_perm (w[18], w[19], selector); w[24] = hc_byte_perm (w[17], w[18], selector); w[23] = hc_byte_perm (w[16], w[17], selector); w[22] = hc_byte_perm (w[15], w[16], selector); w[21] = hc_byte_perm (w[14], w[15], selector); w[20] = hc_byte_perm (w[13], w[14], selector); w[19] = hc_byte_perm (w[12], w[13], selector); w[18] = hc_byte_perm (w[11], w[12], selector); w[17] = hc_byte_perm (w[10], w[11], selector); w[16] = hc_byte_perm (w[ 9], w[10], selector); w[15] = hc_byte_perm (w[ 8], w[ 9], selector); w[14] = hc_byte_perm (w[ 7], w[ 8], selector); w[13] = hc_byte_perm (w[ 6], w[ 7], selector); w[12] = hc_byte_perm (w[ 5], w[ 6], selector); w[11] = hc_byte_perm (w[ 4], w[ 5], selector); w[10] = hc_byte_perm (w[ 3], w[ 4], selector); w[ 9] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 8] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 7] = hc_byte_perm (w[ 0], w[ 
1], selector); w[ 6] = hc_byte_perm ( 0, w[ 0], selector); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_byte_perm (w[55], w[56], selector); w[62] = hc_byte_perm (w[54], w[55], selector); w[61] = hc_byte_perm (w[53], w[54], selector); w[60] = hc_byte_perm (w[52], w[53], selector); w[59] = hc_byte_perm (w[51], w[52], selector); w[58] = hc_byte_perm (w[50], w[51], selector); w[57] = hc_byte_perm (w[49], w[50], selector); w[56] = hc_byte_perm (w[48], w[49], selector); w[55] = hc_byte_perm (w[47], w[48], selector); w[54] = hc_byte_perm (w[46], w[47], selector); w[53] = hc_byte_perm (w[45], w[46], selector); w[52] = hc_byte_perm (w[44], w[45], selector); w[51] = hc_byte_perm (w[43], w[44], selector); w[50] = hc_byte_perm (w[42], w[43], selector); w[49] = hc_byte_perm (w[41], w[42], selector); w[48] = hc_byte_perm (w[40], w[41], selector); w[47] = hc_byte_perm (w[39], w[40], selector); w[46] = hc_byte_perm (w[38], w[39], selector); w[45] = hc_byte_perm (w[37], w[38], selector); w[44] = hc_byte_perm (w[36], w[37], selector); w[43] = hc_byte_perm (w[35], w[36], selector); w[42] = hc_byte_perm (w[34], w[35], selector); w[41] = hc_byte_perm (w[33], w[34], selector); w[40] = hc_byte_perm (w[32], w[33], selector); w[39] = hc_byte_perm (w[31], w[32], selector); w[38] = hc_byte_perm (w[30], w[31], selector); w[37] = hc_byte_perm (w[29], w[30], selector); w[36] = hc_byte_perm (w[28], w[29], selector); w[35] = hc_byte_perm (w[27], w[28], selector); w[34] = hc_byte_perm (w[26], w[27], selector); w[33] = hc_byte_perm (w[25], w[26], selector); w[32] = hc_byte_perm (w[24], w[25], selector); w[31] = hc_byte_perm (w[23], w[24], selector); w[30] = hc_byte_perm (w[22], w[23], selector); w[29] = hc_byte_perm (w[21], w[22], selector); w[28] = hc_byte_perm (w[20], w[21], selector); w[27] = hc_byte_perm (w[19], w[20], selector); w[26] = hc_byte_perm (w[18], w[19], selector); w[25] = hc_byte_perm (w[17], w[18], selector); w[24] = hc_byte_perm 
(w[16], w[17], selector); w[23] = hc_byte_perm (w[15], w[16], selector); w[22] = hc_byte_perm (w[14], w[15], selector); w[21] = hc_byte_perm (w[13], w[14], selector); w[20] = hc_byte_perm (w[12], w[13], selector); w[19] = hc_byte_perm (w[11], w[12], selector); w[18] = hc_byte_perm (w[10], w[11], selector); w[17] = hc_byte_perm (w[ 9], w[10], selector); w[16] = hc_byte_perm (w[ 8], w[ 9], selector); w[15] = hc_byte_perm (w[ 7], w[ 8], selector); w[14] = hc_byte_perm (w[ 6], w[ 7], selector); w[13] = hc_byte_perm (w[ 5], w[ 6], selector); w[12] = hc_byte_perm (w[ 4], w[ 5], selector); w[11] = hc_byte_perm (w[ 3], w[ 4], selector); w[10] = hc_byte_perm (w[ 2], w[ 3], selector); w[ 9] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 8] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 7] = hc_byte_perm ( 0, w[ 0], selector); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_byte_perm (w[54], w[55], selector); w[62] = hc_byte_perm (w[53], w[54], selector); w[61] = hc_byte_perm (w[52], w[53], selector); w[60] = hc_byte_perm (w[51], w[52], selector); w[59] = hc_byte_perm (w[50], w[51], selector); w[58] = hc_byte_perm (w[49], w[50], selector); w[57] = hc_byte_perm (w[48], w[49], selector); w[56] = hc_byte_perm (w[47], w[48], selector); w[55] = hc_byte_perm (w[46], w[47], selector); w[54] = hc_byte_perm (w[45], w[46], selector); w[53] = hc_byte_perm (w[44], w[45], selector); w[52] = hc_byte_perm (w[43], w[44], selector); w[51] = hc_byte_perm (w[42], w[43], selector); w[50] = hc_byte_perm (w[41], w[42], selector); w[49] = hc_byte_perm (w[40], w[41], selector); w[48] = hc_byte_perm (w[39], w[40], selector); w[47] = hc_byte_perm (w[38], w[39], selector); w[46] = hc_byte_perm (w[37], w[38], selector); w[45] = hc_byte_perm (w[36], w[37], selector); w[44] = hc_byte_perm (w[35], w[36], selector); w[43] = hc_byte_perm (w[34], w[35], selector); w[42] = hc_byte_perm (w[33], w[34], selector); w[41] = hc_byte_perm (w[32], w[33], selector); 
w[40] = hc_byte_perm (w[31], w[32], selector); w[39] = hc_byte_perm (w[30], w[31], selector); w[38] = hc_byte_perm (w[29], w[30], selector); w[37] = hc_byte_perm (w[28], w[29], selector); w[36] = hc_byte_perm (w[27], w[28], selector); w[35] = hc_byte_perm (w[26], w[27], selector); w[34] = hc_byte_perm (w[25], w[26], selector); w[33] = hc_byte_perm (w[24], w[25], selector); w[32] = hc_byte_perm (w[23], w[24], selector); w[31] = hc_byte_perm (w[22], w[23], selector); w[30] = hc_byte_perm (w[21], w[22], selector); w[29] = hc_byte_perm (w[20], w[21], selector); w[28] = hc_byte_perm (w[19], w[20], selector); w[27] = hc_byte_perm (w[18], w[19], selector); w[26] = hc_byte_perm (w[17], w[18], selector); w[25] = hc_byte_perm (w[16], w[17], selector); w[24] = hc_byte_perm (w[15], w[16], selector); w[23] = hc_byte_perm (w[14], w[15], selector); w[22] = hc_byte_perm (w[13], w[14], selector); w[21] = hc_byte_perm (w[12], w[13], selector); w[20] = hc_byte_perm (w[11], w[12], selector); w[19] = hc_byte_perm (w[10], w[11], selector); w[18] = hc_byte_perm (w[ 9], w[10], selector); w[17] = hc_byte_perm (w[ 8], w[ 9], selector); w[16] = hc_byte_perm (w[ 7], w[ 8], selector); w[15] = hc_byte_perm (w[ 6], w[ 7], selector); w[14] = hc_byte_perm (w[ 5], w[ 6], selector); w[13] = hc_byte_perm (w[ 4], w[ 5], selector); w[12] = hc_byte_perm (w[ 3], w[ 4], selector); w[11] = hc_byte_perm (w[ 2], w[ 3], selector); w[10] = hc_byte_perm (w[ 1], w[ 2], selector); w[ 9] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 8] = hc_byte_perm ( 0, w[ 0], selector); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_byte_perm (w[53], w[54], selector); w[62] = hc_byte_perm (w[52], w[53], selector); w[61] = hc_byte_perm (w[51], w[52], selector); w[60] = hc_byte_perm (w[50], w[51], selector); w[59] = hc_byte_perm (w[49], w[50], selector); w[58] = hc_byte_perm (w[48], w[49], selector); w[57] = hc_byte_perm (w[47], w[48], selector); w[56] = 
hc_byte_perm (w[46], w[47], selector); w[55] = hc_byte_perm (w[45], w[46], selector); w[54] = hc_byte_perm (w[44], w[45], selector); w[53] = hc_byte_perm (w[43], w[44], selector); w[52] = hc_byte_perm (w[42], w[43], selector); w[51] = hc_byte_perm (w[41], w[42], selector); w[50] = hc_byte_perm (w[40], w[41], selector); w[49] = hc_byte_perm (w[39], w[40], selector); w[48] = hc_byte_perm (w[38], w[39], selector); w[47] = hc_byte_perm (w[37], w[38], selector); w[46] = hc_byte_perm (w[36], w[37], selector); w[45] = hc_byte_perm (w[35], w[36], selector); w[44] = hc_byte_perm (w[34], w[35], selector); w[43] = hc_byte_perm (w[33], w[34], selector); w[42] = hc_byte_perm (w[32], w[33], selector); w[41] = hc_byte_perm (w[31], w[32], selector); w[40] = hc_byte_perm (w[30], w[31], selector); w[39] = hc_byte_perm (w[29], w[30], selector); w[38] = hc_byte_perm (w[28], w[29], selector); w[37] = hc_byte_perm (w[27], w[28], selector); w[36] = hc_byte_perm (w[26], w[27], selector); w[35] = hc_byte_perm (w[25], w[26], selector); w[34] = hc_byte_perm (w[24], w[25], selector); w[33] = hc_byte_perm (w[23], w[24], selector); w[32] = hc_byte_perm (w[22], w[23], selector); w[31] = hc_byte_perm (w[21], w[22], selector); w[30] = hc_byte_perm (w[20], w[21], selector); w[29] = hc_byte_perm (w[19], w[20], selector); w[28] = hc_byte_perm (w[18], w[19], selector); w[27] = hc_byte_perm (w[17], w[18], selector); w[26] = hc_byte_perm (w[16], w[17], selector); w[25] = hc_byte_perm (w[15], w[16], selector); w[24] = hc_byte_perm (w[14], w[15], selector); w[23] = hc_byte_perm (w[13], w[14], selector); w[22] = hc_byte_perm (w[12], w[13], selector); w[21] = hc_byte_perm (w[11], w[12], selector); w[20] = hc_byte_perm (w[10], w[11], selector); w[19] = hc_byte_perm (w[ 9], w[10], selector); w[18] = hc_byte_perm (w[ 8], w[ 9], selector); w[17] = hc_byte_perm (w[ 7], w[ 8], selector); w[16] = hc_byte_perm (w[ 6], w[ 7], selector); w[15] = hc_byte_perm (w[ 5], w[ 6], selector); w[14] = hc_byte_perm (w[ 4], w[ 
5], selector); w[13] = hc_byte_perm (w[ 3], w[ 4], selector); w[12] = hc_byte_perm (w[ 2], w[ 3], selector); w[11] = hc_byte_perm (w[ 1], w[ 2], selector); w[10] = hc_byte_perm (w[ 0], w[ 1], selector); w[ 9] = hc_byte_perm ( 0, w[ 0], selector); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_byte_perm (w[52], w[53], selector); w[62] = hc_byte_perm (w[51], w[52], selector); w[61] = hc_byte_perm (w[50], w[51], selector); w[60] = hc_byte_perm (w[49], w[50], selector); w[59] = hc_byte_perm (w[48], w[49], selector); w[58] = hc_byte_perm (w[47], w[48], selector); w[57] = hc_byte_perm (w[46], w[47], selector); w[56] = hc_byte_perm (w[45], w[46], selector); w[55] = hc_byte_perm (w[44], w[45], selector); w[54] = hc_byte_perm (w[43], w[44], selector); w[53] = hc_byte_perm (w[42], w[43], selector); w[52] = hc_byte_perm (w[41], w[42], selector); w[51] = hc_byte_perm (w[40], w[41], selector); w[50] = hc_byte_perm (w[39], w[40], selector); w[49] = hc_byte_perm (w[38], w[39], selector); w[48] = hc_byte_perm (w[37], w[38], selector); w[47] = hc_byte_perm (w[36], w[37], selector); w[46] = hc_byte_perm (w[35], w[36], selector); w[45] = hc_byte_perm (w[34], w[35], selector); w[44] = hc_byte_perm (w[33], w[34], selector); w[43] = hc_byte_perm (w[32], w[33], selector); w[42] = hc_byte_perm (w[31], w[32], selector); w[41] = hc_byte_perm (w[30], w[31], selector); w[40] = hc_byte_perm (w[29], w[30], selector); w[39] = hc_byte_perm (w[28], w[29], selector); w[38] = hc_byte_perm (w[27], w[28], selector); w[37] = hc_byte_perm (w[26], w[27], selector); w[36] = hc_byte_perm (w[25], w[26], selector); w[35] = hc_byte_perm (w[24], w[25], selector); w[34] = hc_byte_perm (w[23], w[24], selector); w[33] = hc_byte_perm (w[22], w[23], selector); w[32] = hc_byte_perm (w[21], w[22], selector); w[31] = hc_byte_perm (w[20], w[21], selector); w[30] = hc_byte_perm (w[19], w[20], selector); w[29] = hc_byte_perm (w[18], w[19], 
selector); w[28] = hc_byte_perm (w[17], w[18], selector); w[27] = hc_byte_perm (w[16], w[17], selector); w[26] = hc_byte_perm (w[15], w[16], selector); w[25] = hc_byte_perm (w[14], w[15], selector); w[24] = hc_byte_perm (w[13], w[14], selector); w[23] = hc_byte_perm (w[12], w[13], selector); w[22] = hc_byte_perm (w[11], w[12], selector); w[21] = hc_byte_perm (w[10], w[11], selector); w[20] = hc_byte_perm (w[ 9], w[10], selector); w[19] = hc_byte_perm (w[ 8], w[ 9], selector); w[18] = hc_byte_perm (w[ 7], w[ 8], selector); w[17] = hc_byte_perm (w[ 6], w[ 7], selector); w[16] = hc_byte_perm (w[ 5], w[ 6], selector); w[15] = hc_byte_perm (w[ 4], w[ 5], selector); w[14] = hc_byte_perm (w[ 3], w[ 4], selector); w[13] = hc_byte_perm (w[ 2], w[ 3], selector); w[12] = hc_byte_perm (w[ 1], w[ 2], selector); w[11] = hc_byte_perm (w[ 0], w[ 1], selector); w[10] = hc_byte_perm ( 0, w[ 0], selector); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_byte_perm (w[51], w[52], selector); w[62] = hc_byte_perm (w[50], w[51], selector); w[61] = hc_byte_perm (w[49], w[50], selector); w[60] = hc_byte_perm (w[48], w[49], selector); w[59] = hc_byte_perm (w[47], w[48], selector); w[58] = hc_byte_perm (w[46], w[47], selector); w[57] = hc_byte_perm (w[45], w[46], selector); w[56] = hc_byte_perm (w[44], w[45], selector); w[55] = hc_byte_perm (w[43], w[44], selector); w[54] = hc_byte_perm (w[42], w[43], selector); w[53] = hc_byte_perm (w[41], w[42], selector); w[52] = hc_byte_perm (w[40], w[41], selector); w[51] = hc_byte_perm (w[39], w[40], selector); w[50] = hc_byte_perm (w[38], w[39], selector); w[49] = hc_byte_perm (w[37], w[38], selector); w[48] = hc_byte_perm (w[36], w[37], selector); w[47] = hc_byte_perm (w[35], w[36], selector); w[46] = hc_byte_perm (w[34], w[35], selector); w[45] = hc_byte_perm (w[33], w[34], selector); w[44] = hc_byte_perm (w[32], w[33], selector); w[43] = hc_byte_perm (w[31], 
w[32], selector); w[42] = hc_byte_perm (w[30], w[31], selector); w[41] = hc_byte_perm (w[29], w[30], selector); w[40] = hc_byte_perm (w[28], w[29], selector); w[39] = hc_byte_perm (w[27], w[28], selector); w[38] = hc_byte_perm (w[26], w[27], selector); w[37] = hc_byte_perm (w[25], w[26], selector); w[36] = hc_byte_perm (w[24], w[25], selector); w[35] = hc_byte_perm (w[23], w[24], selector); w[34] = hc_byte_perm (w[22], w[23], selector); w[33] = hc_byte_perm (w[21], w[22], selector); w[32] = hc_byte_perm (w[20], w[21], selector); w[31] = hc_byte_perm (w[19], w[20], selector); w[30] = hc_byte_perm (w[18], w[19], selector); w[29] = hc_byte_perm (w[17], w[18], selector); w[28] = hc_byte_perm (w[16], w[17], selector); w[27] = hc_byte_perm (w[15], w[16], selector); w[26] = hc_byte_perm (w[14], w[15], selector); w[25] = hc_byte_perm (w[13], w[14], selector); w[24] = hc_byte_perm (w[12], w[13], selector); w[23] = hc_byte_perm (w[11], w[12], selector); w[22] = hc_byte_perm (w[10], w[11], selector); w[21] = hc_byte_perm (w[ 9], w[10], selector); w[20] = hc_byte_perm (w[ 8], w[ 9], selector); w[19] = hc_byte_perm (w[ 7], w[ 8], selector); w[18] = hc_byte_perm (w[ 6], w[ 7], selector); w[17] = hc_byte_perm (w[ 5], w[ 6], selector); w[16] = hc_byte_perm (w[ 4], w[ 5], selector); w[15] = hc_byte_perm (w[ 3], w[ 4], selector); w[14] = hc_byte_perm (w[ 2], w[ 3], selector); w[13] = hc_byte_perm (w[ 1], w[ 2], selector); w[12] = hc_byte_perm (w[ 0], w[ 1], selector); w[11] = hc_byte_perm ( 0, w[ 0], selector); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_byte_perm (w[50], w[51], selector); w[62] = hc_byte_perm (w[49], w[50], selector); w[61] = hc_byte_perm (w[48], w[49], selector); w[60] = hc_byte_perm (w[47], w[48], selector); w[59] = hc_byte_perm (w[46], w[47], selector); w[58] = hc_byte_perm (w[45], w[46], selector); w[57] = hc_byte_perm (w[44], w[45], selector); w[56] = 
hc_byte_perm (w[43], w[44], selector); w[55] = hc_byte_perm (w[42], w[43], selector); w[54] = hc_byte_perm (w[41], w[42], selector); w[53] = hc_byte_perm (w[40], w[41], selector); w[52] = hc_byte_perm (w[39], w[40], selector); w[51] = hc_byte_perm (w[38], w[39], selector); w[50] = hc_byte_perm (w[37], w[38], selector); w[49] = hc_byte_perm (w[36], w[37], selector); w[48] = hc_byte_perm (w[35], w[36], selector); w[47] = hc_byte_perm (w[34], w[35], selector); w[46] = hc_byte_perm (w[33], w[34], selector); w[45] = hc_byte_perm (w[32], w[33], selector); w[44] = hc_byte_perm (w[31], w[32], selector); w[43] = hc_byte_perm (w[30], w[31], selector); w[42] = hc_byte_perm (w[29], w[30], selector); w[41] = hc_byte_perm (w[28], w[29], selector); w[40] = hc_byte_perm (w[27], w[28], selector); w[39] = hc_byte_perm (w[26], w[27], selector); w[38] = hc_byte_perm (w[25], w[26], selector); w[37] = hc_byte_perm (w[24], w[25], selector); w[36] = hc_byte_perm (w[23], w[24], selector); w[35] = hc_byte_perm (w[22], w[23], selector); w[34] = hc_byte_perm (w[21], w[22], selector); w[33] = hc_byte_perm (w[20], w[21], selector); w[32] = hc_byte_perm (w[19], w[20], selector); w[31] = hc_byte_perm (w[18], w[19], selector); w[30] = hc_byte_perm (w[17], w[18], selector); w[29] = hc_byte_perm (w[16], w[17], selector); w[28] = hc_byte_perm (w[15], w[16], selector); w[27] = hc_byte_perm (w[14], w[15], selector); w[26] = hc_byte_perm (w[13], w[14], selector); w[25] = hc_byte_perm (w[12], w[13], selector); w[24] = hc_byte_perm (w[11], w[12], selector); w[23] = hc_byte_perm (w[10], w[11], selector); w[22] = hc_byte_perm (w[ 9], w[10], selector); w[21] = hc_byte_perm (w[ 8], w[ 9], selector); w[20] = hc_byte_perm (w[ 7], w[ 8], selector); w[19] = hc_byte_perm (w[ 6], w[ 7], selector); w[18] = hc_byte_perm (w[ 5], w[ 6], selector); w[17] = hc_byte_perm (w[ 4], w[ 5], selector); w[16] = hc_byte_perm (w[ 3], w[ 4], selector); w[15] = hc_byte_perm (w[ 2], w[ 3], selector); w[14] = hc_byte_perm (w[ 1], w[ 
2], selector); w[13] = hc_byte_perm (w[ 0], w[ 1], selector); w[12] = hc_byte_perm ( 0, w[ 0], selector); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_byte_perm (w[49], w[50], selector); w[62] = hc_byte_perm (w[48], w[49], selector); w[61] = hc_byte_perm (w[47], w[48], selector); w[60] = hc_byte_perm (w[46], w[47], selector); w[59] = hc_byte_perm (w[45], w[46], selector); w[58] = hc_byte_perm (w[44], w[45], selector); w[57] = hc_byte_perm (w[43], w[44], selector); w[56] = hc_byte_perm (w[42], w[43], selector); w[55] = hc_byte_perm (w[41], w[42], selector); w[54] = hc_byte_perm (w[40], w[41], selector); w[53] = hc_byte_perm (w[39], w[40], selector); w[52] = hc_byte_perm (w[38], w[39], selector); w[51] = hc_byte_perm (w[37], w[38], selector); w[50] = hc_byte_perm (w[36], w[37], selector); w[49] = hc_byte_perm (w[35], w[36], selector); w[48] = hc_byte_perm (w[34], w[35], selector); w[47] = hc_byte_perm (w[33], w[34], selector); w[46] = hc_byte_perm (w[32], w[33], selector); w[45] = hc_byte_perm (w[31], w[32], selector); w[44] = hc_byte_perm (w[30], w[31], selector); w[43] = hc_byte_perm (w[29], w[30], selector); w[42] = hc_byte_perm (w[28], w[29], selector); w[41] = hc_byte_perm (w[27], w[28], selector); w[40] = hc_byte_perm (w[26], w[27], selector); w[39] = hc_byte_perm (w[25], w[26], selector); w[38] = hc_byte_perm (w[24], w[25], selector); w[37] = hc_byte_perm (w[23], w[24], selector); w[36] = hc_byte_perm (w[22], w[23], selector); w[35] = hc_byte_perm (w[21], w[22], selector); w[34] = hc_byte_perm (w[20], w[21], selector); w[33] = hc_byte_perm (w[19], w[20], selector); w[32] = hc_byte_perm (w[18], w[19], selector); w[31] = hc_byte_perm (w[17], w[18], selector); w[30] = hc_byte_perm (w[16], w[17], selector); w[29] = hc_byte_perm (w[15], w[16], selector); w[28] = hc_byte_perm (w[14], w[15], selector); w[27] = hc_byte_perm (w[13], w[14], selector); w[26] 
= hc_byte_perm (w[12], w[13], selector); w[25] = hc_byte_perm (w[11], w[12], selector); w[24] = hc_byte_perm (w[10], w[11], selector); w[23] = hc_byte_perm (w[ 9], w[10], selector); w[22] = hc_byte_perm (w[ 8], w[ 9], selector); w[21] = hc_byte_perm (w[ 7], w[ 8], selector); w[20] = hc_byte_perm (w[ 6], w[ 7], selector); w[19] = hc_byte_perm (w[ 5], w[ 6], selector); w[18] = hc_byte_perm (w[ 4], w[ 5], selector); w[17] = hc_byte_perm (w[ 3], w[ 4], selector); w[16] = hc_byte_perm (w[ 2], w[ 3], selector); w[15] = hc_byte_perm (w[ 1], w[ 2], selector); w[14] = hc_byte_perm (w[ 0], w[ 1], selector); w[13] = hc_byte_perm ( 0, w[ 0], selector); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = hc_byte_perm (w[48], w[49], selector); w[62] = hc_byte_perm (w[47], w[48], selector); w[61] = hc_byte_perm (w[46], w[47], selector); w[60] = hc_byte_perm (w[45], w[46], selector); w[59] = hc_byte_perm (w[44], w[45], selector); w[58] = hc_byte_perm (w[43], w[44], selector); w[57] = hc_byte_perm (w[42], w[43], selector); w[56] = hc_byte_perm (w[41], w[42], selector); w[55] = hc_byte_perm (w[40], w[41], selector); w[54] = hc_byte_perm (w[39], w[40], selector); w[53] = hc_byte_perm (w[38], w[39], selector); w[52] = hc_byte_perm (w[37], w[38], selector); w[51] = hc_byte_perm (w[36], w[37], selector); w[50] = hc_byte_perm (w[35], w[36], selector); w[49] = hc_byte_perm (w[34], w[35], selector); w[48] = hc_byte_perm (w[33], w[34], selector); w[47] = hc_byte_perm (w[32], w[33], selector); w[46] = hc_byte_perm (w[31], w[32], selector); w[45] = hc_byte_perm (w[30], w[31], selector); w[44] = hc_byte_perm (w[29], w[30], selector); w[43] = hc_byte_perm (w[28], w[29], selector); w[42] = hc_byte_perm (w[27], w[28], selector); w[41] = hc_byte_perm (w[26], w[27], selector); w[40] = hc_byte_perm (w[25], w[26], selector); w[39] = hc_byte_perm (w[24], w[25], selector); w[38] = 
hc_byte_perm (w[23], w[24], selector); w[37] = hc_byte_perm (w[22], w[23], selector); w[36] = hc_byte_perm (w[21], w[22], selector); w[35] = hc_byte_perm (w[20], w[21], selector); w[34] = hc_byte_perm (w[19], w[20], selector); w[33] = hc_byte_perm (w[18], w[19], selector); w[32] = hc_byte_perm (w[17], w[18], selector); w[31] = hc_byte_perm (w[16], w[17], selector); w[30] = hc_byte_perm (w[15], w[16], selector); w[29] = hc_byte_perm (w[14], w[15], selector); w[28] = hc_byte_perm (w[13], w[14], selector); w[27] = hc_byte_perm (w[12], w[13], selector); w[26] = hc_byte_perm (w[11], w[12], selector); w[25] = hc_byte_perm (w[10], w[11], selector); w[24] = hc_byte_perm (w[ 9], w[10], selector); w[23] = hc_byte_perm (w[ 8], w[ 9], selector); w[22] = hc_byte_perm (w[ 7], w[ 8], selector); w[21] = hc_byte_perm (w[ 6], w[ 7], selector); w[20] = hc_byte_perm (w[ 5], w[ 6], selector); w[19] = hc_byte_perm (w[ 4], w[ 5], selector); w[18] = hc_byte_perm (w[ 3], w[ 4], selector); w[17] = hc_byte_perm (w[ 2], w[ 3], selector); w[16] = hc_byte_perm (w[ 1], w[ 2], selector); w[15] = hc_byte_perm (w[ 0], w[ 1], selector); w[14] = hc_byte_perm ( 0, w[ 0], selector); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_byte_perm (w[47], w[48], selector); w[62] = hc_byte_perm (w[46], w[47], selector); w[61] = hc_byte_perm (w[45], w[46], selector); w[60] = hc_byte_perm (w[44], w[45], selector); w[59] = hc_byte_perm (w[43], w[44], selector); w[58] = hc_byte_perm (w[42], w[43], selector); w[57] = hc_byte_perm (w[41], w[42], selector); w[56] = hc_byte_perm (w[40], w[41], selector); w[55] = hc_byte_perm (w[39], w[40], selector); w[54] = hc_byte_perm (w[38], w[39], selector); w[53] = hc_byte_perm (w[37], w[38], selector); w[52] = hc_byte_perm (w[36], w[37], selector); w[51] = hc_byte_perm (w[35], w[36], selector); w[50] = hc_byte_perm (w[34], w[35], selector); w[49] = 
hc_byte_perm (w[33], w[34], selector); w[48] = hc_byte_perm (w[32], w[33], selector); w[47] = hc_byte_perm (w[31], w[32], selector); w[46] = hc_byte_perm (w[30], w[31], selector); w[45] = hc_byte_perm (w[29], w[30], selector); w[44] = hc_byte_perm (w[28], w[29], selector); w[43] = hc_byte_perm (w[27], w[28], selector); w[42] = hc_byte_perm (w[26], w[27], selector); w[41] = hc_byte_perm (w[25], w[26], selector); w[40] = hc_byte_perm (w[24], w[25], selector); w[39] = hc_byte_perm (w[23], w[24], selector); w[38] = hc_byte_perm (w[22], w[23], selector); w[37] = hc_byte_perm (w[21], w[22], selector); w[36] = hc_byte_perm (w[20], w[21], selector); w[35] = hc_byte_perm (w[19], w[20], selector); w[34] = hc_byte_perm (w[18], w[19], selector); w[33] = hc_byte_perm (w[17], w[18], selector); w[32] = hc_byte_perm (w[16], w[17], selector); w[31] = hc_byte_perm (w[15], w[16], selector); w[30] = hc_byte_perm (w[14], w[15], selector); w[29] = hc_byte_perm (w[13], w[14], selector); w[28] = hc_byte_perm (w[12], w[13], selector); w[27] = hc_byte_perm (w[11], w[12], selector); w[26] = hc_byte_perm (w[10], w[11], selector); w[25] = hc_byte_perm (w[ 9], w[10], selector); w[24] = hc_byte_perm (w[ 8], w[ 9], selector); w[23] = hc_byte_perm (w[ 7], w[ 8], selector); w[22] = hc_byte_perm (w[ 6], w[ 7], selector); w[21] = hc_byte_perm (w[ 5], w[ 6], selector); w[20] = hc_byte_perm (w[ 4], w[ 5], selector); w[19] = hc_byte_perm (w[ 3], w[ 4], selector); w[18] = hc_byte_perm (w[ 2], w[ 3], selector); w[17] = hc_byte_perm (w[ 1], w[ 2], selector); w[16] = hc_byte_perm (w[ 0], w[ 1], selector); w[15] = hc_byte_perm ( 0, w[ 0], selector); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_byte_perm (w[46], w[47], selector); w[62] = hc_byte_perm (w[45], w[46], selector); w[61] = hc_byte_perm (w[44], w[45], selector); w[60] = hc_byte_perm (w[43], w[44], 
selector); w[59] = hc_byte_perm (w[42], w[43], selector); w[58] = hc_byte_perm (w[41], w[42], selector); w[57] = hc_byte_perm (w[40], w[41], selector); w[56] = hc_byte_perm (w[39], w[40], selector); w[55] = hc_byte_perm (w[38], w[39], selector); w[54] = hc_byte_perm (w[37], w[38], selector); w[53] = hc_byte_perm (w[36], w[37], selector); w[52] = hc_byte_perm (w[35], w[36], selector); w[51] = hc_byte_perm (w[34], w[35], selector); w[50] = hc_byte_perm (w[33], w[34], selector); w[49] = hc_byte_perm (w[32], w[33], selector); w[48] = hc_byte_perm (w[31], w[32], selector); w[47] = hc_byte_perm (w[30], w[31], selector); w[46] = hc_byte_perm (w[29], w[30], selector); w[45] = hc_byte_perm (w[28], w[29], selector); w[44] = hc_byte_perm (w[27], w[28], selector); w[43] = hc_byte_perm (w[26], w[27], selector); w[42] = hc_byte_perm (w[25], w[26], selector); w[41] = hc_byte_perm (w[24], w[25], selector); w[40] = hc_byte_perm (w[23], w[24], selector); w[39] = hc_byte_perm (w[22], w[23], selector); w[38] = hc_byte_perm (w[21], w[22], selector); w[37] = hc_byte_perm (w[20], w[21], selector); w[36] = hc_byte_perm (w[19], w[20], selector); w[35] = hc_byte_perm (w[18], w[19], selector); w[34] = hc_byte_perm (w[17], w[18], selector); w[33] = hc_byte_perm (w[16], w[17], selector); w[32] = hc_byte_perm (w[15], w[16], selector); w[31] = hc_byte_perm (w[14], w[15], selector); w[30] = hc_byte_perm (w[13], w[14], selector); w[29] = hc_byte_perm (w[12], w[13], selector); w[28] = hc_byte_perm (w[11], w[12], selector); w[27] = hc_byte_perm (w[10], w[11], selector); w[26] = hc_byte_perm (w[ 9], w[10], selector); w[25] = hc_byte_perm (w[ 8], w[ 9], selector); w[24] = hc_byte_perm (w[ 7], w[ 8], selector); w[23] = hc_byte_perm (w[ 6], w[ 7], selector); w[22] = hc_byte_perm (w[ 5], w[ 6], selector); w[21] = hc_byte_perm (w[ 4], w[ 5], selector); w[20] = hc_byte_perm (w[ 3], w[ 4], selector); w[19] = hc_byte_perm (w[ 2], w[ 3], selector); w[18] = hc_byte_perm (w[ 1], w[ 2], selector); w[17] = 
hc_byte_perm (w[ 0], w[ 1], selector); w[16] = hc_byte_perm ( 0, w[ 0], selector); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_byte_perm (w[45], w[46], selector); w[62] = hc_byte_perm (w[44], w[45], selector); w[61] = hc_byte_perm (w[43], w[44], selector); w[60] = hc_byte_perm (w[42], w[43], selector); w[59] = hc_byte_perm (w[41], w[42], selector); w[58] = hc_byte_perm (w[40], w[41], selector); w[57] = hc_byte_perm (w[39], w[40], selector); w[56] = hc_byte_perm (w[38], w[39], selector); w[55] = hc_byte_perm (w[37], w[38], selector); w[54] = hc_byte_perm (w[36], w[37], selector); w[53] = hc_byte_perm (w[35], w[36], selector); w[52] = hc_byte_perm (w[34], w[35], selector); w[51] = hc_byte_perm (w[33], w[34], selector); w[50] = hc_byte_perm (w[32], w[33], selector); w[49] = hc_byte_perm (w[31], w[32], selector); w[48] = hc_byte_perm (w[30], w[31], selector); w[47] = hc_byte_perm (w[29], w[30], selector); w[46] = hc_byte_perm (w[28], w[29], selector); w[45] = hc_byte_perm (w[27], w[28], selector); w[44] = hc_byte_perm (w[26], w[27], selector); w[43] = hc_byte_perm (w[25], w[26], selector); w[42] = hc_byte_perm (w[24], w[25], selector); w[41] = hc_byte_perm (w[23], w[24], selector); w[40] = hc_byte_perm (w[22], w[23], selector); w[39] = hc_byte_perm (w[21], w[22], selector); w[38] = hc_byte_perm (w[20], w[21], selector); w[37] = hc_byte_perm (w[19], w[20], selector); w[36] = hc_byte_perm (w[18], w[19], selector); w[35] = hc_byte_perm (w[17], w[18], selector); w[34] = hc_byte_perm (w[16], w[17], selector); w[33] = hc_byte_perm (w[15], w[16], selector); w[32] = hc_byte_perm (w[14], w[15], selector); w[31] = hc_byte_perm (w[13], w[14], selector); w[30] = hc_byte_perm (w[12], w[13], selector); w[29] = hc_byte_perm (w[11], w[12], selector); w[28] = hc_byte_perm (w[10], w[11], selector); w[27] = hc_byte_perm (w[ 9], 
w[10], selector); w[26] = hc_byte_perm (w[ 8], w[ 9], selector); w[25] = hc_byte_perm (w[ 7], w[ 8], selector); w[24] = hc_byte_perm (w[ 6], w[ 7], selector); w[23] = hc_byte_perm (w[ 5], w[ 6], selector); w[22] = hc_byte_perm (w[ 4], w[ 5], selector); w[21] = hc_byte_perm (w[ 3], w[ 4], selector); w[20] = hc_byte_perm (w[ 2], w[ 3], selector); w[19] = hc_byte_perm (w[ 1], w[ 2], selector); w[18] = hc_byte_perm (w[ 0], w[ 1], selector); w[17] = hc_byte_perm ( 0, w[ 0], selector); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_byte_perm (w[44], w[45], selector); w[62] = hc_byte_perm (w[43], w[44], selector); w[61] = hc_byte_perm (w[42], w[43], selector); w[60] = hc_byte_perm (w[41], w[42], selector); w[59] = hc_byte_perm (w[40], w[41], selector); w[58] = hc_byte_perm (w[39], w[40], selector); w[57] = hc_byte_perm (w[38], w[39], selector); w[56] = hc_byte_perm (w[37], w[38], selector); w[55] = hc_byte_perm (w[36], w[37], selector); w[54] = hc_byte_perm (w[35], w[36], selector); w[53] = hc_byte_perm (w[34], w[35], selector); w[52] = hc_byte_perm (w[33], w[34], selector); w[51] = hc_byte_perm (w[32], w[33], selector); w[50] = hc_byte_perm (w[31], w[32], selector); w[49] = hc_byte_perm (w[30], w[31], selector); w[48] = hc_byte_perm (w[29], w[30], selector); w[47] = hc_byte_perm (w[28], w[29], selector); w[46] = hc_byte_perm (w[27], w[28], selector); w[45] = hc_byte_perm (w[26], w[27], selector); w[44] = hc_byte_perm (w[25], w[26], selector); w[43] = hc_byte_perm (w[24], w[25], selector); w[42] = hc_byte_perm (w[23], w[24], selector); w[41] = hc_byte_perm (w[22], w[23], selector); w[40] = hc_byte_perm (w[21], w[22], selector); w[39] = hc_byte_perm (w[20], w[21], selector); w[38] = hc_byte_perm (w[19], w[20], selector); w[37] = hc_byte_perm (w[18], w[19], selector); w[36] = hc_byte_perm (w[17], w[18], 
selector); w[35] = hc_byte_perm (w[16], w[17], selector); w[34] = hc_byte_perm (w[15], w[16], selector); w[33] = hc_byte_perm (w[14], w[15], selector); w[32] = hc_byte_perm (w[13], w[14], selector); w[31] = hc_byte_perm (w[12], w[13], selector); w[30] = hc_byte_perm (w[11], w[12], selector); w[29] = hc_byte_perm (w[10], w[11], selector); w[28] = hc_byte_perm (w[ 9], w[10], selector); w[27] = hc_byte_perm (w[ 8], w[ 9], selector); w[26] = hc_byte_perm (w[ 7], w[ 8], selector); w[25] = hc_byte_perm (w[ 6], w[ 7], selector); w[24] = hc_byte_perm (w[ 5], w[ 6], selector); w[23] = hc_byte_perm (w[ 4], w[ 5], selector); w[22] = hc_byte_perm (w[ 3], w[ 4], selector); w[21] = hc_byte_perm (w[ 2], w[ 3], selector); w[20] = hc_byte_perm (w[ 1], w[ 2], selector); w[19] = hc_byte_perm (w[ 0], w[ 1], selector); w[18] = hc_byte_perm ( 0, w[ 0], selector); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_byte_perm (w[43], w[44], selector); w[62] = hc_byte_perm (w[42], w[43], selector); w[61] = hc_byte_perm (w[41], w[42], selector); w[60] = hc_byte_perm (w[40], w[41], selector); w[59] = hc_byte_perm (w[39], w[40], selector); w[58] = hc_byte_perm (w[38], w[39], selector); w[57] = hc_byte_perm (w[37], w[38], selector); w[56] = hc_byte_perm (w[36], w[37], selector); w[55] = hc_byte_perm (w[35], w[36], selector); w[54] = hc_byte_perm (w[34], w[35], selector); w[53] = hc_byte_perm (w[33], w[34], selector); w[52] = hc_byte_perm (w[32], w[33], selector); w[51] = hc_byte_perm (w[31], w[32], selector); w[50] = hc_byte_perm (w[30], w[31], selector); w[49] = hc_byte_perm (w[29], w[30], selector); w[48] = hc_byte_perm (w[28], w[29], selector); w[47] = hc_byte_perm (w[27], w[28], selector); w[46] = hc_byte_perm (w[26], w[27], selector); w[45] = hc_byte_perm (w[25], w[26], selector); w[44] = hc_byte_perm (w[24], w[25], 
selector); w[43] = hc_byte_perm (w[23], w[24], selector); w[42] = hc_byte_perm (w[22], w[23], selector); w[41] = hc_byte_perm (w[21], w[22], selector); w[40] = hc_byte_perm (w[20], w[21], selector); w[39] = hc_byte_perm (w[19], w[20], selector); w[38] = hc_byte_perm (w[18], w[19], selector); w[37] = hc_byte_perm (w[17], w[18], selector); w[36] = hc_byte_perm (w[16], w[17], selector); w[35] = hc_byte_perm (w[15], w[16], selector); w[34] = hc_byte_perm (w[14], w[15], selector); w[33] = hc_byte_perm (w[13], w[14], selector); w[32] = hc_byte_perm (w[12], w[13], selector); w[31] = hc_byte_perm (w[11], w[12], selector); w[30] = hc_byte_perm (w[10], w[11], selector); w[29] = hc_byte_perm (w[ 9], w[10], selector); w[28] = hc_byte_perm (w[ 8], w[ 9], selector); w[27] = hc_byte_perm (w[ 7], w[ 8], selector); w[26] = hc_byte_perm (w[ 6], w[ 7], selector); w[25] = hc_byte_perm (w[ 5], w[ 6], selector); w[24] = hc_byte_perm (w[ 4], w[ 5], selector); w[23] = hc_byte_perm (w[ 3], w[ 4], selector); w[22] = hc_byte_perm (w[ 2], w[ 3], selector); w[21] = hc_byte_perm (w[ 1], w[ 2], selector); w[20] = hc_byte_perm (w[ 0], w[ 1], selector); w[19] = hc_byte_perm ( 0, w[ 0], selector); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_byte_perm (w[42], w[43], selector); w[62] = hc_byte_perm (w[41], w[42], selector); w[61] = hc_byte_perm (w[40], w[41], selector); w[60] = hc_byte_perm (w[39], w[40], selector); w[59] = hc_byte_perm (w[38], w[39], selector); w[58] = hc_byte_perm (w[37], w[38], selector); w[57] = hc_byte_perm (w[36], w[37], selector); w[56] = hc_byte_perm (w[35], w[36], selector); w[55] = hc_byte_perm (w[34], w[35], selector); w[54] = hc_byte_perm (w[33], w[34], selector); w[53] = hc_byte_perm (w[32], w[33], selector); w[52] = hc_byte_perm (w[31], w[32], selector); w[51] = hc_byte_perm 
(w[30], w[31], selector); w[50] = hc_byte_perm (w[29], w[30], selector); w[49] = hc_byte_perm (w[28], w[29], selector); w[48] = hc_byte_perm (w[27], w[28], selector); w[47] = hc_byte_perm (w[26], w[27], selector); w[46] = hc_byte_perm (w[25], w[26], selector); w[45] = hc_byte_perm (w[24], w[25], selector); w[44] = hc_byte_perm (w[23], w[24], selector); w[43] = hc_byte_perm (w[22], w[23], selector); w[42] = hc_byte_perm (w[21], w[22], selector); w[41] = hc_byte_perm (w[20], w[21], selector); w[40] = hc_byte_perm (w[19], w[20], selector); w[39] = hc_byte_perm (w[18], w[19], selector); w[38] = hc_byte_perm (w[17], w[18], selector); w[37] = hc_byte_perm (w[16], w[17], selector); w[36] = hc_byte_perm (w[15], w[16], selector); w[35] = hc_byte_perm (w[14], w[15], selector); w[34] = hc_byte_perm (w[13], w[14], selector); w[33] = hc_byte_perm (w[12], w[13], selector); w[32] = hc_byte_perm (w[11], w[12], selector); w[31] = hc_byte_perm (w[10], w[11], selector); w[30] = hc_byte_perm (w[ 9], w[10], selector); w[29] = hc_byte_perm (w[ 8], w[ 9], selector); w[28] = hc_byte_perm (w[ 7], w[ 8], selector); w[27] = hc_byte_perm (w[ 6], w[ 7], selector); w[26] = hc_byte_perm (w[ 5], w[ 6], selector); w[25] = hc_byte_perm (w[ 4], w[ 5], selector); w[24] = hc_byte_perm (w[ 3], w[ 4], selector); w[23] = hc_byte_perm (w[ 2], w[ 3], selector); w[22] = hc_byte_perm (w[ 1], w[ 2], selector); w[21] = hc_byte_perm (w[ 0], w[ 1], selector); w[20] = hc_byte_perm ( 0, w[ 0], selector); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_byte_perm (w[41], w[42], selector); w[62] = hc_byte_perm (w[40], w[41], selector); w[61] = hc_byte_perm (w[39], w[40], selector); w[60] = hc_byte_perm (w[38], w[39], selector); w[59] = hc_byte_perm (w[37], w[38], selector); w[58] = hc_byte_perm (w[36], w[37], selector); 
w[57] = hc_byte_perm (w[35], w[36], selector); w[56] = hc_byte_perm (w[34], w[35], selector); w[55] = hc_byte_perm (w[33], w[34], selector); w[54] = hc_byte_perm (w[32], w[33], selector); w[53] = hc_byte_perm (w[31], w[32], selector); w[52] = hc_byte_perm (w[30], w[31], selector); w[51] = hc_byte_perm (w[29], w[30], selector); w[50] = hc_byte_perm (w[28], w[29], selector); w[49] = hc_byte_perm (w[27], w[28], selector); w[48] = hc_byte_perm (w[26], w[27], selector); w[47] = hc_byte_perm (w[25], w[26], selector); w[46] = hc_byte_perm (w[24], w[25], selector); w[45] = hc_byte_perm (w[23], w[24], selector); w[44] = hc_byte_perm (w[22], w[23], selector); w[43] = hc_byte_perm (w[21], w[22], selector); w[42] = hc_byte_perm (w[20], w[21], selector); w[41] = hc_byte_perm (w[19], w[20], selector); w[40] = hc_byte_perm (w[18], w[19], selector); w[39] = hc_byte_perm (w[17], w[18], selector); w[38] = hc_byte_perm (w[16], w[17], selector); w[37] = hc_byte_perm (w[15], w[16], selector); w[36] = hc_byte_perm (w[14], w[15], selector); w[35] = hc_byte_perm (w[13], w[14], selector); w[34] = hc_byte_perm (w[12], w[13], selector); w[33] = hc_byte_perm (w[11], w[12], selector); w[32] = hc_byte_perm (w[10], w[11], selector); w[31] = hc_byte_perm (w[ 9], w[10], selector); w[30] = hc_byte_perm (w[ 8], w[ 9], selector); w[29] = hc_byte_perm (w[ 7], w[ 8], selector); w[28] = hc_byte_perm (w[ 6], w[ 7], selector); w[27] = hc_byte_perm (w[ 5], w[ 6], selector); w[26] = hc_byte_perm (w[ 4], w[ 5], selector); w[25] = hc_byte_perm (w[ 3], w[ 4], selector); w[24] = hc_byte_perm (w[ 2], w[ 3], selector); w[23] = hc_byte_perm (w[ 1], w[ 2], selector); w[22] = hc_byte_perm (w[ 0], w[ 1], selector); w[21] = hc_byte_perm ( 0, w[ 0], selector); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = 
hc_byte_perm (w[40], w[41], selector); w[62] = hc_byte_perm (w[39], w[40], selector); w[61] = hc_byte_perm (w[38], w[39], selector); w[60] = hc_byte_perm (w[37], w[38], selector); w[59] = hc_byte_perm (w[36], w[37], selector); w[58] = hc_byte_perm (w[35], w[36], selector); w[57] = hc_byte_perm (w[34], w[35], selector); w[56] = hc_byte_perm (w[33], w[34], selector); w[55] = hc_byte_perm (w[32], w[33], selector); w[54] = hc_byte_perm (w[31], w[32], selector); w[53] = hc_byte_perm (w[30], w[31], selector); w[52] = hc_byte_perm (w[29], w[30], selector); w[51] = hc_byte_perm (w[28], w[29], selector); w[50] = hc_byte_perm (w[27], w[28], selector); w[49] = hc_byte_perm (w[26], w[27], selector); w[48] = hc_byte_perm (w[25], w[26], selector); w[47] = hc_byte_perm (w[24], w[25], selector); w[46] = hc_byte_perm (w[23], w[24], selector); w[45] = hc_byte_perm (w[22], w[23], selector); w[44] = hc_byte_perm (w[21], w[22], selector); w[43] = hc_byte_perm (w[20], w[21], selector); w[42] = hc_byte_perm (w[19], w[20], selector); w[41] = hc_byte_perm (w[18], w[19], selector); w[40] = hc_byte_perm (w[17], w[18], selector); w[39] = hc_byte_perm (w[16], w[17], selector); w[38] = hc_byte_perm (w[15], w[16], selector); w[37] = hc_byte_perm (w[14], w[15], selector); w[36] = hc_byte_perm (w[13], w[14], selector); w[35] = hc_byte_perm (w[12], w[13], selector); w[34] = hc_byte_perm (w[11], w[12], selector); w[33] = hc_byte_perm (w[10], w[11], selector); w[32] = hc_byte_perm (w[ 9], w[10], selector); w[31] = hc_byte_perm (w[ 8], w[ 9], selector); w[30] = hc_byte_perm (w[ 7], w[ 8], selector); w[29] = hc_byte_perm (w[ 6], w[ 7], selector); w[28] = hc_byte_perm (w[ 5], w[ 6], selector); w[27] = hc_byte_perm (w[ 4], w[ 5], selector); w[26] = hc_byte_perm (w[ 3], w[ 4], selector); w[25] = hc_byte_perm (w[ 2], w[ 3], selector); w[24] = hc_byte_perm (w[ 1], w[ 2], selector); w[23] = hc_byte_perm (w[ 0], w[ 1], selector); w[22] = hc_byte_perm ( 0, w[ 0], selector); w[21] = 0; w[20] = 0; w[19] = 0; 
w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_byte_perm (w[39], w[40], selector); w[62] = hc_byte_perm (w[38], w[39], selector); w[61] = hc_byte_perm (w[37], w[38], selector); w[60] = hc_byte_perm (w[36], w[37], selector); w[59] = hc_byte_perm (w[35], w[36], selector); w[58] = hc_byte_perm (w[34], w[35], selector); w[57] = hc_byte_perm (w[33], w[34], selector); w[56] = hc_byte_perm (w[32], w[33], selector); w[55] = hc_byte_perm (w[31], w[32], selector); w[54] = hc_byte_perm (w[30], w[31], selector); w[53] = hc_byte_perm (w[29], w[30], selector); w[52] = hc_byte_perm (w[28], w[29], selector); w[51] = hc_byte_perm (w[27], w[28], selector); w[50] = hc_byte_perm (w[26], w[27], selector); w[49] = hc_byte_perm (w[25], w[26], selector); w[48] = hc_byte_perm (w[24], w[25], selector); w[47] = hc_byte_perm (w[23], w[24], selector); w[46] = hc_byte_perm (w[22], w[23], selector); w[45] = hc_byte_perm (w[21], w[22], selector); w[44] = hc_byte_perm (w[20], w[21], selector); w[43] = hc_byte_perm (w[19], w[20], selector); w[42] = hc_byte_perm (w[18], w[19], selector); w[41] = hc_byte_perm (w[17], w[18], selector); w[40] = hc_byte_perm (w[16], w[17], selector); w[39] = hc_byte_perm (w[15], w[16], selector); w[38] = hc_byte_perm (w[14], w[15], selector); w[37] = hc_byte_perm (w[13], w[14], selector); w[36] = hc_byte_perm (w[12], w[13], selector); w[35] = hc_byte_perm (w[11], w[12], selector); w[34] = hc_byte_perm (w[10], w[11], selector); w[33] = hc_byte_perm (w[ 9], w[10], selector); w[32] = hc_byte_perm (w[ 8], w[ 9], selector); w[31] = hc_byte_perm (w[ 7], w[ 8], selector); w[30] = hc_byte_perm (w[ 6], w[ 7], selector); w[29] = hc_byte_perm (w[ 5], w[ 6], selector); w[28] = hc_byte_perm (w[ 4], w[ 5], selector); w[27] = hc_byte_perm (w[ 3], w[ 4], selector); w[26] = hc_byte_perm (w[ 2], w[ 3], 
selector); w[25] = hc_byte_perm (w[ 1], w[ 2], selector); w[24] = hc_byte_perm (w[ 0], w[ 1], selector); w[23] = hc_byte_perm ( 0, w[ 0], selector); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_byte_perm (w[38], w[39], selector); w[62] = hc_byte_perm (w[37], w[38], selector); w[61] = hc_byte_perm (w[36], w[37], selector); w[60] = hc_byte_perm (w[35], w[36], selector); w[59] = hc_byte_perm (w[34], w[35], selector); w[58] = hc_byte_perm (w[33], w[34], selector); w[57] = hc_byte_perm (w[32], w[33], selector); w[56] = hc_byte_perm (w[31], w[32], selector); w[55] = hc_byte_perm (w[30], w[31], selector); w[54] = hc_byte_perm (w[29], w[30], selector); w[53] = hc_byte_perm (w[28], w[29], selector); w[52] = hc_byte_perm (w[27], w[28], selector); w[51] = hc_byte_perm (w[26], w[27], selector); w[50] = hc_byte_perm (w[25], w[26], selector); w[49] = hc_byte_perm (w[24], w[25], selector); w[48] = hc_byte_perm (w[23], w[24], selector); w[47] = hc_byte_perm (w[22], w[23], selector); w[46] = hc_byte_perm (w[21], w[22], selector); w[45] = hc_byte_perm (w[20], w[21], selector); w[44] = hc_byte_perm (w[19], w[20], selector); w[43] = hc_byte_perm (w[18], w[19], selector); w[42] = hc_byte_perm (w[17], w[18], selector); w[41] = hc_byte_perm (w[16], w[17], selector); w[40] = hc_byte_perm (w[15], w[16], selector); w[39] = hc_byte_perm (w[14], w[15], selector); w[38] = hc_byte_perm (w[13], w[14], selector); w[37] = hc_byte_perm (w[12], w[13], selector); w[36] = hc_byte_perm (w[11], w[12], selector); w[35] = hc_byte_perm (w[10], w[11], selector); w[34] = hc_byte_perm (w[ 9], w[10], selector); w[33] = hc_byte_perm (w[ 8], w[ 9], selector); w[32] = hc_byte_perm (w[ 7], w[ 8], selector); w[31] = hc_byte_perm (w[ 6], w[ 7], selector); w[30] = hc_byte_perm (w[ 5], 
w[ 6], selector); w[29] = hc_byte_perm (w[ 4], w[ 5], selector); w[28] = hc_byte_perm (w[ 3], w[ 4], selector); w[27] = hc_byte_perm (w[ 2], w[ 3], selector); w[26] = hc_byte_perm (w[ 1], w[ 2], selector); w[25] = hc_byte_perm (w[ 0], w[ 1], selector); w[24] = hc_byte_perm ( 0, w[ 0], selector); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_byte_perm (w[37], w[38], selector); w[62] = hc_byte_perm (w[36], w[37], selector); w[61] = hc_byte_perm (w[35], w[36], selector); w[60] = hc_byte_perm (w[34], w[35], selector); w[59] = hc_byte_perm (w[33], w[34], selector); w[58] = hc_byte_perm (w[32], w[33], selector); w[57] = hc_byte_perm (w[31], w[32], selector); w[56] = hc_byte_perm (w[30], w[31], selector); w[55] = hc_byte_perm (w[29], w[30], selector); w[54] = hc_byte_perm (w[28], w[29], selector); w[53] = hc_byte_perm (w[27], w[28], selector); w[52] = hc_byte_perm (w[26], w[27], selector); w[51] = hc_byte_perm (w[25], w[26], selector); w[50] = hc_byte_perm (w[24], w[25], selector); w[49] = hc_byte_perm (w[23], w[24], selector); w[48] = hc_byte_perm (w[22], w[23], selector); w[47] = hc_byte_perm (w[21], w[22], selector); w[46] = hc_byte_perm (w[20], w[21], selector); w[45] = hc_byte_perm (w[19], w[20], selector); w[44] = hc_byte_perm (w[18], w[19], selector); w[43] = hc_byte_perm (w[17], w[18], selector); w[42] = hc_byte_perm (w[16], w[17], selector); w[41] = hc_byte_perm (w[15], w[16], selector); w[40] = hc_byte_perm (w[14], w[15], selector); w[39] = hc_byte_perm (w[13], w[14], selector); w[38] = hc_byte_perm (w[12], w[13], selector); w[37] = hc_byte_perm (w[11], w[12], selector); w[36] = hc_byte_perm (w[10], w[11], selector); w[35] = hc_byte_perm (w[ 9], w[10], selector); w[34] = hc_byte_perm (w[ 8], w[ 9], selector); w[33] = 
hc_byte_perm (w[ 7], w[ 8], selector); w[32] = hc_byte_perm (w[ 6], w[ 7], selector); w[31] = hc_byte_perm (w[ 5], w[ 6], selector); w[30] = hc_byte_perm (w[ 4], w[ 5], selector); w[29] = hc_byte_perm (w[ 3], w[ 4], selector); w[28] = hc_byte_perm (w[ 2], w[ 3], selector); w[27] = hc_byte_perm (w[ 1], w[ 2], selector); w[26] = hc_byte_perm (w[ 0], w[ 1], selector); w[25] = hc_byte_perm ( 0, w[ 0], selector); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_byte_perm (w[36], w[37], selector); w[62] = hc_byte_perm (w[35], w[36], selector); w[61] = hc_byte_perm (w[34], w[35], selector); w[60] = hc_byte_perm (w[33], w[34], selector); w[59] = hc_byte_perm (w[32], w[33], selector); w[58] = hc_byte_perm (w[31], w[32], selector); w[57] = hc_byte_perm (w[30], w[31], selector); w[56] = hc_byte_perm (w[29], w[30], selector); w[55] = hc_byte_perm (w[28], w[29], selector); w[54] = hc_byte_perm (w[27], w[28], selector); w[53] = hc_byte_perm (w[26], w[27], selector); w[52] = hc_byte_perm (w[25], w[26], selector); w[51] = hc_byte_perm (w[24], w[25], selector); w[50] = hc_byte_perm (w[23], w[24], selector); w[49] = hc_byte_perm (w[22], w[23], selector); w[48] = hc_byte_perm (w[21], w[22], selector); w[47] = hc_byte_perm (w[20], w[21], selector); w[46] = hc_byte_perm (w[19], w[20], selector); w[45] = hc_byte_perm (w[18], w[19], selector); w[44] = hc_byte_perm (w[17], w[18], selector); w[43] = hc_byte_perm (w[16], w[17], selector); w[42] = hc_byte_perm (w[15], w[16], selector); w[41] = hc_byte_perm (w[14], w[15], selector); w[40] = hc_byte_perm (w[13], w[14], selector); w[39] = hc_byte_perm (w[12], w[13], selector); w[38] = hc_byte_perm (w[11], w[12], selector); w[37] = hc_byte_perm (w[10], w[11], selector); w[36] = hc_byte_perm (w[ 
9], w[10], selector); w[35] = hc_byte_perm (w[ 8], w[ 9], selector); w[34] = hc_byte_perm (w[ 7], w[ 8], selector); w[33] = hc_byte_perm (w[ 6], w[ 7], selector); w[32] = hc_byte_perm (w[ 5], w[ 6], selector); w[31] = hc_byte_perm (w[ 4], w[ 5], selector); w[30] = hc_byte_perm (w[ 3], w[ 4], selector); w[29] = hc_byte_perm (w[ 2], w[ 3], selector); w[28] = hc_byte_perm (w[ 1], w[ 2], selector); w[27] = hc_byte_perm (w[ 0], w[ 1], selector); w[26] = hc_byte_perm ( 0, w[ 0], selector); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_byte_perm (w[35], w[36], selector); w[62] = hc_byte_perm (w[34], w[35], selector); w[61] = hc_byte_perm (w[33], w[34], selector); w[60] = hc_byte_perm (w[32], w[33], selector); w[59] = hc_byte_perm (w[31], w[32], selector); w[58] = hc_byte_perm (w[30], w[31], selector); w[57] = hc_byte_perm (w[29], w[30], selector); w[56] = hc_byte_perm (w[28], w[29], selector); w[55] = hc_byte_perm (w[27], w[28], selector); w[54] = hc_byte_perm (w[26], w[27], selector); w[53] = hc_byte_perm (w[25], w[26], selector); w[52] = hc_byte_perm (w[24], w[25], selector); w[51] = hc_byte_perm (w[23], w[24], selector); w[50] = hc_byte_perm (w[22], w[23], selector); w[49] = hc_byte_perm (w[21], w[22], selector); w[48] = hc_byte_perm (w[20], w[21], selector); w[47] = hc_byte_perm (w[19], w[20], selector); w[46] = hc_byte_perm (w[18], w[19], selector); w[45] = hc_byte_perm (w[17], w[18], selector); w[44] = hc_byte_perm (w[16], w[17], selector); w[43] = hc_byte_perm (w[15], w[16], selector); w[42] = hc_byte_perm (w[14], w[15], selector); w[41] = hc_byte_perm (w[13], w[14], selector); w[40] = hc_byte_perm (w[12], w[13], selector); w[39] = hc_byte_perm (w[11], w[12], selector); w[38] = hc_byte_perm (w[10], 
w[11], selector); w[37] = hc_byte_perm (w[ 9], w[10], selector); w[36] = hc_byte_perm (w[ 8], w[ 9], selector); w[35] = hc_byte_perm (w[ 7], w[ 8], selector); w[34] = hc_byte_perm (w[ 6], w[ 7], selector); w[33] = hc_byte_perm (w[ 5], w[ 6], selector); w[32] = hc_byte_perm (w[ 4], w[ 5], selector); w[31] = hc_byte_perm (w[ 3], w[ 4], selector); w[30] = hc_byte_perm (w[ 2], w[ 3], selector); w[29] = hc_byte_perm (w[ 1], w[ 2], selector); w[28] = hc_byte_perm (w[ 0], w[ 1], selector); w[27] = hc_byte_perm ( 0, w[ 0], selector); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_byte_perm (w[34], w[35], selector); w[62] = hc_byte_perm (w[33], w[34], selector); w[61] = hc_byte_perm (w[32], w[33], selector); w[60] = hc_byte_perm (w[31], w[32], selector); w[59] = hc_byte_perm (w[30], w[31], selector); w[58] = hc_byte_perm (w[29], w[30], selector); w[57] = hc_byte_perm (w[28], w[29], selector); w[56] = hc_byte_perm (w[27], w[28], selector); w[55] = hc_byte_perm (w[26], w[27], selector); w[54] = hc_byte_perm (w[25], w[26], selector); w[53] = hc_byte_perm (w[24], w[25], selector); w[52] = hc_byte_perm (w[23], w[24], selector); w[51] = hc_byte_perm (w[22], w[23], selector); w[50] = hc_byte_perm (w[21], w[22], selector); w[49] = hc_byte_perm (w[20], w[21], selector); w[48] = hc_byte_perm (w[19], w[20], selector); w[47] = hc_byte_perm (w[18], w[19], selector); w[46] = hc_byte_perm (w[17], w[18], selector); w[45] = hc_byte_perm (w[16], w[17], selector); w[44] = hc_byte_perm (w[15], w[16], selector); w[43] = hc_byte_perm (w[14], w[15], selector); w[42] = hc_byte_perm (w[13], w[14], selector); w[41] = hc_byte_perm (w[12], w[13], selector); w[40] = hc_byte_perm (w[11], w[12], selector); w[39] = hc_byte_perm 
(w[10], w[11], selector); w[38] = hc_byte_perm (w[ 9], w[10], selector); w[37] = hc_byte_perm (w[ 8], w[ 9], selector); w[36] = hc_byte_perm (w[ 7], w[ 8], selector); w[35] = hc_byte_perm (w[ 6], w[ 7], selector); w[34] = hc_byte_perm (w[ 5], w[ 6], selector); w[33] = hc_byte_perm (w[ 4], w[ 5], selector); w[32] = hc_byte_perm (w[ 3], w[ 4], selector); w[31] = hc_byte_perm (w[ 2], w[ 3], selector); w[30] = hc_byte_perm (w[ 1], w[ 2], selector); w[29] = hc_byte_perm (w[ 0], w[ 1], selector); w[28] = hc_byte_perm ( 0, w[ 0], selector); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_byte_perm (w[33], w[34], selector); w[62] = hc_byte_perm (w[32], w[33], selector); w[61] = hc_byte_perm (w[31], w[32], selector); w[60] = hc_byte_perm (w[30], w[31], selector); w[59] = hc_byte_perm (w[29], w[30], selector); w[58] = hc_byte_perm (w[28], w[29], selector); w[57] = hc_byte_perm (w[27], w[28], selector); w[56] = hc_byte_perm (w[26], w[27], selector); w[55] = hc_byte_perm (w[25], w[26], selector); w[54] = hc_byte_perm (w[24], w[25], selector); w[53] = hc_byte_perm (w[23], w[24], selector); w[52] = hc_byte_perm (w[22], w[23], selector); w[51] = hc_byte_perm (w[21], w[22], selector); w[50] = hc_byte_perm (w[20], w[21], selector); w[49] = hc_byte_perm (w[19], w[20], selector); w[48] = hc_byte_perm (w[18], w[19], selector); w[47] = hc_byte_perm (w[17], w[18], selector); w[46] = hc_byte_perm (w[16], w[17], selector); w[45] = hc_byte_perm (w[15], w[16], selector); w[44] = hc_byte_perm (w[14], w[15], selector); w[43] = hc_byte_perm (w[13], w[14], selector); w[42] = hc_byte_perm (w[12], w[13], selector); w[41] = hc_byte_perm (w[11], w[12], selector); w[40] = hc_byte_perm (w[10], w[11], selector); w[39] = 
hc_byte_perm (w[ 9], w[10], selector); w[38] = hc_byte_perm (w[ 8], w[ 9], selector); w[37] = hc_byte_perm (w[ 7], w[ 8], selector); w[36] = hc_byte_perm (w[ 6], w[ 7], selector); w[35] = hc_byte_perm (w[ 5], w[ 6], selector); w[34] = hc_byte_perm (w[ 4], w[ 5], selector); w[33] = hc_byte_perm (w[ 3], w[ 4], selector); w[32] = hc_byte_perm (w[ 2], w[ 3], selector); w[31] = hc_byte_perm (w[ 1], w[ 2], selector); w[30] = hc_byte_perm (w[ 0], w[ 1], selector); w[29] = hc_byte_perm ( 0, w[ 0], selector); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_byte_perm (w[32], w[33], selector); w[62] = hc_byte_perm (w[31], w[32], selector); w[61] = hc_byte_perm (w[30], w[31], selector); w[60] = hc_byte_perm (w[29], w[30], selector); w[59] = hc_byte_perm (w[28], w[29], selector); w[58] = hc_byte_perm (w[27], w[28], selector); w[57] = hc_byte_perm (w[26], w[27], selector); w[56] = hc_byte_perm (w[25], w[26], selector); w[55] = hc_byte_perm (w[24], w[25], selector); w[54] = hc_byte_perm (w[23], w[24], selector); w[53] = hc_byte_perm (w[22], w[23], selector); w[52] = hc_byte_perm (w[21], w[22], selector); w[51] = hc_byte_perm (w[20], w[21], selector); w[50] = hc_byte_perm (w[19], w[20], selector); w[49] = hc_byte_perm (w[18], w[19], selector); w[48] = hc_byte_perm (w[17], w[18], selector); w[47] = hc_byte_perm (w[16], w[17], selector); w[46] = hc_byte_perm (w[15], w[16], selector); w[45] = hc_byte_perm (w[14], w[15], selector); w[44] = hc_byte_perm (w[13], w[14], selector); w[43] = hc_byte_perm (w[12], w[13], selector); w[42] = hc_byte_perm (w[11], w[12], selector); w[41] = hc_byte_perm (w[10], w[11], selector); w[40] = hc_byte_perm (w[ 9], w[10], selector); w[39] = hc_byte_perm (w[ 8], 
w[ 9], selector); w[38] = hc_byte_perm (w[ 7], w[ 8], selector); w[37] = hc_byte_perm (w[ 6], w[ 7], selector); w[36] = hc_byte_perm (w[ 5], w[ 6], selector); w[35] = hc_byte_perm (w[ 4], w[ 5], selector); w[34] = hc_byte_perm (w[ 3], w[ 4], selector); w[33] = hc_byte_perm (w[ 2], w[ 3], selector); w[32] = hc_byte_perm (w[ 1], w[ 2], selector); w[31] = hc_byte_perm (w[ 0], w[ 1], selector); w[30] = hc_byte_perm ( 0, w[ 0], selector); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_byte_perm (w[31], w[32], selector); w[62] = hc_byte_perm (w[30], w[31], selector); w[61] = hc_byte_perm (w[29], w[30], selector); w[60] = hc_byte_perm (w[28], w[29], selector); w[59] = hc_byte_perm (w[27], w[28], selector); w[58] = hc_byte_perm (w[26], w[27], selector); w[57] = hc_byte_perm (w[25], w[26], selector); w[56] = hc_byte_perm (w[24], w[25], selector); w[55] = hc_byte_perm (w[23], w[24], selector); w[54] = hc_byte_perm (w[22], w[23], selector); w[53] = hc_byte_perm (w[21], w[22], selector); w[52] = hc_byte_perm (w[20], w[21], selector); w[51] = hc_byte_perm (w[19], w[20], selector); w[50] = hc_byte_perm (w[18], w[19], selector); w[49] = hc_byte_perm (w[17], w[18], selector); w[48] = hc_byte_perm (w[16], w[17], selector); w[47] = hc_byte_perm (w[15], w[16], selector); w[46] = hc_byte_perm (w[14], w[15], selector); w[45] = hc_byte_perm (w[13], w[14], selector); w[44] = hc_byte_perm (w[12], w[13], selector); w[43] = hc_byte_perm (w[11], w[12], selector); w[42] = hc_byte_perm (w[10], w[11], selector); w[41] = hc_byte_perm (w[ 9], w[10], selector); w[40] = hc_byte_perm (w[ 8], w[ 9], selector); w[39] = hc_byte_perm (w[ 7], w[ 8], selector); w[38] = hc_byte_perm (w[ 6], w[ 7], 
selector); w[37] = hc_byte_perm (w[ 5], w[ 6], selector); w[36] = hc_byte_perm (w[ 4], w[ 5], selector); w[35] = hc_byte_perm (w[ 3], w[ 4], selector); w[34] = hc_byte_perm (w[ 2], w[ 3], selector); w[33] = hc_byte_perm (w[ 1], w[ 2], selector); w[32] = hc_byte_perm (w[ 0], w[ 1], selector); w[31] = hc_byte_perm ( 0, w[ 0], selector); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_byte_perm (w[30], w[31], selector); w[62] = hc_byte_perm (w[29], w[30], selector); w[61] = hc_byte_perm (w[28], w[29], selector); w[60] = hc_byte_perm (w[27], w[28], selector); w[59] = hc_byte_perm (w[26], w[27], selector); w[58] = hc_byte_perm (w[25], w[26], selector); w[57] = hc_byte_perm (w[24], w[25], selector); w[56] = hc_byte_perm (w[23], w[24], selector); w[55] = hc_byte_perm (w[22], w[23], selector); w[54] = hc_byte_perm (w[21], w[22], selector); w[53] = hc_byte_perm (w[20], w[21], selector); w[52] = hc_byte_perm (w[19], w[20], selector); w[51] = hc_byte_perm (w[18], w[19], selector); w[50] = hc_byte_perm (w[17], w[18], selector); w[49] = hc_byte_perm (w[16], w[17], selector); w[48] = hc_byte_perm (w[15], w[16], selector); w[47] = hc_byte_perm (w[14], w[15], selector); w[46] = hc_byte_perm (w[13], w[14], selector); w[45] = hc_byte_perm (w[12], w[13], selector); w[44] = hc_byte_perm (w[11], w[12], selector); w[43] = hc_byte_perm (w[10], w[11], selector); w[42] = hc_byte_perm (w[ 9], w[10], selector); w[41] = hc_byte_perm (w[ 8], w[ 9], selector); w[40] = hc_byte_perm (w[ 7], w[ 8], selector); w[39] = hc_byte_perm (w[ 6], w[ 7], selector); w[38] = hc_byte_perm (w[ 5], w[ 6], selector); w[37] = hc_byte_perm (w[ 4], w[ 5], selector); w[36] = hc_byte_perm (w[ 3], w[ 4], 
selector); w[35] = hc_byte_perm (w[ 2], w[ 3], selector); w[34] = hc_byte_perm (w[ 1], w[ 2], selector); w[33] = hc_byte_perm (w[ 0], w[ 1], selector); w[32] = hc_byte_perm ( 0, w[ 0], selector); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_byte_perm (w[29], w[30], selector); w[62] = hc_byte_perm (w[28], w[29], selector); w[61] = hc_byte_perm (w[27], w[28], selector); w[60] = hc_byte_perm (w[26], w[27], selector); w[59] = hc_byte_perm (w[25], w[26], selector); w[58] = hc_byte_perm (w[24], w[25], selector); w[57] = hc_byte_perm (w[23], w[24], selector); w[56] = hc_byte_perm (w[22], w[23], selector); w[55] = hc_byte_perm (w[21], w[22], selector); w[54] = hc_byte_perm (w[20], w[21], selector); w[53] = hc_byte_perm (w[19], w[20], selector); w[52] = hc_byte_perm (w[18], w[19], selector); w[51] = hc_byte_perm (w[17], w[18], selector); w[50] = hc_byte_perm (w[16], w[17], selector); w[49] = hc_byte_perm (w[15], w[16], selector); w[48] = hc_byte_perm (w[14], w[15], selector); w[47] = hc_byte_perm (w[13], w[14], selector); w[46] = hc_byte_perm (w[12], w[13], selector); w[45] = hc_byte_perm (w[11], w[12], selector); w[44] = hc_byte_perm (w[10], w[11], selector); w[43] = hc_byte_perm (w[ 9], w[10], selector); w[42] = hc_byte_perm (w[ 8], w[ 9], selector); w[41] = hc_byte_perm (w[ 7], w[ 8], selector); w[40] = hc_byte_perm (w[ 6], w[ 7], selector); w[39] = hc_byte_perm (w[ 5], w[ 6], selector); w[38] = hc_byte_perm (w[ 4], w[ 5], selector); w[37] = hc_byte_perm (w[ 3], w[ 4], selector); w[36] = hc_byte_perm (w[ 2], w[ 3], selector); w[35] = hc_byte_perm (w[ 1], w[ 2], selector); w[34] = hc_byte_perm (w[ 0], w[ 1], selector); w[33] = hc_byte_perm ( 0, 
w[ 0], selector); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_byte_perm (w[28], w[29], selector); w[62] = hc_byte_perm (w[27], w[28], selector); w[61] = hc_byte_perm (w[26], w[27], selector); w[60] = hc_byte_perm (w[25], w[26], selector); w[59] = hc_byte_perm (w[24], w[25], selector); w[58] = hc_byte_perm (w[23], w[24], selector); w[57] = hc_byte_perm (w[22], w[23], selector); w[56] = hc_byte_perm (w[21], w[22], selector); w[55] = hc_byte_perm (w[20], w[21], selector); w[54] = hc_byte_perm (w[19], w[20], selector); w[53] = hc_byte_perm (w[18], w[19], selector); w[52] = hc_byte_perm (w[17], w[18], selector); w[51] = hc_byte_perm (w[16], w[17], selector); w[50] = hc_byte_perm (w[15], w[16], selector); w[49] = hc_byte_perm (w[14], w[15], selector); w[48] = hc_byte_perm (w[13], w[14], selector); w[47] = hc_byte_perm (w[12], w[13], selector); w[46] = hc_byte_perm (w[11], w[12], selector); w[45] = hc_byte_perm (w[10], w[11], selector); w[44] = hc_byte_perm (w[ 9], w[10], selector); w[43] = hc_byte_perm (w[ 8], w[ 9], selector); w[42] = hc_byte_perm (w[ 7], w[ 8], selector); w[41] = hc_byte_perm (w[ 6], w[ 7], selector); w[40] = hc_byte_perm (w[ 5], w[ 6], selector); w[39] = hc_byte_perm (w[ 4], w[ 5], selector); w[38] = hc_byte_perm (w[ 3], w[ 4], selector); w[37] = hc_byte_perm (w[ 2], w[ 3], selector); w[36] = hc_byte_perm (w[ 1], w[ 2], selector); w[35] = hc_byte_perm (w[ 0], w[ 1], selector); w[34] = hc_byte_perm ( 0, w[ 0], selector); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 
0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_byte_perm (w[27], w[28], selector); w[62] = hc_byte_perm (w[26], w[27], selector); w[61] = hc_byte_perm (w[25], w[26], selector); w[60] = hc_byte_perm (w[24], w[25], selector); w[59] = hc_byte_perm (w[23], w[24], selector); w[58] = hc_byte_perm (w[22], w[23], selector); w[57] = hc_byte_perm (w[21], w[22], selector); w[56] = hc_byte_perm (w[20], w[21], selector); w[55] = hc_byte_perm (w[19], w[20], selector); w[54] = hc_byte_perm (w[18], w[19], selector); w[53] = hc_byte_perm (w[17], w[18], selector); w[52] = hc_byte_perm (w[16], w[17], selector); w[51] = hc_byte_perm (w[15], w[16], selector); w[50] = hc_byte_perm (w[14], w[15], selector); w[49] = hc_byte_perm (w[13], w[14], selector); w[48] = hc_byte_perm (w[12], w[13], selector); w[47] = hc_byte_perm (w[11], w[12], selector); w[46] = hc_byte_perm (w[10], w[11], selector); w[45] = hc_byte_perm (w[ 9], w[10], selector); w[44] = hc_byte_perm (w[ 8], w[ 9], selector); w[43] = hc_byte_perm (w[ 7], w[ 8], selector); w[42] = hc_byte_perm (w[ 6], w[ 7], selector); w[41] = hc_byte_perm (w[ 5], w[ 6], selector); w[40] = hc_byte_perm (w[ 4], w[ 5], selector); w[39] = hc_byte_perm (w[ 3], w[ 4], selector); w[38] = hc_byte_perm (w[ 2], w[ 3], selector); w[37] = hc_byte_perm (w[ 1], w[ 2], selector); w[36] = hc_byte_perm (w[ 0], w[ 1], selector); w[35] = hc_byte_perm ( 0, w[ 0], selector); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_byte_perm (w[26], w[27], 
selector); w[62] = hc_byte_perm (w[25], w[26], selector); w[61] = hc_byte_perm (w[24], w[25], selector); w[60] = hc_byte_perm (w[23], w[24], selector); w[59] = hc_byte_perm (w[22], w[23], selector); w[58] = hc_byte_perm (w[21], w[22], selector); w[57] = hc_byte_perm (w[20], w[21], selector); w[56] = hc_byte_perm (w[19], w[20], selector); w[55] = hc_byte_perm (w[18], w[19], selector); w[54] = hc_byte_perm (w[17], w[18], selector); w[53] = hc_byte_perm (w[16], w[17], selector); w[52] = hc_byte_perm (w[15], w[16], selector); w[51] = hc_byte_perm (w[14], w[15], selector); w[50] = hc_byte_perm (w[13], w[14], selector); w[49] = hc_byte_perm (w[12], w[13], selector); w[48] = hc_byte_perm (w[11], w[12], selector); w[47] = hc_byte_perm (w[10], w[11], selector); w[46] = hc_byte_perm (w[ 9], w[10], selector); w[45] = hc_byte_perm (w[ 8], w[ 9], selector); w[44] = hc_byte_perm (w[ 7], w[ 8], selector); w[43] = hc_byte_perm (w[ 6], w[ 7], selector); w[42] = hc_byte_perm (w[ 5], w[ 6], selector); w[41] = hc_byte_perm (w[ 4], w[ 5], selector); w[40] = hc_byte_perm (w[ 3], w[ 4], selector); w[39] = hc_byte_perm (w[ 2], w[ 3], selector); w[38] = hc_byte_perm (w[ 1], w[ 2], selector); w[37] = hc_byte_perm (w[ 0], w[ 1], selector); w[36] = hc_byte_perm ( 0, w[ 0], selector); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_byte_perm (w[25], w[26], selector); w[62] = hc_byte_perm (w[24], w[25], selector); w[61] = hc_byte_perm (w[23], w[24], selector); w[60] = hc_byte_perm (w[22], w[23], selector); w[59] = hc_byte_perm (w[21], w[22], selector); w[58] = hc_byte_perm (w[20], w[21], selector); w[57] = hc_byte_perm (w[19], 
w[20], selector); w[56] = hc_byte_perm (w[18], w[19], selector); w[55] = hc_byte_perm (w[17], w[18], selector); w[54] = hc_byte_perm (w[16], w[17], selector); w[53] = hc_byte_perm (w[15], w[16], selector); w[52] = hc_byte_perm (w[14], w[15], selector); w[51] = hc_byte_perm (w[13], w[14], selector); w[50] = hc_byte_perm (w[12], w[13], selector); w[49] = hc_byte_perm (w[11], w[12], selector); w[48] = hc_byte_perm (w[10], w[11], selector); w[47] = hc_byte_perm (w[ 9], w[10], selector); w[46] = hc_byte_perm (w[ 8], w[ 9], selector); w[45] = hc_byte_perm (w[ 7], w[ 8], selector); w[44] = hc_byte_perm (w[ 6], w[ 7], selector); w[43] = hc_byte_perm (w[ 5], w[ 6], selector); w[42] = hc_byte_perm (w[ 4], w[ 5], selector); w[41] = hc_byte_perm (w[ 3], w[ 4], selector); w[40] = hc_byte_perm (w[ 2], w[ 3], selector); w[39] = hc_byte_perm (w[ 1], w[ 2], selector); w[38] = hc_byte_perm (w[ 0], w[ 1], selector); w[37] = hc_byte_perm ( 0, w[ 0], selector); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_byte_perm (w[24], w[25], selector); w[62] = hc_byte_perm (w[23], w[24], selector); w[61] = hc_byte_perm (w[22], w[23], selector); w[60] = hc_byte_perm (w[21], w[22], selector); w[59] = hc_byte_perm (w[20], w[21], selector); w[58] = hc_byte_perm (w[19], w[20], selector); w[57] = hc_byte_perm (w[18], w[19], selector); w[56] = hc_byte_perm (w[17], w[18], selector); w[55] = hc_byte_perm (w[16], w[17], selector); w[54] = hc_byte_perm (w[15], w[16], selector); w[53] = hc_byte_perm (w[14], w[15], selector); w[52] = hc_byte_perm (w[13], w[14], selector); w[51] = hc_byte_perm (w[12], w[13], selector); w[50] = 
hc_byte_perm (w[11], w[12], selector); w[49] = hc_byte_perm (w[10], w[11], selector); w[48] = hc_byte_perm (w[ 9], w[10], selector); w[47] = hc_byte_perm (w[ 8], w[ 9], selector); w[46] = hc_byte_perm (w[ 7], w[ 8], selector); w[45] = hc_byte_perm (w[ 6], w[ 7], selector); w[44] = hc_byte_perm (w[ 5], w[ 6], selector); w[43] = hc_byte_perm (w[ 4], w[ 5], selector); w[42] = hc_byte_perm (w[ 3], w[ 4], selector); w[41] = hc_byte_perm (w[ 2], w[ 3], selector); w[40] = hc_byte_perm (w[ 1], w[ 2], selector); w[39] = hc_byte_perm (w[ 0], w[ 1], selector); w[38] = hc_byte_perm ( 0, w[ 0], selector); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_byte_perm (w[23], w[24], selector); w[62] = hc_byte_perm (w[22], w[23], selector); w[61] = hc_byte_perm (w[21], w[22], selector); w[60] = hc_byte_perm (w[20], w[21], selector); w[59] = hc_byte_perm (w[19], w[20], selector); w[58] = hc_byte_perm (w[18], w[19], selector); w[57] = hc_byte_perm (w[17], w[18], selector); w[56] = hc_byte_perm (w[16], w[17], selector); w[55] = hc_byte_perm (w[15], w[16], selector); w[54] = hc_byte_perm (w[14], w[15], selector); w[53] = hc_byte_perm (w[13], w[14], selector); w[52] = hc_byte_perm (w[12], w[13], selector); w[51] = hc_byte_perm (w[11], w[12], selector); w[50] = hc_byte_perm (w[10], w[11], selector); w[49] = hc_byte_perm (w[ 9], w[10], selector); w[48] = hc_byte_perm (w[ 8], w[ 9], selector); w[47] = hc_byte_perm (w[ 7], w[ 8], selector); w[46] = hc_byte_perm (w[ 6], w[ 7], selector); w[45] = hc_byte_perm (w[ 5], w[ 6], selector); w[44] = hc_byte_perm (w[ 4], w[ 5], selector); w[43] = hc_byte_perm (w[ 
3], w[ 4], selector); w[42] = hc_byte_perm (w[ 2], w[ 3], selector); w[41] = hc_byte_perm (w[ 1], w[ 2], selector); w[40] = hc_byte_perm (w[ 0], w[ 1], selector); w[39] = hc_byte_perm ( 0, w[ 0], selector); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_byte_perm (w[22], w[23], selector); w[62] = hc_byte_perm (w[21], w[22], selector); w[61] = hc_byte_perm (w[20], w[21], selector); w[60] = hc_byte_perm (w[19], w[20], selector); w[59] = hc_byte_perm (w[18], w[19], selector); w[58] = hc_byte_perm (w[17], w[18], selector); w[57] = hc_byte_perm (w[16], w[17], selector); w[56] = hc_byte_perm (w[15], w[16], selector); w[55] = hc_byte_perm (w[14], w[15], selector); w[54] = hc_byte_perm (w[13], w[14], selector); w[53] = hc_byte_perm (w[12], w[13], selector); w[52] = hc_byte_perm (w[11], w[12], selector); w[51] = hc_byte_perm (w[10], w[11], selector); w[50] = hc_byte_perm (w[ 9], w[10], selector); w[49] = hc_byte_perm (w[ 8], w[ 9], selector); w[48] = hc_byte_perm (w[ 7], w[ 8], selector); w[47] = hc_byte_perm (w[ 6], w[ 7], selector); w[46] = hc_byte_perm (w[ 5], w[ 6], selector); w[45] = hc_byte_perm (w[ 4], w[ 5], selector); w[44] = hc_byte_perm (w[ 3], w[ 4], selector); w[43] = hc_byte_perm (w[ 2], w[ 3], selector); w[42] = hc_byte_perm (w[ 1], w[ 2], selector); w[41] = hc_byte_perm (w[ 0], w[ 1], selector); w[40] = hc_byte_perm ( 0, w[ 0], selector); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; 
w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_byte_perm (w[21], w[22], selector); w[62] = hc_byte_perm (w[20], w[21], selector); w[61] = hc_byte_perm (w[19], w[20], selector); w[60] = hc_byte_perm (w[18], w[19], selector); w[59] = hc_byte_perm (w[17], w[18], selector); w[58] = hc_byte_perm (w[16], w[17], selector); w[57] = hc_byte_perm (w[15], w[16], selector); w[56] = hc_byte_perm (w[14], w[15], selector); w[55] = hc_byte_perm (w[13], w[14], selector); w[54] = hc_byte_perm (w[12], w[13], selector); w[53] = hc_byte_perm (w[11], w[12], selector); w[52] = hc_byte_perm (w[10], w[11], selector); w[51] = hc_byte_perm (w[ 9], w[10], selector); w[50] = hc_byte_perm (w[ 8], w[ 9], selector); w[49] = hc_byte_perm (w[ 7], w[ 8], selector); w[48] = hc_byte_perm (w[ 6], w[ 7], selector); w[47] = hc_byte_perm (w[ 5], w[ 6], selector); w[46] = hc_byte_perm (w[ 4], w[ 5], selector); w[45] = hc_byte_perm (w[ 3], w[ 4], selector); w[44] = hc_byte_perm (w[ 2], w[ 3], selector); w[43] = hc_byte_perm (w[ 1], w[ 2], selector); w[42] = hc_byte_perm (w[ 0], w[ 1], selector); w[41] = hc_byte_perm ( 0, w[ 0], selector); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_byte_perm (w[20], w[21], selector); w[62] = hc_byte_perm (w[19], w[20], selector); w[61] = hc_byte_perm (w[18], w[19], selector); w[60] = hc_byte_perm (w[17], w[18], selector); w[59] = hc_byte_perm (w[16], 
w[17], selector); w[58] = hc_byte_perm (w[15], w[16], selector); w[57] = hc_byte_perm (w[14], w[15], selector); w[56] = hc_byte_perm (w[13], w[14], selector); w[55] = hc_byte_perm (w[12], w[13], selector); w[54] = hc_byte_perm (w[11], w[12], selector); w[53] = hc_byte_perm (w[10], w[11], selector); w[52] = hc_byte_perm (w[ 9], w[10], selector); w[51] = hc_byte_perm (w[ 8], w[ 9], selector); w[50] = hc_byte_perm (w[ 7], w[ 8], selector); w[49] = hc_byte_perm (w[ 6], w[ 7], selector); w[48] = hc_byte_perm (w[ 5], w[ 6], selector); w[47] = hc_byte_perm (w[ 4], w[ 5], selector); w[46] = hc_byte_perm (w[ 3], w[ 4], selector); w[45] = hc_byte_perm (w[ 2], w[ 3], selector); w[44] = hc_byte_perm (w[ 1], w[ 2], selector); w[43] = hc_byte_perm (w[ 0], w[ 1], selector); w[42] = hc_byte_perm ( 0, w[ 0], selector); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_byte_perm (w[19], w[20], selector); w[62] = hc_byte_perm (w[18], w[19], selector); w[61] = hc_byte_perm (w[17], w[18], selector); w[60] = hc_byte_perm (w[16], w[17], selector); w[59] = hc_byte_perm (w[15], w[16], selector); w[58] = hc_byte_perm (w[14], w[15], selector); w[57] = hc_byte_perm (w[13], w[14], selector); w[56] = hc_byte_perm (w[12], w[13], selector); w[55] = hc_byte_perm (w[11], w[12], selector); w[54] = hc_byte_perm (w[10], w[11], selector); w[53] = hc_byte_perm (w[ 9], w[10], selector); w[52] = hc_byte_perm (w[ 8], w[ 9], selector); w[51] = hc_byte_perm (w[ 7], w[ 8], selector); w[50] = hc_byte_perm (w[ 6], w[ 7], selector); w[49] = hc_byte_perm (w[ 5], w[ 6], selector); 
w[48] = hc_byte_perm (w[ 4], w[ 5], selector); w[47] = hc_byte_perm (w[ 3], w[ 4], selector); w[46] = hc_byte_perm (w[ 2], w[ 3], selector); w[45] = hc_byte_perm (w[ 1], w[ 2], selector); w[44] = hc_byte_perm (w[ 0], w[ 1], selector); w[43] = hc_byte_perm ( 0, w[ 0], selector); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_byte_perm (w[18], w[19], selector); w[62] = hc_byte_perm (w[17], w[18], selector); w[61] = hc_byte_perm (w[16], w[17], selector); w[60] = hc_byte_perm (w[15], w[16], selector); w[59] = hc_byte_perm (w[14], w[15], selector); w[58] = hc_byte_perm (w[13], w[14], selector); w[57] = hc_byte_perm (w[12], w[13], selector); w[56] = hc_byte_perm (w[11], w[12], selector); w[55] = hc_byte_perm (w[10], w[11], selector); w[54] = hc_byte_perm (w[ 9], w[10], selector); w[53] = hc_byte_perm (w[ 8], w[ 9], selector); w[52] = hc_byte_perm (w[ 7], w[ 8], selector); w[51] = hc_byte_perm (w[ 6], w[ 7], selector); w[50] = hc_byte_perm (w[ 5], w[ 6], selector); w[49] = hc_byte_perm (w[ 4], w[ 5], selector); w[48] = hc_byte_perm (w[ 3], w[ 4], selector); w[47] = hc_byte_perm (w[ 2], w[ 3], selector); w[46] = hc_byte_perm (w[ 1], w[ 2], selector); w[45] = hc_byte_perm (w[ 0], w[ 1], selector); w[44] = hc_byte_perm ( 0, w[ 0], selector); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 
0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_byte_perm (w[17], w[18], selector); w[62] = hc_byte_perm (w[16], w[17], selector); w[61] = hc_byte_perm (w[15], w[16], selector); w[60] = hc_byte_perm (w[14], w[15], selector); w[59] = hc_byte_perm (w[13], w[14], selector); w[58] = hc_byte_perm (w[12], w[13], selector); w[57] = hc_byte_perm (w[11], w[12], selector); w[56] = hc_byte_perm (w[10], w[11], selector); w[55] = hc_byte_perm (w[ 9], w[10], selector); w[54] = hc_byte_perm (w[ 8], w[ 9], selector); w[53] = hc_byte_perm (w[ 7], w[ 8], selector); w[52] = hc_byte_perm (w[ 6], w[ 7], selector); w[51] = hc_byte_perm (w[ 5], w[ 6], selector); w[50] = hc_byte_perm (w[ 4], w[ 5], selector); w[49] = hc_byte_perm (w[ 3], w[ 4], selector); w[48] = hc_byte_perm (w[ 2], w[ 3], selector); w[47] = hc_byte_perm (w[ 1], w[ 2], selector); w[46] = hc_byte_perm (w[ 0], w[ 1], selector); w[45] = hc_byte_perm ( 0, w[ 0], selector); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_byte_perm (w[16], w[17], selector); w[62] = hc_byte_perm (w[15], w[16], selector); w[61] = hc_byte_perm (w[14], w[15], selector); w[60] = hc_byte_perm (w[13], w[14], selector); w[59] = hc_byte_perm (w[12], w[13], selector); w[58] = hc_byte_perm (w[11], w[12], selector); w[57] = hc_byte_perm (w[10], w[11], selector); w[56] = hc_byte_perm (w[ 9], w[10], selector); w[55] = 
hc_byte_perm (w[ 8], w[ 9], selector); w[54] = hc_byte_perm (w[ 7], w[ 8], selector); w[53] = hc_byte_perm (w[ 6], w[ 7], selector); w[52] = hc_byte_perm (w[ 5], w[ 6], selector); w[51] = hc_byte_perm (w[ 4], w[ 5], selector); w[50] = hc_byte_perm (w[ 3], w[ 4], selector); w[49] = hc_byte_perm (w[ 2], w[ 3], selector); w[48] = hc_byte_perm (w[ 1], w[ 2], selector); w[47] = hc_byte_perm (w[ 0], w[ 1], selector); w[46] = hc_byte_perm ( 0, w[ 0], selector); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_byte_perm (w[15], w[16], selector); w[62] = hc_byte_perm (w[14], w[15], selector); w[61] = hc_byte_perm (w[13], w[14], selector); w[60] = hc_byte_perm (w[12], w[13], selector); w[59] = hc_byte_perm (w[11], w[12], selector); w[58] = hc_byte_perm (w[10], w[11], selector); w[57] = hc_byte_perm (w[ 9], w[10], selector); w[56] = hc_byte_perm (w[ 8], w[ 9], selector); w[55] = hc_byte_perm (w[ 7], w[ 8], selector); w[54] = hc_byte_perm (w[ 6], w[ 7], selector); w[53] = hc_byte_perm (w[ 5], w[ 6], selector); w[52] = hc_byte_perm (w[ 4], w[ 5], selector); w[51] = hc_byte_perm (w[ 3], w[ 4], selector); w[50] = hc_byte_perm (w[ 2], w[ 3], selector); w[49] = hc_byte_perm (w[ 1], w[ 2], selector); w[48] = hc_byte_perm (w[ 0], w[ 1], selector); w[47] = hc_byte_perm ( 0, w[ 0], selector); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; 
w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_byte_perm (w[14], w[15], selector); w[62] = hc_byte_perm (w[13], w[14], selector); w[61] = hc_byte_perm (w[12], w[13], selector); w[60] = hc_byte_perm (w[11], w[12], selector); w[59] = hc_byte_perm (w[10], w[11], selector); w[58] = hc_byte_perm (w[ 9], w[10], selector); w[57] = hc_byte_perm (w[ 8], w[ 9], selector); w[56] = hc_byte_perm (w[ 7], w[ 8], selector); w[55] = hc_byte_perm (w[ 6], w[ 7], selector); w[54] = hc_byte_perm (w[ 5], w[ 6], selector); w[53] = hc_byte_perm (w[ 4], w[ 5], selector); w[52] = hc_byte_perm (w[ 3], w[ 4], selector); w[51] = hc_byte_perm (w[ 2], w[ 3], selector); w[50] = hc_byte_perm (w[ 1], w[ 2], selector); w[49] = hc_byte_perm (w[ 0], w[ 1], selector); w[48] = hc_byte_perm ( 0, w[ 0], selector); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_byte_perm (w[13], w[14], selector); w[62] = hc_byte_perm (w[12], w[13], selector); w[61] = hc_byte_perm (w[11], w[12], selector); w[60] = hc_byte_perm (w[10], w[11], selector); w[59] = hc_byte_perm (w[ 9], w[10], selector); w[58] = hc_byte_perm (w[ 8], w[ 9], selector); w[57] = hc_byte_perm (w[ 7], w[ 8], selector); w[56] = hc_byte_perm (w[ 6], w[ 7], selector); w[55] = 
hc_byte_perm (w[ 5], w[ 6], selector); w[54] = hc_byte_perm (w[ 4], w[ 5], selector); w[53] = hc_byte_perm (w[ 3], w[ 4], selector); w[52] = hc_byte_perm (w[ 2], w[ 3], selector); w[51] = hc_byte_perm (w[ 1], w[ 2], selector); w[50] = hc_byte_perm (w[ 0], w[ 1], selector); w[49] = hc_byte_perm ( 0, w[ 0], selector); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_byte_perm (w[12], w[13], selector); w[62] = hc_byte_perm (w[11], w[12], selector); w[61] = hc_byte_perm (w[10], w[11], selector); w[60] = hc_byte_perm (w[ 9], w[10], selector); w[59] = hc_byte_perm (w[ 8], w[ 9], selector); w[58] = hc_byte_perm (w[ 7], w[ 8], selector); w[57] = hc_byte_perm (w[ 6], w[ 7], selector); w[56] = hc_byte_perm (w[ 5], w[ 6], selector); w[55] = hc_byte_perm (w[ 4], w[ 5], selector); w[54] = hc_byte_perm (w[ 3], w[ 4], selector); w[53] = hc_byte_perm (w[ 2], w[ 3], selector); w[52] = hc_byte_perm (w[ 1], w[ 2], selector); w[51] = hc_byte_perm (w[ 0], w[ 1], selector); w[50] = hc_byte_perm ( 0, w[ 0], selector); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 
0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_byte_perm (w[11], w[12], selector); w[62] = hc_byte_perm (w[10], w[11], selector); w[61] = hc_byte_perm (w[ 9], w[10], selector); w[60] = hc_byte_perm (w[ 8], w[ 9], selector); w[59] = hc_byte_perm (w[ 7], w[ 8], selector); w[58] = hc_byte_perm (w[ 6], w[ 7], selector); w[57] = hc_byte_perm (w[ 5], w[ 6], selector); w[56] = hc_byte_perm (w[ 4], w[ 5], selector); w[55] = hc_byte_perm (w[ 3], w[ 4], selector); w[54] = hc_byte_perm (w[ 2], w[ 3], selector); w[53] = hc_byte_perm (w[ 1], w[ 2], selector); w[52] = hc_byte_perm (w[ 0], w[ 1], selector); w[51] = hc_byte_perm ( 0, w[ 0], selector); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_byte_perm (w[10], w[11], selector); w[62] = hc_byte_perm (w[ 9], w[10], selector); w[61] = hc_byte_perm (w[ 8], w[ 9], selector); w[60] = hc_byte_perm (w[ 7], w[ 8], selector); w[59] = hc_byte_perm (w[ 6], w[ 7], selector); w[58] = hc_byte_perm (w[ 5], w[ 6], selector); w[57] = hc_byte_perm (w[ 4], w[ 5], selector); w[56] = hc_byte_perm (w[ 3], w[ 4], selector); w[55] = hc_byte_perm (w[ 2], w[ 3], selector); w[54] = hc_byte_perm (w[ 1], w[ 2], selector); w[53] = hc_byte_perm (w[ 0], w[ 1], selector); w[52] = hc_byte_perm ( 0, w[ 0], selector); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; 
w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_byte_perm (w[ 9], w[10], selector); w[62] = hc_byte_perm (w[ 8], w[ 9], selector); w[61] = hc_byte_perm (w[ 7], w[ 8], selector); w[60] = hc_byte_perm (w[ 6], w[ 7], selector); w[59] = hc_byte_perm (w[ 5], w[ 6], selector); w[58] = hc_byte_perm (w[ 4], w[ 5], selector); w[57] = hc_byte_perm (w[ 3], w[ 4], selector); w[56] = hc_byte_perm (w[ 2], w[ 3], selector); w[55] = hc_byte_perm (w[ 1], w[ 2], selector); w[54] = hc_byte_perm (w[ 0], w[ 1], selector); w[53] = hc_byte_perm ( 0, w[ 0], selector); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_byte_perm (w[ 8], w[ 9], selector); w[62] = hc_byte_perm (w[ 7], w[ 8], selector); w[61] = hc_byte_perm (w[ 6], w[ 7], selector); w[60] = hc_byte_perm (w[ 5], w[ 6], selector); w[59] = hc_byte_perm (w[ 4], w[ 5], selector); w[58] = hc_byte_perm (w[ 3], w[ 4], selector); w[57] = hc_byte_perm (w[ 2], w[ 3], selector); w[56] = hc_byte_perm (w[ 1], w[ 2], selector); w[55] = hc_byte_perm (w[ 0], w[ 1], selector); w[54] = hc_byte_perm ( 0, w[ 
0], selector); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_byte_perm (w[ 7], w[ 8], selector); w[62] = hc_byte_perm (w[ 6], w[ 7], selector); w[61] = hc_byte_perm (w[ 5], w[ 6], selector); w[60] = hc_byte_perm (w[ 4], w[ 5], selector); w[59] = hc_byte_perm (w[ 3], w[ 4], selector); w[58] = hc_byte_perm (w[ 2], w[ 3], selector); w[57] = hc_byte_perm (w[ 1], w[ 2], selector); w[56] = hc_byte_perm (w[ 0], w[ 1], selector); w[55] = hc_byte_perm ( 0, w[ 0], selector); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_byte_perm (w[ 6], w[ 7], selector); w[62] = hc_byte_perm (w[ 5], w[ 6], selector); w[61] = hc_byte_perm (w[ 4], w[ 5], selector); w[60] = hc_byte_perm (w[ 3], w[ 4], selector); w[59] = hc_byte_perm (w[ 2], w[ 3], selector); w[58] = hc_byte_perm (w[ 1], w[ 2], selector); w[57] = hc_byte_perm (w[ 0], w[ 1], selector); 
w[56] = hc_byte_perm ( 0, w[ 0], selector); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_byte_perm (w[ 5], w[ 6], selector); w[62] = hc_byte_perm (w[ 4], w[ 5], selector); w[61] = hc_byte_perm (w[ 3], w[ 4], selector); w[60] = hc_byte_perm (w[ 2], w[ 3], selector); w[59] = hc_byte_perm (w[ 1], w[ 2], selector); w[58] = hc_byte_perm (w[ 0], w[ 1], selector); w[57] = hc_byte_perm ( 0, w[ 0], selector); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_byte_perm (w[ 4], w[ 5], selector); w[62] = hc_byte_perm (w[ 3], w[ 4], selector); w[61] = hc_byte_perm (w[ 2], w[ 3], selector); w[60] = hc_byte_perm (w[ 1], w[ 2], selector); w[59] = hc_byte_perm (w[ 0], w[ 1], selector); w[58] = hc_byte_perm ( 0, w[ 0], selector); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 
0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_byte_perm (w[ 3], w[ 4], selector); w[62] = hc_byte_perm (w[ 2], w[ 3], selector); w[61] = hc_byte_perm (w[ 1], w[ 2], selector); w[60] = hc_byte_perm (w[ 0], w[ 1], selector); w[59] = hc_byte_perm ( 0, w[ 0], selector); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_byte_perm (w[ 2], w[ 3], selector); w[62] = hc_byte_perm (w[ 1], w[ 2], selector); w[61] = hc_byte_perm (w[ 0], w[ 1], selector); w[60] = hc_byte_perm ( 0, w[ 0], selector); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 
0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_byte_perm (w[ 1], w[ 2], selector); w[62] = hc_byte_perm (w[ 0], w[ 1], selector); w[61] = hc_byte_perm ( 0, w[ 0], selector); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_byte_perm (w[ 0], w[ 1], selector); w[62] = hc_byte_perm ( 0, w[ 0], selector); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_byte_perm ( 0, w[ 0], selector); 
w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_1x64_be (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w[63] = hc_bytealign_be (w[62], w[63], offset); w[62] = hc_bytealign_be (w[61], w[62], offset); w[61] = hc_bytealign_be (w[60], w[61], offset); w[60] = hc_bytealign_be (w[59], w[60], offset); w[59] = hc_bytealign_be (w[58], w[59], offset); w[58] = hc_bytealign_be (w[57], w[58], offset); w[57] = hc_bytealign_be (w[56], w[57], offset); w[56] = hc_bytealign_be (w[55], w[56], offset); w[55] = hc_bytealign_be (w[54], w[55], offset); w[54] = hc_bytealign_be (w[53], w[54], offset); w[53] = hc_bytealign_be (w[52], w[53], offset); w[52] = hc_bytealign_be (w[51], w[52], offset); w[51] = hc_bytealign_be (w[50], w[51], offset); w[50] = hc_bytealign_be (w[49], w[50], offset); w[49] = hc_bytealign_be (w[48], w[49], offset); w[48] = hc_bytealign_be (w[47], w[48], offset); w[47] = hc_bytealign_be (w[46], w[47], offset); w[46] = hc_bytealign_be (w[45], w[46], offset); w[45] = hc_bytealign_be (w[44], w[45], offset); w[44] = hc_bytealign_be (w[43], w[44], offset); w[43] = hc_bytealign_be (w[42], w[43], offset); w[42] = hc_bytealign_be (w[41], w[42], 
offset); w[41] = hc_bytealign_be (w[40], w[41], offset); w[40] = hc_bytealign_be (w[39], w[40], offset); w[39] = hc_bytealign_be (w[38], w[39], offset); w[38] = hc_bytealign_be (w[37], w[38], offset); w[37] = hc_bytealign_be (w[36], w[37], offset); w[36] = hc_bytealign_be (w[35], w[36], offset); w[35] = hc_bytealign_be (w[34], w[35], offset); w[34] = hc_bytealign_be (w[33], w[34], offset); w[33] = hc_bytealign_be (w[32], w[33], offset); w[32] = hc_bytealign_be (w[31], w[32], offset); w[31] = hc_bytealign_be (w[30], w[31], offset); w[30] = hc_bytealign_be (w[29], w[30], offset); w[29] = hc_bytealign_be (w[28], w[29], offset); w[28] = hc_bytealign_be (w[27], w[28], offset); w[27] = hc_bytealign_be (w[26], w[27], offset); w[26] = hc_bytealign_be (w[25], w[26], offset); w[25] = hc_bytealign_be (w[24], w[25], offset); w[24] = hc_bytealign_be (w[23], w[24], offset); w[23] = hc_bytealign_be (w[22], w[23], offset); w[22] = hc_bytealign_be (w[21], w[22], offset); w[21] = hc_bytealign_be (w[20], w[21], offset); w[20] = hc_bytealign_be (w[19], w[20], offset); w[19] = hc_bytealign_be (w[18], w[19], offset); w[18] = hc_bytealign_be (w[17], w[18], offset); w[17] = hc_bytealign_be (w[16], w[17], offset); w[16] = hc_bytealign_be (w[15], w[16], offset); w[15] = hc_bytealign_be (w[14], w[15], offset); w[14] = hc_bytealign_be (w[13], w[14], offset); w[13] = hc_bytealign_be (w[12], w[13], offset); w[12] = hc_bytealign_be (w[11], w[12], offset); w[11] = hc_bytealign_be (w[10], w[11], offset); w[10] = hc_bytealign_be (w[ 9], w[10], offset); w[ 9] = hc_bytealign_be (w[ 8], w[ 9], offset); w[ 8] = hc_bytealign_be (w[ 7], w[ 8], offset); w[ 7] = hc_bytealign_be (w[ 6], w[ 7], offset); w[ 6] = hc_bytealign_be (w[ 5], w[ 6], offset); w[ 5] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 4] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 3] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 2] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 1] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 0] = 
hc_bytealign_be ( 0, w[ 0], offset); break; case 1: w[63] = hc_bytealign_be (w[61], w[62], offset); w[62] = hc_bytealign_be (w[60], w[61], offset); w[61] = hc_bytealign_be (w[59], w[60], offset); w[60] = hc_bytealign_be (w[58], w[59], offset); w[59] = hc_bytealign_be (w[57], w[58], offset); w[58] = hc_bytealign_be (w[56], w[57], offset); w[57] = hc_bytealign_be (w[55], w[56], offset); w[56] = hc_bytealign_be (w[54], w[55], offset); w[55] = hc_bytealign_be (w[53], w[54], offset); w[54] = hc_bytealign_be (w[52], w[53], offset); w[53] = hc_bytealign_be (w[51], w[52], offset); w[52] = hc_bytealign_be (w[50], w[51], offset); w[51] = hc_bytealign_be (w[49], w[50], offset); w[50] = hc_bytealign_be (w[48], w[49], offset); w[49] = hc_bytealign_be (w[47], w[48], offset); w[48] = hc_bytealign_be (w[46], w[47], offset); w[47] = hc_bytealign_be (w[45], w[46], offset); w[46] = hc_bytealign_be (w[44], w[45], offset); w[45] = hc_bytealign_be (w[43], w[44], offset); w[44] = hc_bytealign_be (w[42], w[43], offset); w[43] = hc_bytealign_be (w[41], w[42], offset); w[42] = hc_bytealign_be (w[40], w[41], offset); w[41] = hc_bytealign_be (w[39], w[40], offset); w[40] = hc_bytealign_be (w[38], w[39], offset); w[39] = hc_bytealign_be (w[37], w[38], offset); w[38] = hc_bytealign_be (w[36], w[37], offset); w[37] = hc_bytealign_be (w[35], w[36], offset); w[36] = hc_bytealign_be (w[34], w[35], offset); w[35] = hc_bytealign_be (w[33], w[34], offset); w[34] = hc_bytealign_be (w[32], w[33], offset); w[33] = hc_bytealign_be (w[31], w[32], offset); w[32] = hc_bytealign_be (w[30], w[31], offset); w[31] = hc_bytealign_be (w[29], w[30], offset); w[30] = hc_bytealign_be (w[28], w[29], offset); w[29] = hc_bytealign_be (w[27], w[28], offset); w[28] = hc_bytealign_be (w[26], w[27], offset); w[27] = hc_bytealign_be (w[25], w[26], offset); w[26] = hc_bytealign_be (w[24], w[25], offset); w[25] = hc_bytealign_be (w[23], w[24], offset); w[24] = hc_bytealign_be (w[22], w[23], offset); w[23] = hc_bytealign_be 
(w[21], w[22], offset); w[22] = hc_bytealign_be (w[20], w[21], offset); w[21] = hc_bytealign_be (w[19], w[20], offset); w[20] = hc_bytealign_be (w[18], w[19], offset); w[19] = hc_bytealign_be (w[17], w[18], offset); w[18] = hc_bytealign_be (w[16], w[17], offset); w[17] = hc_bytealign_be (w[15], w[16], offset); w[16] = hc_bytealign_be (w[14], w[15], offset); w[15] = hc_bytealign_be (w[13], w[14], offset); w[14] = hc_bytealign_be (w[12], w[13], offset); w[13] = hc_bytealign_be (w[11], w[12], offset); w[12] = hc_bytealign_be (w[10], w[11], offset); w[11] = hc_bytealign_be (w[ 9], w[10], offset); w[10] = hc_bytealign_be (w[ 8], w[ 9], offset); w[ 9] = hc_bytealign_be (w[ 7], w[ 8], offset); w[ 8] = hc_bytealign_be (w[ 6], w[ 7], offset); w[ 7] = hc_bytealign_be (w[ 5], w[ 6], offset); w[ 6] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 5] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 4] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 3] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 2] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 1] = hc_bytealign_be ( 0, w[ 0], offset); w[ 0] = 0; break; case 2: w[63] = hc_bytealign_be (w[60], w[61], offset); w[62] = hc_bytealign_be (w[59], w[60], offset); w[61] = hc_bytealign_be (w[58], w[59], offset); w[60] = hc_bytealign_be (w[57], w[58], offset); w[59] = hc_bytealign_be (w[56], w[57], offset); w[58] = hc_bytealign_be (w[55], w[56], offset); w[57] = hc_bytealign_be (w[54], w[55], offset); w[56] = hc_bytealign_be (w[53], w[54], offset); w[55] = hc_bytealign_be (w[52], w[53], offset); w[54] = hc_bytealign_be (w[51], w[52], offset); w[53] = hc_bytealign_be (w[50], w[51], offset); w[52] = hc_bytealign_be (w[49], w[50], offset); w[51] = hc_bytealign_be (w[48], w[49], offset); w[50] = hc_bytealign_be (w[47], w[48], offset); w[49] = hc_bytealign_be (w[46], w[47], offset); w[48] = hc_bytealign_be (w[45], w[46], offset); w[47] = hc_bytealign_be (w[44], w[45], offset); w[46] = hc_bytealign_be (w[43], w[44], offset); w[45] = hc_bytealign_be (w[42], 
w[43], offset); w[44] = hc_bytealign_be (w[41], w[42], offset); w[43] = hc_bytealign_be (w[40], w[41], offset); w[42] = hc_bytealign_be (w[39], w[40], offset); w[41] = hc_bytealign_be (w[38], w[39], offset); w[40] = hc_bytealign_be (w[37], w[38], offset); w[39] = hc_bytealign_be (w[36], w[37], offset); w[38] = hc_bytealign_be (w[35], w[36], offset); w[37] = hc_bytealign_be (w[34], w[35], offset); w[36] = hc_bytealign_be (w[33], w[34], offset); w[35] = hc_bytealign_be (w[32], w[33], offset); w[34] = hc_bytealign_be (w[31], w[32], offset); w[33] = hc_bytealign_be (w[30], w[31], offset); w[32] = hc_bytealign_be (w[29], w[30], offset); w[31] = hc_bytealign_be (w[28], w[29], offset); w[30] = hc_bytealign_be (w[27], w[28], offset); w[29] = hc_bytealign_be (w[26], w[27], offset); w[28] = hc_bytealign_be (w[25], w[26], offset); w[27] = hc_bytealign_be (w[24], w[25], offset); w[26] = hc_bytealign_be (w[23], w[24], offset); w[25] = hc_bytealign_be (w[22], w[23], offset); w[24] = hc_bytealign_be (w[21], w[22], offset); w[23] = hc_bytealign_be (w[20], w[21], offset); w[22] = hc_bytealign_be (w[19], w[20], offset); w[21] = hc_bytealign_be (w[18], w[19], offset); w[20] = hc_bytealign_be (w[17], w[18], offset); w[19] = hc_bytealign_be (w[16], w[17], offset); w[18] = hc_bytealign_be (w[15], w[16], offset); w[17] = hc_bytealign_be (w[14], w[15], offset); w[16] = hc_bytealign_be (w[13], w[14], offset); w[15] = hc_bytealign_be (w[12], w[13], offset); w[14] = hc_bytealign_be (w[11], w[12], offset); w[13] = hc_bytealign_be (w[10], w[11], offset); w[12] = hc_bytealign_be (w[ 9], w[10], offset); w[11] = hc_bytealign_be (w[ 8], w[ 9], offset); w[10] = hc_bytealign_be (w[ 7], w[ 8], offset); w[ 9] = hc_bytealign_be (w[ 6], w[ 7], offset); w[ 8] = hc_bytealign_be (w[ 5], w[ 6], offset); w[ 7] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 6] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 5] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 4] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 3] = 
hc_bytealign_be (w[ 0], w[ 1], offset); w[ 2] = hc_bytealign_be ( 0, w[ 0], offset); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_bytealign_be (w[59], w[60], offset); w[62] = hc_bytealign_be (w[58], w[59], offset); w[61] = hc_bytealign_be (w[57], w[58], offset); w[60] = hc_bytealign_be (w[56], w[57], offset); w[59] = hc_bytealign_be (w[55], w[56], offset); w[58] = hc_bytealign_be (w[54], w[55], offset); w[57] = hc_bytealign_be (w[53], w[54], offset); w[56] = hc_bytealign_be (w[52], w[53], offset); w[55] = hc_bytealign_be (w[51], w[52], offset); w[54] = hc_bytealign_be (w[50], w[51], offset); w[53] = hc_bytealign_be (w[49], w[50], offset); w[52] = hc_bytealign_be (w[48], w[49], offset); w[51] = hc_bytealign_be (w[47], w[48], offset); w[50] = hc_bytealign_be (w[46], w[47], offset); w[49] = hc_bytealign_be (w[45], w[46], offset); w[48] = hc_bytealign_be (w[44], w[45], offset); w[47] = hc_bytealign_be (w[43], w[44], offset); w[46] = hc_bytealign_be (w[42], w[43], offset); w[45] = hc_bytealign_be (w[41], w[42], offset); w[44] = hc_bytealign_be (w[40], w[41], offset); w[43] = hc_bytealign_be (w[39], w[40], offset); w[42] = hc_bytealign_be (w[38], w[39], offset); w[41] = hc_bytealign_be (w[37], w[38], offset); w[40] = hc_bytealign_be (w[36], w[37], offset); w[39] = hc_bytealign_be (w[35], w[36], offset); w[38] = hc_bytealign_be (w[34], w[35], offset); w[37] = hc_bytealign_be (w[33], w[34], offset); w[36] = hc_bytealign_be (w[32], w[33], offset); w[35] = hc_bytealign_be (w[31], w[32], offset); w[34] = hc_bytealign_be (w[30], w[31], offset); w[33] = hc_bytealign_be (w[29], w[30], offset); w[32] = hc_bytealign_be (w[28], w[29], offset); w[31] = hc_bytealign_be (w[27], w[28], offset); w[30] = hc_bytealign_be (w[26], w[27], offset); w[29] = hc_bytealign_be (w[25], w[26], offset); w[28] = hc_bytealign_be (w[24], w[25], offset); w[27] = hc_bytealign_be (w[23], w[24], offset); w[26] = hc_bytealign_be (w[22], w[23], offset); w[25] = hc_bytealign_be (w[21], w[22], offset); w[24] 
= hc_bytealign_be (w[20], w[21], offset); w[23] = hc_bytealign_be (w[19], w[20], offset); w[22] = hc_bytealign_be (w[18], w[19], offset); w[21] = hc_bytealign_be (w[17], w[18], offset); w[20] = hc_bytealign_be (w[16], w[17], offset); w[19] = hc_bytealign_be (w[15], w[16], offset); w[18] = hc_bytealign_be (w[14], w[15], offset); w[17] = hc_bytealign_be (w[13], w[14], offset); w[16] = hc_bytealign_be (w[12], w[13], offset); w[15] = hc_bytealign_be (w[11], w[12], offset); w[14] = hc_bytealign_be (w[10], w[11], offset); w[13] = hc_bytealign_be (w[ 9], w[10], offset); w[12] = hc_bytealign_be (w[ 8], w[ 9], offset); w[11] = hc_bytealign_be (w[ 7], w[ 8], offset); w[10] = hc_bytealign_be (w[ 6], w[ 7], offset); w[ 9] = hc_bytealign_be (w[ 5], w[ 6], offset); w[ 8] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 7] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 6] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 5] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 4] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 3] = hc_bytealign_be ( 0, w[ 0], offset); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_bytealign_be (w[58], w[59], offset); w[62] = hc_bytealign_be (w[57], w[58], offset); w[61] = hc_bytealign_be (w[56], w[57], offset); w[60] = hc_bytealign_be (w[55], w[56], offset); w[59] = hc_bytealign_be (w[54], w[55], offset); w[58] = hc_bytealign_be (w[53], w[54], offset); w[57] = hc_bytealign_be (w[52], w[53], offset); w[56] = hc_bytealign_be (w[51], w[52], offset); w[55] = hc_bytealign_be (w[50], w[51], offset); w[54] = hc_bytealign_be (w[49], w[50], offset); w[53] = hc_bytealign_be (w[48], w[49], offset); w[52] = hc_bytealign_be (w[47], w[48], offset); w[51] = hc_bytealign_be (w[46], w[47], offset); w[50] = hc_bytealign_be (w[45], w[46], offset); w[49] = hc_bytealign_be (w[44], w[45], offset); w[48] = hc_bytealign_be (w[43], w[44], offset); w[47] = hc_bytealign_be (w[42], w[43], offset); w[46] = hc_bytealign_be (w[41], w[42], offset); w[45] = hc_bytealign_be (w[40], w[41], 
offset); w[44] = hc_bytealign_be (w[39], w[40], offset); w[43] = hc_bytealign_be (w[38], w[39], offset); w[42] = hc_bytealign_be (w[37], w[38], offset); w[41] = hc_bytealign_be (w[36], w[37], offset); w[40] = hc_bytealign_be (w[35], w[36], offset); w[39] = hc_bytealign_be (w[34], w[35], offset); w[38] = hc_bytealign_be (w[33], w[34], offset); w[37] = hc_bytealign_be (w[32], w[33], offset); w[36] = hc_bytealign_be (w[31], w[32], offset); w[35] = hc_bytealign_be (w[30], w[31], offset); w[34] = hc_bytealign_be (w[29], w[30], offset); w[33] = hc_bytealign_be (w[28], w[29], offset); w[32] = hc_bytealign_be (w[27], w[28], offset); w[31] = hc_bytealign_be (w[26], w[27], offset); w[30] = hc_bytealign_be (w[25], w[26], offset); w[29] = hc_bytealign_be (w[24], w[25], offset); w[28] = hc_bytealign_be (w[23], w[24], offset); w[27] = hc_bytealign_be (w[22], w[23], offset); w[26] = hc_bytealign_be (w[21], w[22], offset); w[25] = hc_bytealign_be (w[20], w[21], offset); w[24] = hc_bytealign_be (w[19], w[20], offset); w[23] = hc_bytealign_be (w[18], w[19], offset); w[22] = hc_bytealign_be (w[17], w[18], offset); w[21] = hc_bytealign_be (w[16], w[17], offset); w[20] = hc_bytealign_be (w[15], w[16], offset); w[19] = hc_bytealign_be (w[14], w[15], offset); w[18] = hc_bytealign_be (w[13], w[14], offset); w[17] = hc_bytealign_be (w[12], w[13], offset); w[16] = hc_bytealign_be (w[11], w[12], offset); w[15] = hc_bytealign_be (w[10], w[11], offset); w[14] = hc_bytealign_be (w[ 9], w[10], offset); w[13] = hc_bytealign_be (w[ 8], w[ 9], offset); w[12] = hc_bytealign_be (w[ 7], w[ 8], offset); w[11] = hc_bytealign_be (w[ 6], w[ 7], offset); w[10] = hc_bytealign_be (w[ 5], w[ 6], offset); w[ 9] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 8] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 7] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 6] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 5] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 4] = hc_bytealign_be ( 0, w[ 0], offset); w[ 3] = 0; w[ 2] = 0; w[ 
1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_bytealign_be (w[57], w[58], offset); w[62] = hc_bytealign_be (w[56], w[57], offset); w[61] = hc_bytealign_be (w[55], w[56], offset); w[60] = hc_bytealign_be (w[54], w[55], offset); w[59] = hc_bytealign_be (w[53], w[54], offset); w[58] = hc_bytealign_be (w[52], w[53], offset); w[57] = hc_bytealign_be (w[51], w[52], offset); w[56] = hc_bytealign_be (w[50], w[51], offset); w[55] = hc_bytealign_be (w[49], w[50], offset); w[54] = hc_bytealign_be (w[48], w[49], offset); w[53] = hc_bytealign_be (w[47], w[48], offset); w[52] = hc_bytealign_be (w[46], w[47], offset); w[51] = hc_bytealign_be (w[45], w[46], offset); w[50] = hc_bytealign_be (w[44], w[45], offset); w[49] = hc_bytealign_be (w[43], w[44], offset); w[48] = hc_bytealign_be (w[42], w[43], offset); w[47] = hc_bytealign_be (w[41], w[42], offset); w[46] = hc_bytealign_be (w[40], w[41], offset); w[45] = hc_bytealign_be (w[39], w[40], offset); w[44] = hc_bytealign_be (w[38], w[39], offset); w[43] = hc_bytealign_be (w[37], w[38], offset); w[42] = hc_bytealign_be (w[36], w[37], offset); w[41] = hc_bytealign_be (w[35], w[36], offset); w[40] = hc_bytealign_be (w[34], w[35], offset); w[39] = hc_bytealign_be (w[33], w[34], offset); w[38] = hc_bytealign_be (w[32], w[33], offset); w[37] = hc_bytealign_be (w[31], w[32], offset); w[36] = hc_bytealign_be (w[30], w[31], offset); w[35] = hc_bytealign_be (w[29], w[30], offset); w[34] = hc_bytealign_be (w[28], w[29], offset); w[33] = hc_bytealign_be (w[27], w[28], offset); w[32] = hc_bytealign_be (w[26], w[27], offset); w[31] = hc_bytealign_be (w[25], w[26], offset); w[30] = hc_bytealign_be (w[24], w[25], offset); w[29] = hc_bytealign_be (w[23], w[24], offset); w[28] = hc_bytealign_be (w[22], w[23], offset); w[27] = hc_bytealign_be (w[21], w[22], offset); w[26] = hc_bytealign_be (w[20], w[21], offset); w[25] = hc_bytealign_be (w[19], w[20], offset); w[24] = hc_bytealign_be (w[18], w[19], offset); w[23] = hc_bytealign_be (w[17], w[18], 
offset); w[22] = hc_bytealign_be (w[16], w[17], offset); w[21] = hc_bytealign_be (w[15], w[16], offset); w[20] = hc_bytealign_be (w[14], w[15], offset); w[19] = hc_bytealign_be (w[13], w[14], offset); w[18] = hc_bytealign_be (w[12], w[13], offset); w[17] = hc_bytealign_be (w[11], w[12], offset); w[16] = hc_bytealign_be (w[10], w[11], offset); w[15] = hc_bytealign_be (w[ 9], w[10], offset); w[14] = hc_bytealign_be (w[ 8], w[ 9], offset); w[13] = hc_bytealign_be (w[ 7], w[ 8], offset); w[12] = hc_bytealign_be (w[ 6], w[ 7], offset); w[11] = hc_bytealign_be (w[ 5], w[ 6], offset); w[10] = hc_bytealign_be (w[ 4], w[ 5], offset); w[ 9] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 8] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 7] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 6] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 5] = hc_bytealign_be ( 0, w[ 0], offset); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_bytealign_be (w[56], w[57], offset); w[62] = hc_bytealign_be (w[55], w[56], offset); w[61] = hc_bytealign_be (w[54], w[55], offset); w[60] = hc_bytealign_be (w[53], w[54], offset); w[59] = hc_bytealign_be (w[52], w[53], offset); w[58] = hc_bytealign_be (w[51], w[52], offset); w[57] = hc_bytealign_be (w[50], w[51], offset); w[56] = hc_bytealign_be (w[49], w[50], offset); w[55] = hc_bytealign_be (w[48], w[49], offset); w[54] = hc_bytealign_be (w[47], w[48], offset); w[53] = hc_bytealign_be (w[46], w[47], offset); w[52] = hc_bytealign_be (w[45], w[46], offset); w[51] = hc_bytealign_be (w[44], w[45], offset); w[50] = hc_bytealign_be (w[43], w[44], offset); w[49] = hc_bytealign_be (w[42], w[43], offset); w[48] = hc_bytealign_be (w[41], w[42], offset); w[47] = hc_bytealign_be (w[40], w[41], offset); w[46] = hc_bytealign_be (w[39], w[40], offset); w[45] = hc_bytealign_be (w[38], w[39], offset); w[44] = hc_bytealign_be (w[37], w[38], offset); w[43] = hc_bytealign_be (w[36], w[37], offset); w[42] = hc_bytealign_be (w[35], w[36], offset); 
w[41] = hc_bytealign_be (w[34], w[35], offset); w[40] = hc_bytealign_be (w[33], w[34], offset); w[39] = hc_bytealign_be (w[32], w[33], offset); w[38] = hc_bytealign_be (w[31], w[32], offset); w[37] = hc_bytealign_be (w[30], w[31], offset); w[36] = hc_bytealign_be (w[29], w[30], offset); w[35] = hc_bytealign_be (w[28], w[29], offset); w[34] = hc_bytealign_be (w[27], w[28], offset); w[33] = hc_bytealign_be (w[26], w[27], offset); w[32] = hc_bytealign_be (w[25], w[26], offset); w[31] = hc_bytealign_be (w[24], w[25], offset); w[30] = hc_bytealign_be (w[23], w[24], offset); w[29] = hc_bytealign_be (w[22], w[23], offset); w[28] = hc_bytealign_be (w[21], w[22], offset); w[27] = hc_bytealign_be (w[20], w[21], offset); w[26] = hc_bytealign_be (w[19], w[20], offset); w[25] = hc_bytealign_be (w[18], w[19], offset); w[24] = hc_bytealign_be (w[17], w[18], offset); w[23] = hc_bytealign_be (w[16], w[17], offset); w[22] = hc_bytealign_be (w[15], w[16], offset); w[21] = hc_bytealign_be (w[14], w[15], offset); w[20] = hc_bytealign_be (w[13], w[14], offset); w[19] = hc_bytealign_be (w[12], w[13], offset); w[18] = hc_bytealign_be (w[11], w[12], offset); w[17] = hc_bytealign_be (w[10], w[11], offset); w[16] = hc_bytealign_be (w[ 9], w[10], offset); w[15] = hc_bytealign_be (w[ 8], w[ 9], offset); w[14] = hc_bytealign_be (w[ 7], w[ 8], offset); w[13] = hc_bytealign_be (w[ 6], w[ 7], offset); w[12] = hc_bytealign_be (w[ 5], w[ 6], offset); w[11] = hc_bytealign_be (w[ 4], w[ 5], offset); w[10] = hc_bytealign_be (w[ 3], w[ 4], offset); w[ 9] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 8] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 7] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 6] = hc_bytealign_be ( 0, w[ 0], offset); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_bytealign_be (w[55], w[56], offset); w[62] = hc_bytealign_be (w[54], w[55], offset); w[61] = hc_bytealign_be (w[53], w[54], offset); w[60] = hc_bytealign_be (w[52], w[53], offset); 
w[59] = hc_bytealign_be (w[51], w[52], offset); w[58] = hc_bytealign_be (w[50], w[51], offset); w[57] = hc_bytealign_be (w[49], w[50], offset); w[56] = hc_bytealign_be (w[48], w[49], offset); w[55] = hc_bytealign_be (w[47], w[48], offset); w[54] = hc_bytealign_be (w[46], w[47], offset); w[53] = hc_bytealign_be (w[45], w[46], offset); w[52] = hc_bytealign_be (w[44], w[45], offset); w[51] = hc_bytealign_be (w[43], w[44], offset); w[50] = hc_bytealign_be (w[42], w[43], offset); w[49] = hc_bytealign_be (w[41], w[42], offset); w[48] = hc_bytealign_be (w[40], w[41], offset); w[47] = hc_bytealign_be (w[39], w[40], offset); w[46] = hc_bytealign_be (w[38], w[39], offset); w[45] = hc_bytealign_be (w[37], w[38], offset); w[44] = hc_bytealign_be (w[36], w[37], offset); w[43] = hc_bytealign_be (w[35], w[36], offset); w[42] = hc_bytealign_be (w[34], w[35], offset); w[41] = hc_bytealign_be (w[33], w[34], offset); w[40] = hc_bytealign_be (w[32], w[33], offset); w[39] = hc_bytealign_be (w[31], w[32], offset); w[38] = hc_bytealign_be (w[30], w[31], offset); w[37] = hc_bytealign_be (w[29], w[30], offset); w[36] = hc_bytealign_be (w[28], w[29], offset); w[35] = hc_bytealign_be (w[27], w[28], offset); w[34] = hc_bytealign_be (w[26], w[27], offset); w[33] = hc_bytealign_be (w[25], w[26], offset); w[32] = hc_bytealign_be (w[24], w[25], offset); w[31] = hc_bytealign_be (w[23], w[24], offset); w[30] = hc_bytealign_be (w[22], w[23], offset); w[29] = hc_bytealign_be (w[21], w[22], offset); w[28] = hc_bytealign_be (w[20], w[21], offset); w[27] = hc_bytealign_be (w[19], w[20], offset); w[26] = hc_bytealign_be (w[18], w[19], offset); w[25] = hc_bytealign_be (w[17], w[18], offset); w[24] = hc_bytealign_be (w[16], w[17], offset); w[23] = hc_bytealign_be (w[15], w[16], offset); w[22] = hc_bytealign_be (w[14], w[15], offset); w[21] = hc_bytealign_be (w[13], w[14], offset); w[20] = hc_bytealign_be (w[12], w[13], offset); w[19] = hc_bytealign_be (w[11], w[12], offset); w[18] = hc_bytealign_be (w[10], 
w[11], offset); w[17] = hc_bytealign_be (w[ 9], w[10], offset); w[16] = hc_bytealign_be (w[ 8], w[ 9], offset); w[15] = hc_bytealign_be (w[ 7], w[ 8], offset); w[14] = hc_bytealign_be (w[ 6], w[ 7], offset); w[13] = hc_bytealign_be (w[ 5], w[ 6], offset); w[12] = hc_bytealign_be (w[ 4], w[ 5], offset); w[11] = hc_bytealign_be (w[ 3], w[ 4], offset); w[10] = hc_bytealign_be (w[ 2], w[ 3], offset); w[ 9] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 8] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 7] = hc_bytealign_be ( 0, w[ 0], offset); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_bytealign_be (w[54], w[55], offset); w[62] = hc_bytealign_be (w[53], w[54], offset); w[61] = hc_bytealign_be (w[52], w[53], offset); w[60] = hc_bytealign_be (w[51], w[52], offset); w[59] = hc_bytealign_be (w[50], w[51], offset); w[58] = hc_bytealign_be (w[49], w[50], offset); w[57] = hc_bytealign_be (w[48], w[49], offset); w[56] = hc_bytealign_be (w[47], w[48], offset); w[55] = hc_bytealign_be (w[46], w[47], offset); w[54] = hc_bytealign_be (w[45], w[46], offset); w[53] = hc_bytealign_be (w[44], w[45], offset); w[52] = hc_bytealign_be (w[43], w[44], offset); w[51] = hc_bytealign_be (w[42], w[43], offset); w[50] = hc_bytealign_be (w[41], w[42], offset); w[49] = hc_bytealign_be (w[40], w[41], offset); w[48] = hc_bytealign_be (w[39], w[40], offset); w[47] = hc_bytealign_be (w[38], w[39], offset); w[46] = hc_bytealign_be (w[37], w[38], offset); w[45] = hc_bytealign_be (w[36], w[37], offset); w[44] = hc_bytealign_be (w[35], w[36], offset); w[43] = hc_bytealign_be (w[34], w[35], offset); w[42] = hc_bytealign_be (w[33], w[34], offset); w[41] = hc_bytealign_be (w[32], w[33], offset); w[40] = hc_bytealign_be (w[31], w[32], offset); w[39] = hc_bytealign_be (w[30], w[31], offset); w[38] = hc_bytealign_be (w[29], w[30], offset); w[37] = hc_bytealign_be (w[28], w[29], offset); w[36] = hc_bytealign_be (w[27], w[28], offset); w[35] = 
hc_bytealign_be (w[26], w[27], offset); w[34] = hc_bytealign_be (w[25], w[26], offset); w[33] = hc_bytealign_be (w[24], w[25], offset); w[32] = hc_bytealign_be (w[23], w[24], offset); w[31] = hc_bytealign_be (w[22], w[23], offset); w[30] = hc_bytealign_be (w[21], w[22], offset); w[29] = hc_bytealign_be (w[20], w[21], offset); w[28] = hc_bytealign_be (w[19], w[20], offset); w[27] = hc_bytealign_be (w[18], w[19], offset); w[26] = hc_bytealign_be (w[17], w[18], offset); w[25] = hc_bytealign_be (w[16], w[17], offset); w[24] = hc_bytealign_be (w[15], w[16], offset); w[23] = hc_bytealign_be (w[14], w[15], offset); w[22] = hc_bytealign_be (w[13], w[14], offset); w[21] = hc_bytealign_be (w[12], w[13], offset); w[20] = hc_bytealign_be (w[11], w[12], offset); w[19] = hc_bytealign_be (w[10], w[11], offset); w[18] = hc_bytealign_be (w[ 9], w[10], offset); w[17] = hc_bytealign_be (w[ 8], w[ 9], offset); w[16] = hc_bytealign_be (w[ 7], w[ 8], offset); w[15] = hc_bytealign_be (w[ 6], w[ 7], offset); w[14] = hc_bytealign_be (w[ 5], w[ 6], offset); w[13] = hc_bytealign_be (w[ 4], w[ 5], offset); w[12] = hc_bytealign_be (w[ 3], w[ 4], offset); w[11] = hc_bytealign_be (w[ 2], w[ 3], offset); w[10] = hc_bytealign_be (w[ 1], w[ 2], offset); w[ 9] = hc_bytealign_be (w[ 0], w[ 1], offset); w[ 8] = hc_bytealign_be ( 0, w[ 0], offset); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_bytealign_be (w[53], w[54], offset); w[62] = hc_bytealign_be (w[52], w[53], offset); w[61] = hc_bytealign_be (w[51], w[52], offset); w[60] = hc_bytealign_be (w[50], w[51], offset); w[59] = hc_bytealign_be (w[49], w[50], offset); w[58] = hc_bytealign_be (w[48], w[49], offset); w[57] = hc_bytealign_be (w[47], w[48], offset); w[56] = hc_bytealign_be (w[46], w[47], offset); w[55] = hc_bytealign_be (w[45], w[46], offset); w[54] = hc_bytealign_be (w[44], w[45], offset); w[53] = hc_bytealign_be (w[43], w[44], offset); w[52] = hc_bytealign_be (w[42], 
w[43], offset); w[51] = hc_bytealign_be (w[41], w[42], offset); w[50] = hc_bytealign_be (w[40], w[41], offset); w[49] = hc_bytealign_be (w[39], w[40], offset); w[48] = hc_bytealign_be (w[38], w[39], offset); w[47] = hc_bytealign_be (w[37], w[38], offset); w[46] = hc_bytealign_be (w[36], w[37], offset); w[45] = hc_bytealign_be (w[35], w[36], offset); w[44] = hc_bytealign_be (w[34], w[35], offset); w[43] = hc_bytealign_be (w[33], w[34], offset); w[42] = hc_bytealign_be (w[32], w[33], offset); w[41] = hc_bytealign_be (w[31], w[32], offset); w[40] = hc_bytealign_be (w[30], w[31], offset); w[39] = hc_bytealign_be (w[29], w[30], offset); w[38] = hc_bytealign_be (w[28], w[29], offset); w[37] = hc_bytealign_be (w[27], w[28], offset); w[36] = hc_bytealign_be (w[26], w[27], offset); w[35] = hc_bytealign_be (w[25], w[26], offset); w[34] = hc_bytealign_be (w[24], w[25], offset); w[33] = hc_bytealign_be (w[23], w[24], offset); w[32] = hc_bytealign_be (w[22], w[23], offset); w[31] = hc_bytealign_be (w[21], w[22], offset); w[30] = hc_bytealign_be (w[20], w[21], offset); w[29] = hc_bytealign_be (w[19], w[20], offset); w[28] = hc_bytealign_be (w[18], w[19], offset); w[27] = hc_bytealign_be (w[17], w[18], offset); w[26] = hc_bytealign_be (w[16], w[17], offset); w[25] = hc_bytealign_be (w[15], w[16], offset); w[24] = hc_bytealign_be (w[14], w[15], offset); w[23] = hc_bytealign_be (w[13], w[14], offset); w[22] = hc_bytealign_be (w[12], w[13], offset); w[21] = hc_bytealign_be (w[11], w[12], offset); w[20] = hc_bytealign_be (w[10], w[11], offset); w[19] = hc_bytealign_be (w[ 9], w[10], offset); w[18] = hc_bytealign_be (w[ 8], w[ 9], offset); w[17] = hc_bytealign_be (w[ 7], w[ 8], offset); w[16] = hc_bytealign_be (w[ 6], w[ 7], offset); w[15] = hc_bytealign_be (w[ 5], w[ 6], offset); w[14] = hc_bytealign_be (w[ 4], w[ 5], offset); w[13] = hc_bytealign_be (w[ 3], w[ 4], offset); w[12] = hc_bytealign_be (w[ 2], w[ 3], offset); w[11] = hc_bytealign_be (w[ 1], w[ 2], offset); w[10] = 
hc_bytealign_be (w[ 0], w[ 1], offset); w[ 9] = hc_bytealign_be ( 0, w[ 0], offset); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_bytealign_be (w[52], w[53], offset); w[62] = hc_bytealign_be (w[51], w[52], offset); w[61] = hc_bytealign_be (w[50], w[51], offset); w[60] = hc_bytealign_be (w[49], w[50], offset); w[59] = hc_bytealign_be (w[48], w[49], offset); w[58] = hc_bytealign_be (w[47], w[48], offset); w[57] = hc_bytealign_be (w[46], w[47], offset); w[56] = hc_bytealign_be (w[45], w[46], offset); w[55] = hc_bytealign_be (w[44], w[45], offset); w[54] = hc_bytealign_be (w[43], w[44], offset); w[53] = hc_bytealign_be (w[42], w[43], offset); w[52] = hc_bytealign_be (w[41], w[42], offset); w[51] = hc_bytealign_be (w[40], w[41], offset); w[50] = hc_bytealign_be (w[39], w[40], offset); w[49] = hc_bytealign_be (w[38], w[39], offset); w[48] = hc_bytealign_be (w[37], w[38], offset); w[47] = hc_bytealign_be (w[36], w[37], offset); w[46] = hc_bytealign_be (w[35], w[36], offset); w[45] = hc_bytealign_be (w[34], w[35], offset); w[44] = hc_bytealign_be (w[33], w[34], offset); w[43] = hc_bytealign_be (w[32], w[33], offset); w[42] = hc_bytealign_be (w[31], w[32], offset); w[41] = hc_bytealign_be (w[30], w[31], offset); w[40] = hc_bytealign_be (w[29], w[30], offset); w[39] = hc_bytealign_be (w[28], w[29], offset); w[38] = hc_bytealign_be (w[27], w[28], offset); w[37] = hc_bytealign_be (w[26], w[27], offset); w[36] = hc_bytealign_be (w[25], w[26], offset); w[35] = hc_bytealign_be (w[24], w[25], offset); w[34] = hc_bytealign_be (w[23], w[24], offset); w[33] = hc_bytealign_be (w[22], w[23], offset); w[32] = hc_bytealign_be (w[21], w[22], offset); w[31] = hc_bytealign_be (w[20], w[21], offset); w[30] = hc_bytealign_be (w[19], w[20], offset); w[29] = hc_bytealign_be (w[18], w[19], offset); w[28] = hc_bytealign_be (w[17], w[18], offset); w[27] = hc_bytealign_be (w[16], w[17], offset); w[26] = hc_bytealign_be 
(w[15], w[16], offset); w[25] = hc_bytealign_be (w[14], w[15], offset); w[24] = hc_bytealign_be (w[13], w[14], offset); w[23] = hc_bytealign_be (w[12], w[13], offset); w[22] = hc_bytealign_be (w[11], w[12], offset); w[21] = hc_bytealign_be (w[10], w[11], offset); w[20] = hc_bytealign_be (w[ 9], w[10], offset); w[19] = hc_bytealign_be (w[ 8], w[ 9], offset); w[18] = hc_bytealign_be (w[ 7], w[ 8], offset); w[17] = hc_bytealign_be (w[ 6], w[ 7], offset); w[16] = hc_bytealign_be (w[ 5], w[ 6], offset); w[15] = hc_bytealign_be (w[ 4], w[ 5], offset); w[14] = hc_bytealign_be (w[ 3], w[ 4], offset); w[13] = hc_bytealign_be (w[ 2], w[ 3], offset); w[12] = hc_bytealign_be (w[ 1], w[ 2], offset); w[11] = hc_bytealign_be (w[ 0], w[ 1], offset); w[10] = hc_bytealign_be ( 0, w[ 0], offset); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_bytealign_be (w[51], w[52], offset); w[62] = hc_bytealign_be (w[50], w[51], offset); w[61] = hc_bytealign_be (w[49], w[50], offset); w[60] = hc_bytealign_be (w[48], w[49], offset); w[59] = hc_bytealign_be (w[47], w[48], offset); w[58] = hc_bytealign_be (w[46], w[47], offset); w[57] = hc_bytealign_be (w[45], w[46], offset); w[56] = hc_bytealign_be (w[44], w[45], offset); w[55] = hc_bytealign_be (w[43], w[44], offset); w[54] = hc_bytealign_be (w[42], w[43], offset); w[53] = hc_bytealign_be (w[41], w[42], offset); w[52] = hc_bytealign_be (w[40], w[41], offset); w[51] = hc_bytealign_be (w[39], w[40], offset); w[50] = hc_bytealign_be (w[38], w[39], offset); w[49] = hc_bytealign_be (w[37], w[38], offset); w[48] = hc_bytealign_be (w[36], w[37], offset); w[47] = hc_bytealign_be (w[35], w[36], offset); w[46] = hc_bytealign_be (w[34], w[35], offset); w[45] = hc_bytealign_be (w[33], w[34], offset); w[44] = hc_bytealign_be (w[32], w[33], offset); w[43] = hc_bytealign_be (w[31], w[32], offset); w[42] = hc_bytealign_be (w[30], w[31], offset); w[41] = hc_bytealign_be 
(w[29], w[30], offset); w[40] = hc_bytealign_be (w[28], w[29], offset); w[39] = hc_bytealign_be (w[27], w[28], offset); w[38] = hc_bytealign_be (w[26], w[27], offset); w[37] = hc_bytealign_be (w[25], w[26], offset); w[36] = hc_bytealign_be (w[24], w[25], offset); w[35] = hc_bytealign_be (w[23], w[24], offset); w[34] = hc_bytealign_be (w[22], w[23], offset); w[33] = hc_bytealign_be (w[21], w[22], offset); w[32] = hc_bytealign_be (w[20], w[21], offset); w[31] = hc_bytealign_be (w[19], w[20], offset); w[30] = hc_bytealign_be (w[18], w[19], offset); w[29] = hc_bytealign_be (w[17], w[18], offset); w[28] = hc_bytealign_be (w[16], w[17], offset); w[27] = hc_bytealign_be (w[15], w[16], offset); w[26] = hc_bytealign_be (w[14], w[15], offset); w[25] = hc_bytealign_be (w[13], w[14], offset); w[24] = hc_bytealign_be (w[12], w[13], offset); w[23] = hc_bytealign_be (w[11], w[12], offset); w[22] = hc_bytealign_be (w[10], w[11], offset); w[21] = hc_bytealign_be (w[ 9], w[10], offset); w[20] = hc_bytealign_be (w[ 8], w[ 9], offset); w[19] = hc_bytealign_be (w[ 7], w[ 8], offset); w[18] = hc_bytealign_be (w[ 6], w[ 7], offset); w[17] = hc_bytealign_be (w[ 5], w[ 6], offset); w[16] = hc_bytealign_be (w[ 4], w[ 5], offset); w[15] = hc_bytealign_be (w[ 3], w[ 4], offset); w[14] = hc_bytealign_be (w[ 2], w[ 3], offset); w[13] = hc_bytealign_be (w[ 1], w[ 2], offset); w[12] = hc_bytealign_be (w[ 0], w[ 1], offset); w[11] = hc_bytealign_be ( 0, w[ 0], offset); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_bytealign_be (w[50], w[51], offset); w[62] = hc_bytealign_be (w[49], w[50], offset); w[61] = hc_bytealign_be (w[48], w[49], offset); w[60] = hc_bytealign_be (w[47], w[48], offset); w[59] = hc_bytealign_be (w[46], w[47], offset); w[58] = hc_bytealign_be (w[45], w[46], offset); w[57] = hc_bytealign_be (w[44], w[45], offset); w[56] = hc_bytealign_be (w[43], w[44], offset); w[55] = 
hc_bytealign_be (w[42], w[43], offset); w[54] = hc_bytealign_be (w[41], w[42], offset); w[53] = hc_bytealign_be (w[40], w[41], offset); w[52] = hc_bytealign_be (w[39], w[40], offset); w[51] = hc_bytealign_be (w[38], w[39], offset); w[50] = hc_bytealign_be (w[37], w[38], offset); w[49] = hc_bytealign_be (w[36], w[37], offset); w[48] = hc_bytealign_be (w[35], w[36], offset); w[47] = hc_bytealign_be (w[34], w[35], offset); w[46] = hc_bytealign_be (w[33], w[34], offset); w[45] = hc_bytealign_be (w[32], w[33], offset); w[44] = hc_bytealign_be (w[31], w[32], offset); w[43] = hc_bytealign_be (w[30], w[31], offset); w[42] = hc_bytealign_be (w[29], w[30], offset); w[41] = hc_bytealign_be (w[28], w[29], offset); w[40] = hc_bytealign_be (w[27], w[28], offset); w[39] = hc_bytealign_be (w[26], w[27], offset); w[38] = hc_bytealign_be (w[25], w[26], offset); w[37] = hc_bytealign_be (w[24], w[25], offset); w[36] = hc_bytealign_be (w[23], w[24], offset); w[35] = hc_bytealign_be (w[22], w[23], offset); w[34] = hc_bytealign_be (w[21], w[22], offset); w[33] = hc_bytealign_be (w[20], w[21], offset); w[32] = hc_bytealign_be (w[19], w[20], offset); w[31] = hc_bytealign_be (w[18], w[19], offset); w[30] = hc_bytealign_be (w[17], w[18], offset); w[29] = hc_bytealign_be (w[16], w[17], offset); w[28] = hc_bytealign_be (w[15], w[16], offset); w[27] = hc_bytealign_be (w[14], w[15], offset); w[26] = hc_bytealign_be (w[13], w[14], offset); w[25] = hc_bytealign_be (w[12], w[13], offset); w[24] = hc_bytealign_be (w[11], w[12], offset); w[23] = hc_bytealign_be (w[10], w[11], offset); w[22] = hc_bytealign_be (w[ 9], w[10], offset); w[21] = hc_bytealign_be (w[ 8], w[ 9], offset); w[20] = hc_bytealign_be (w[ 7], w[ 8], offset); w[19] = hc_bytealign_be (w[ 6], w[ 7], offset); w[18] = hc_bytealign_be (w[ 5], w[ 6], offset); w[17] = hc_bytealign_be (w[ 4], w[ 5], offset); w[16] = hc_bytealign_be (w[ 3], w[ 4], offset); w[15] = hc_bytealign_be (w[ 2], w[ 3], offset); w[14] = hc_bytealign_be (w[ 1], w[ 2], 
offset); w[13] = hc_bytealign_be (w[ 0], w[ 1], offset); w[12] = hc_bytealign_be ( 0, w[ 0], offset); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_bytealign_be (w[49], w[50], offset); w[62] = hc_bytealign_be (w[48], w[49], offset); w[61] = hc_bytealign_be (w[47], w[48], offset); w[60] = hc_bytealign_be (w[46], w[47], offset); w[59] = hc_bytealign_be (w[45], w[46], offset); w[58] = hc_bytealign_be (w[44], w[45], offset); w[57] = hc_bytealign_be (w[43], w[44], offset); w[56] = hc_bytealign_be (w[42], w[43], offset); w[55] = hc_bytealign_be (w[41], w[42], offset); w[54] = hc_bytealign_be (w[40], w[41], offset); w[53] = hc_bytealign_be (w[39], w[40], offset); w[52] = hc_bytealign_be (w[38], w[39], offset); w[51] = hc_bytealign_be (w[37], w[38], offset); w[50] = hc_bytealign_be (w[36], w[37], offset); w[49] = hc_bytealign_be (w[35], w[36], offset); w[48] = hc_bytealign_be (w[34], w[35], offset); w[47] = hc_bytealign_be (w[33], w[34], offset); w[46] = hc_bytealign_be (w[32], w[33], offset); w[45] = hc_bytealign_be (w[31], w[32], offset); w[44] = hc_bytealign_be (w[30], w[31], offset); w[43] = hc_bytealign_be (w[29], w[30], offset); w[42] = hc_bytealign_be (w[28], w[29], offset); w[41] = hc_bytealign_be (w[27], w[28], offset); w[40] = hc_bytealign_be (w[26], w[27], offset); w[39] = hc_bytealign_be (w[25], w[26], offset); w[38] = hc_bytealign_be (w[24], w[25], offset); w[37] = hc_bytealign_be (w[23], w[24], offset); w[36] = hc_bytealign_be (w[22], w[23], offset); w[35] = hc_bytealign_be (w[21], w[22], offset); w[34] = hc_bytealign_be (w[20], w[21], offset); w[33] = hc_bytealign_be (w[19], w[20], offset); w[32] = hc_bytealign_be (w[18], w[19], offset); w[31] = hc_bytealign_be (w[17], w[18], offset); w[30] = hc_bytealign_be (w[16], w[17], offset); w[29] = hc_bytealign_be (w[15], w[16], offset); w[28] = hc_bytealign_be (w[14], w[15], offset); w[27] = 
hc_bytealign_be (w[13], w[14], offset); w[26] = hc_bytealign_be (w[12], w[13], offset); w[25] = hc_bytealign_be (w[11], w[12], offset); w[24] = hc_bytealign_be (w[10], w[11], offset); w[23] = hc_bytealign_be (w[ 9], w[10], offset); w[22] = hc_bytealign_be (w[ 8], w[ 9], offset); w[21] = hc_bytealign_be (w[ 7], w[ 8], offset); w[20] = hc_bytealign_be (w[ 6], w[ 7], offset); w[19] = hc_bytealign_be (w[ 5], w[ 6], offset); w[18] = hc_bytealign_be (w[ 4], w[ 5], offset); w[17] = hc_bytealign_be (w[ 3], w[ 4], offset); w[16] = hc_bytealign_be (w[ 2], w[ 3], offset); w[15] = hc_bytealign_be (w[ 1], w[ 2], offset); w[14] = hc_bytealign_be (w[ 0], w[ 1], offset); w[13] = hc_bytealign_be ( 0, w[ 0], offset); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = hc_bytealign_be (w[48], w[49], offset); w[62] = hc_bytealign_be (w[47], w[48], offset); w[61] = hc_bytealign_be (w[46], w[47], offset); w[60] = hc_bytealign_be (w[45], w[46], offset); w[59] = hc_bytealign_be (w[44], w[45], offset); w[58] = hc_bytealign_be (w[43], w[44], offset); w[57] = hc_bytealign_be (w[42], w[43], offset); w[56] = hc_bytealign_be (w[41], w[42], offset); w[55] = hc_bytealign_be (w[40], w[41], offset); w[54] = hc_bytealign_be (w[39], w[40], offset); w[53] = hc_bytealign_be (w[38], w[39], offset); w[52] = hc_bytealign_be (w[37], w[38], offset); w[51] = hc_bytealign_be (w[36], w[37], offset); w[50] = hc_bytealign_be (w[35], w[36], offset); w[49] = hc_bytealign_be (w[34], w[35], offset); w[48] = hc_bytealign_be (w[33], w[34], offset); w[47] = hc_bytealign_be (w[32], w[33], offset); w[46] = hc_bytealign_be (w[31], w[32], offset); w[45] = hc_bytealign_be (w[30], w[31], offset); w[44] = hc_bytealign_be (w[29], w[30], offset); w[43] = hc_bytealign_be (w[28], w[29], offset); w[42] = hc_bytealign_be (w[27], w[28], offset); w[41] = hc_bytealign_be (w[26], w[27], offset); w[40] = hc_bytealign_be 
(w[25], w[26], offset); w[39] = hc_bytealign_be (w[24], w[25], offset); w[38] = hc_bytealign_be (w[23], w[24], offset); w[37] = hc_bytealign_be (w[22], w[23], offset); w[36] = hc_bytealign_be (w[21], w[22], offset); w[35] = hc_bytealign_be (w[20], w[21], offset); w[34] = hc_bytealign_be (w[19], w[20], offset); w[33] = hc_bytealign_be (w[18], w[19], offset); w[32] = hc_bytealign_be (w[17], w[18], offset); w[31] = hc_bytealign_be (w[16], w[17], offset); w[30] = hc_bytealign_be (w[15], w[16], offset); w[29] = hc_bytealign_be (w[14], w[15], offset); w[28] = hc_bytealign_be (w[13], w[14], offset); w[27] = hc_bytealign_be (w[12], w[13], offset); w[26] = hc_bytealign_be (w[11], w[12], offset); w[25] = hc_bytealign_be (w[10], w[11], offset); w[24] = hc_bytealign_be (w[ 9], w[10], offset); w[23] = hc_bytealign_be (w[ 8], w[ 9], offset); w[22] = hc_bytealign_be (w[ 7], w[ 8], offset); w[21] = hc_bytealign_be (w[ 6], w[ 7], offset); w[20] = hc_bytealign_be (w[ 5], w[ 6], offset); w[19] = hc_bytealign_be (w[ 4], w[ 5], offset); w[18] = hc_bytealign_be (w[ 3], w[ 4], offset); w[17] = hc_bytealign_be (w[ 2], w[ 3], offset); w[16] = hc_bytealign_be (w[ 1], w[ 2], offset); w[15] = hc_bytealign_be (w[ 0], w[ 1], offset); w[14] = hc_bytealign_be ( 0, w[ 0], offset); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_bytealign_be (w[47], w[48], offset); w[62] = hc_bytealign_be (w[46], w[47], offset); w[61] = hc_bytealign_be (w[45], w[46], offset); w[60] = hc_bytealign_be (w[44], w[45], offset); w[59] = hc_bytealign_be (w[43], w[44], offset); w[58] = hc_bytealign_be (w[42], w[43], offset); w[57] = hc_bytealign_be (w[41], w[42], offset); w[56] = hc_bytealign_be (w[40], w[41], offset); w[55] = hc_bytealign_be (w[39], w[40], offset); w[54] = hc_bytealign_be (w[38], w[39], offset); w[53] = hc_bytealign_be (w[37], w[38], offset); w[52] = hc_bytealign_be (w[36], 
w[37], offset); w[51] = hc_bytealign_be (w[35], w[36], offset); w[50] = hc_bytealign_be (w[34], w[35], offset); w[49] = hc_bytealign_be (w[33], w[34], offset); w[48] = hc_bytealign_be (w[32], w[33], offset); w[47] = hc_bytealign_be (w[31], w[32], offset); w[46] = hc_bytealign_be (w[30], w[31], offset); w[45] = hc_bytealign_be (w[29], w[30], offset); w[44] = hc_bytealign_be (w[28], w[29], offset); w[43] = hc_bytealign_be (w[27], w[28], offset); w[42] = hc_bytealign_be (w[26], w[27], offset); w[41] = hc_bytealign_be (w[25], w[26], offset); w[40] = hc_bytealign_be (w[24], w[25], offset); w[39] = hc_bytealign_be (w[23], w[24], offset); w[38] = hc_bytealign_be (w[22], w[23], offset); w[37] = hc_bytealign_be (w[21], w[22], offset); w[36] = hc_bytealign_be (w[20], w[21], offset); w[35] = hc_bytealign_be (w[19], w[20], offset); w[34] = hc_bytealign_be (w[18], w[19], offset); w[33] = hc_bytealign_be (w[17], w[18], offset); w[32] = hc_bytealign_be (w[16], w[17], offset); w[31] = hc_bytealign_be (w[15], w[16], offset); w[30] = hc_bytealign_be (w[14], w[15], offset); w[29] = hc_bytealign_be (w[13], w[14], offset); w[28] = hc_bytealign_be (w[12], w[13], offset); w[27] = hc_bytealign_be (w[11], w[12], offset); w[26] = hc_bytealign_be (w[10], w[11], offset); w[25] = hc_bytealign_be (w[ 9], w[10], offset); w[24] = hc_bytealign_be (w[ 8], w[ 9], offset); w[23] = hc_bytealign_be (w[ 7], w[ 8], offset); w[22] = hc_bytealign_be (w[ 6], w[ 7], offset); w[21] = hc_bytealign_be (w[ 5], w[ 6], offset); w[20] = hc_bytealign_be (w[ 4], w[ 5], offset); w[19] = hc_bytealign_be (w[ 3], w[ 4], offset); w[18] = hc_bytealign_be (w[ 2], w[ 3], offset); w[17] = hc_bytealign_be (w[ 1], w[ 2], offset); w[16] = hc_bytealign_be (w[ 0], w[ 1], offset); w[15] = hc_bytealign_be ( 0, w[ 0], offset); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_bytealign_be 
(w[46], w[47], offset); w[62] = hc_bytealign_be (w[45], w[46], offset); w[61] = hc_bytealign_be (w[44], w[45], offset); w[60] = hc_bytealign_be (w[43], w[44], offset); w[59] = hc_bytealign_be (w[42], w[43], offset); w[58] = hc_bytealign_be (w[41], w[42], offset); w[57] = hc_bytealign_be (w[40], w[41], offset); w[56] = hc_bytealign_be (w[39], w[40], offset); w[55] = hc_bytealign_be (w[38], w[39], offset); w[54] = hc_bytealign_be (w[37], w[38], offset); w[53] = hc_bytealign_be (w[36], w[37], offset); w[52] = hc_bytealign_be (w[35], w[36], offset); w[51] = hc_bytealign_be (w[34], w[35], offset); w[50] = hc_bytealign_be (w[33], w[34], offset); w[49] = hc_bytealign_be (w[32], w[33], offset); w[48] = hc_bytealign_be (w[31], w[32], offset); w[47] = hc_bytealign_be (w[30], w[31], offset); w[46] = hc_bytealign_be (w[29], w[30], offset); w[45] = hc_bytealign_be (w[28], w[29], offset); w[44] = hc_bytealign_be (w[27], w[28], offset); w[43] = hc_bytealign_be (w[26], w[27], offset); w[42] = hc_bytealign_be (w[25], w[26], offset); w[41] = hc_bytealign_be (w[24], w[25], offset); w[40] = hc_bytealign_be (w[23], w[24], offset); w[39] = hc_bytealign_be (w[22], w[23], offset); w[38] = hc_bytealign_be (w[21], w[22], offset); w[37] = hc_bytealign_be (w[20], w[21], offset); w[36] = hc_bytealign_be (w[19], w[20], offset); w[35] = hc_bytealign_be (w[18], w[19], offset); w[34] = hc_bytealign_be (w[17], w[18], offset); w[33] = hc_bytealign_be (w[16], w[17], offset); w[32] = hc_bytealign_be (w[15], w[16], offset); w[31] = hc_bytealign_be (w[14], w[15], offset); w[30] = hc_bytealign_be (w[13], w[14], offset); w[29] = hc_bytealign_be (w[12], w[13], offset); w[28] = hc_bytealign_be (w[11], w[12], offset); w[27] = hc_bytealign_be (w[10], w[11], offset); w[26] = hc_bytealign_be (w[ 9], w[10], offset); w[25] = hc_bytealign_be (w[ 8], w[ 9], offset); w[24] = hc_bytealign_be (w[ 7], w[ 8], offset); w[23] = hc_bytealign_be (w[ 6], w[ 7], offset); w[22] = hc_bytealign_be (w[ 5], w[ 6], offset); w[21] = 
hc_bytealign_be (w[ 4], w[ 5], offset); w[20] = hc_bytealign_be (w[ 3], w[ 4], offset); w[19] = hc_bytealign_be (w[ 2], w[ 3], offset); w[18] = hc_bytealign_be (w[ 1], w[ 2], offset); w[17] = hc_bytealign_be (w[ 0], w[ 1], offset); w[16] = hc_bytealign_be ( 0, w[ 0], offset); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_bytealign_be (w[45], w[46], offset); w[62] = hc_bytealign_be (w[44], w[45], offset); w[61] = hc_bytealign_be (w[43], w[44], offset); w[60] = hc_bytealign_be (w[42], w[43], offset); w[59] = hc_bytealign_be (w[41], w[42], offset); w[58] = hc_bytealign_be (w[40], w[41], offset); w[57] = hc_bytealign_be (w[39], w[40], offset); w[56] = hc_bytealign_be (w[38], w[39], offset); w[55] = hc_bytealign_be (w[37], w[38], offset); w[54] = hc_bytealign_be (w[36], w[37], offset); w[53] = hc_bytealign_be (w[35], w[36], offset); w[52] = hc_bytealign_be (w[34], w[35], offset); w[51] = hc_bytealign_be (w[33], w[34], offset); w[50] = hc_bytealign_be (w[32], w[33], offset); w[49] = hc_bytealign_be (w[31], w[32], offset); w[48] = hc_bytealign_be (w[30], w[31], offset); w[47] = hc_bytealign_be (w[29], w[30], offset); w[46] = hc_bytealign_be (w[28], w[29], offset); w[45] = hc_bytealign_be (w[27], w[28], offset); w[44] = hc_bytealign_be (w[26], w[27], offset); w[43] = hc_bytealign_be (w[25], w[26], offset); w[42] = hc_bytealign_be (w[24], w[25], offset); w[41] = hc_bytealign_be (w[23], w[24], offset); w[40] = hc_bytealign_be (w[22], w[23], offset); w[39] = hc_bytealign_be (w[21], w[22], offset); w[38] = hc_bytealign_be (w[20], w[21], offset); w[37] = hc_bytealign_be (w[19], w[20], offset); w[36] = hc_bytealign_be (w[18], w[19], offset); w[35] = hc_bytealign_be (w[17], w[18], offset); w[34] = hc_bytealign_be (w[16], w[17], offset); w[33] = hc_bytealign_be (w[15], w[16], offset); w[32] = hc_bytealign_be (w[14], w[15], 
offset); w[31] = hc_bytealign_be (w[13], w[14], offset); w[30] = hc_bytealign_be (w[12], w[13], offset); w[29] = hc_bytealign_be (w[11], w[12], offset); w[28] = hc_bytealign_be (w[10], w[11], offset); w[27] = hc_bytealign_be (w[ 9], w[10], offset); w[26] = hc_bytealign_be (w[ 8], w[ 9], offset); w[25] = hc_bytealign_be (w[ 7], w[ 8], offset); w[24] = hc_bytealign_be (w[ 6], w[ 7], offset); w[23] = hc_bytealign_be (w[ 5], w[ 6], offset); w[22] = hc_bytealign_be (w[ 4], w[ 5], offset); w[21] = hc_bytealign_be (w[ 3], w[ 4], offset); w[20] = hc_bytealign_be (w[ 2], w[ 3], offset); w[19] = hc_bytealign_be (w[ 1], w[ 2], offset); w[18] = hc_bytealign_be (w[ 0], w[ 1], offset); w[17] = hc_bytealign_be ( 0, w[ 0], offset); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_bytealign_be (w[44], w[45], offset); w[62] = hc_bytealign_be (w[43], w[44], offset); w[61] = hc_bytealign_be (w[42], w[43], offset); w[60] = hc_bytealign_be (w[41], w[42], offset); w[59] = hc_bytealign_be (w[40], w[41], offset); w[58] = hc_bytealign_be (w[39], w[40], offset); w[57] = hc_bytealign_be (w[38], w[39], offset); w[56] = hc_bytealign_be (w[37], w[38], offset); w[55] = hc_bytealign_be (w[36], w[37], offset); w[54] = hc_bytealign_be (w[35], w[36], offset); w[53] = hc_bytealign_be (w[34], w[35], offset); w[52] = hc_bytealign_be (w[33], w[34], offset); w[51] = hc_bytealign_be (w[32], w[33], offset); w[50] = hc_bytealign_be (w[31], w[32], offset); w[49] = hc_bytealign_be (w[30], w[31], offset); w[48] = hc_bytealign_be (w[29], w[30], offset); w[47] = hc_bytealign_be (w[28], w[29], offset); w[46] = hc_bytealign_be (w[27], w[28], offset); w[45] = hc_bytealign_be (w[26], w[27], offset); w[44] = hc_bytealign_be (w[25], w[26], offset); w[43] = hc_bytealign_be (w[24], w[25], offset); w[42] = hc_bytealign_be (w[23], w[24], offset); w[41] = 
hc_bytealign_be (w[22], w[23], offset); w[40] = hc_bytealign_be (w[21], w[22], offset); w[39] = hc_bytealign_be (w[20], w[21], offset); w[38] = hc_bytealign_be (w[19], w[20], offset); w[37] = hc_bytealign_be (w[18], w[19], offset); w[36] = hc_bytealign_be (w[17], w[18], offset); w[35] = hc_bytealign_be (w[16], w[17], offset); w[34] = hc_bytealign_be (w[15], w[16], offset); w[33] = hc_bytealign_be (w[14], w[15], offset); w[32] = hc_bytealign_be (w[13], w[14], offset); w[31] = hc_bytealign_be (w[12], w[13], offset); w[30] = hc_bytealign_be (w[11], w[12], offset); w[29] = hc_bytealign_be (w[10], w[11], offset); w[28] = hc_bytealign_be (w[ 9], w[10], offset); w[27] = hc_bytealign_be (w[ 8], w[ 9], offset); w[26] = hc_bytealign_be (w[ 7], w[ 8], offset); w[25] = hc_bytealign_be (w[ 6], w[ 7], offset); w[24] = hc_bytealign_be (w[ 5], w[ 6], offset); w[23] = hc_bytealign_be (w[ 4], w[ 5], offset); w[22] = hc_bytealign_be (w[ 3], w[ 4], offset); w[21] = hc_bytealign_be (w[ 2], w[ 3], offset); w[20] = hc_bytealign_be (w[ 1], w[ 2], offset); w[19] = hc_bytealign_be (w[ 0], w[ 1], offset); w[18] = hc_bytealign_be ( 0, w[ 0], offset); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_bytealign_be (w[43], w[44], offset); w[62] = hc_bytealign_be (w[42], w[43], offset); w[61] = hc_bytealign_be (w[41], w[42], offset); w[60] = hc_bytealign_be (w[40], w[41], offset); w[59] = hc_bytealign_be (w[39], w[40], offset); w[58] = hc_bytealign_be (w[38], w[39], offset); w[57] = hc_bytealign_be (w[37], w[38], offset); w[56] = hc_bytealign_be (w[36], w[37], offset); w[55] = hc_bytealign_be (w[35], w[36], offset); w[54] = hc_bytealign_be (w[34], w[35], offset); w[53] = hc_bytealign_be (w[33], w[34], offset); w[52] = hc_bytealign_be (w[32], w[33], offset); w[51] = hc_bytealign_be (w[31], w[32], offset); w[50] = 
hc_bytealign_be (w[30], w[31], offset); w[49] = hc_bytealign_be (w[29], w[30], offset); w[48] = hc_bytealign_be (w[28], w[29], offset); w[47] = hc_bytealign_be (w[27], w[28], offset); w[46] = hc_bytealign_be (w[26], w[27], offset); w[45] = hc_bytealign_be (w[25], w[26], offset); w[44] = hc_bytealign_be (w[24], w[25], offset); w[43] = hc_bytealign_be (w[23], w[24], offset); w[42] = hc_bytealign_be (w[22], w[23], offset); w[41] = hc_bytealign_be (w[21], w[22], offset); w[40] = hc_bytealign_be (w[20], w[21], offset); w[39] = hc_bytealign_be (w[19], w[20], offset); w[38] = hc_bytealign_be (w[18], w[19], offset); w[37] = hc_bytealign_be (w[17], w[18], offset); w[36] = hc_bytealign_be (w[16], w[17], offset); w[35] = hc_bytealign_be (w[15], w[16], offset); w[34] = hc_bytealign_be (w[14], w[15], offset); w[33] = hc_bytealign_be (w[13], w[14], offset); w[32] = hc_bytealign_be (w[12], w[13], offset); w[31] = hc_bytealign_be (w[11], w[12], offset); w[30] = hc_bytealign_be (w[10], w[11], offset); w[29] = hc_bytealign_be (w[ 9], w[10], offset); w[28] = hc_bytealign_be (w[ 8], w[ 9], offset); w[27] = hc_bytealign_be (w[ 7], w[ 8], offset); w[26] = hc_bytealign_be (w[ 6], w[ 7], offset); w[25] = hc_bytealign_be (w[ 5], w[ 6], offset); w[24] = hc_bytealign_be (w[ 4], w[ 5], offset); w[23] = hc_bytealign_be (w[ 3], w[ 4], offset); w[22] = hc_bytealign_be (w[ 2], w[ 3], offset); w[21] = hc_bytealign_be (w[ 1], w[ 2], offset); w[20] = hc_bytealign_be (w[ 0], w[ 1], offset); w[19] = hc_bytealign_be ( 0, w[ 0], offset); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_bytealign_be (w[42], w[43], offset); w[62] = hc_bytealign_be (w[41], w[42], offset); w[61] = hc_bytealign_be (w[40], w[41], offset); w[60] = hc_bytealign_be (w[39], w[40], offset); w[59] = hc_bytealign_be (w[38], w[39], offset); w[58] = 
hc_bytealign_be (w[37], w[38], offset); w[57] = hc_bytealign_be (w[36], w[37], offset); w[56] = hc_bytealign_be (w[35], w[36], offset); w[55] = hc_bytealign_be (w[34], w[35], offset); w[54] = hc_bytealign_be (w[33], w[34], offset); w[53] = hc_bytealign_be (w[32], w[33], offset); w[52] = hc_bytealign_be (w[31], w[32], offset); w[51] = hc_bytealign_be (w[30], w[31], offset); w[50] = hc_bytealign_be (w[29], w[30], offset); w[49] = hc_bytealign_be (w[28], w[29], offset); w[48] = hc_bytealign_be (w[27], w[28], offset); w[47] = hc_bytealign_be (w[26], w[27], offset); w[46] = hc_bytealign_be (w[25], w[26], offset); w[45] = hc_bytealign_be (w[24], w[25], offset); w[44] = hc_bytealign_be (w[23], w[24], offset); w[43] = hc_bytealign_be (w[22], w[23], offset); w[42] = hc_bytealign_be (w[21], w[22], offset); w[41] = hc_bytealign_be (w[20], w[21], offset); w[40] = hc_bytealign_be (w[19], w[20], offset); w[39] = hc_bytealign_be (w[18], w[19], offset); w[38] = hc_bytealign_be (w[17], w[18], offset); w[37] = hc_bytealign_be (w[16], w[17], offset); w[36] = hc_bytealign_be (w[15], w[16], offset); w[35] = hc_bytealign_be (w[14], w[15], offset); w[34] = hc_bytealign_be (w[13], w[14], offset); w[33] = hc_bytealign_be (w[12], w[13], offset); w[32] = hc_bytealign_be (w[11], w[12], offset); w[31] = hc_bytealign_be (w[10], w[11], offset); w[30] = hc_bytealign_be (w[ 9], w[10], offset); w[29] = hc_bytealign_be (w[ 8], w[ 9], offset); w[28] = hc_bytealign_be (w[ 7], w[ 8], offset); w[27] = hc_bytealign_be (w[ 6], w[ 7], offset); w[26] = hc_bytealign_be (w[ 5], w[ 6], offset); w[25] = hc_bytealign_be (w[ 4], w[ 5], offset); w[24] = hc_bytealign_be (w[ 3], w[ 4], offset); w[23] = hc_bytealign_be (w[ 2], w[ 3], offset); w[22] = hc_bytealign_be (w[ 1], w[ 2], offset); w[21] = hc_bytealign_be (w[ 0], w[ 1], offset); w[20] = hc_bytealign_be ( 0, w[ 0], offset); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] 
= 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_bytealign_be (w[41], w[42], offset); w[62] = hc_bytealign_be (w[40], w[41], offset); w[61] = hc_bytealign_be (w[39], w[40], offset); w[60] = hc_bytealign_be (w[38], w[39], offset); w[59] = hc_bytealign_be (w[37], w[38], offset); w[58] = hc_bytealign_be (w[36], w[37], offset); w[57] = hc_bytealign_be (w[35], w[36], offset); w[56] = hc_bytealign_be (w[34], w[35], offset); w[55] = hc_bytealign_be (w[33], w[34], offset); w[54] = hc_bytealign_be (w[32], w[33], offset); w[53] = hc_bytealign_be (w[31], w[32], offset); w[52] = hc_bytealign_be (w[30], w[31], offset); w[51] = hc_bytealign_be (w[29], w[30], offset); w[50] = hc_bytealign_be (w[28], w[29], offset); w[49] = hc_bytealign_be (w[27], w[28], offset); w[48] = hc_bytealign_be (w[26], w[27], offset); w[47] = hc_bytealign_be (w[25], w[26], offset); w[46] = hc_bytealign_be (w[24], w[25], offset); w[45] = hc_bytealign_be (w[23], w[24], offset); w[44] = hc_bytealign_be (w[22], w[23], offset); w[43] = hc_bytealign_be (w[21], w[22], offset); w[42] = hc_bytealign_be (w[20], w[21], offset); w[41] = hc_bytealign_be (w[19], w[20], offset); w[40] = hc_bytealign_be (w[18], w[19], offset); w[39] = hc_bytealign_be (w[17], w[18], offset); w[38] = hc_bytealign_be (w[16], w[17], offset); w[37] = hc_bytealign_be (w[15], w[16], offset); w[36] = hc_bytealign_be (w[14], w[15], offset); w[35] = hc_bytealign_be (w[13], w[14], offset); w[34] = hc_bytealign_be (w[12], w[13], offset); w[33] = hc_bytealign_be (w[11], w[12], offset); w[32] = hc_bytealign_be (w[10], w[11], offset); w[31] = hc_bytealign_be (w[ 9], w[10], offset); w[30] = hc_bytealign_be (w[ 8], w[ 9], offset); w[29] = hc_bytealign_be (w[ 7], w[ 8], offset); w[28] = hc_bytealign_be (w[ 6], w[ 7], offset); w[27] = hc_bytealign_be (w[ 5], w[ 6], offset); w[26] = hc_bytealign_be (w[ 4], w[ 5], offset); w[25] = hc_bytealign_be (w[ 3], w[ 4], offset); w[24] = hc_bytealign_be (w[ 
2], w[ 3], offset); w[23] = hc_bytealign_be (w[ 1], w[ 2], offset); w[22] = hc_bytealign_be (w[ 0], w[ 1], offset); w[21] = hc_bytealign_be ( 0, w[ 0], offset); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_bytealign_be (w[40], w[41], offset); w[62] = hc_bytealign_be (w[39], w[40], offset); w[61] = hc_bytealign_be (w[38], w[39], offset); w[60] = hc_bytealign_be (w[37], w[38], offset); w[59] = hc_bytealign_be (w[36], w[37], offset); w[58] = hc_bytealign_be (w[35], w[36], offset); w[57] = hc_bytealign_be (w[34], w[35], offset); w[56] = hc_bytealign_be (w[33], w[34], offset); w[55] = hc_bytealign_be (w[32], w[33], offset); w[54] = hc_bytealign_be (w[31], w[32], offset); w[53] = hc_bytealign_be (w[30], w[31], offset); w[52] = hc_bytealign_be (w[29], w[30], offset); w[51] = hc_bytealign_be (w[28], w[29], offset); w[50] = hc_bytealign_be (w[27], w[28], offset); w[49] = hc_bytealign_be (w[26], w[27], offset); w[48] = hc_bytealign_be (w[25], w[26], offset); w[47] = hc_bytealign_be (w[24], w[25], offset); w[46] = hc_bytealign_be (w[23], w[24], offset); w[45] = hc_bytealign_be (w[22], w[23], offset); w[44] = hc_bytealign_be (w[21], w[22], offset); w[43] = hc_bytealign_be (w[20], w[21], offset); w[42] = hc_bytealign_be (w[19], w[20], offset); w[41] = hc_bytealign_be (w[18], w[19], offset); w[40] = hc_bytealign_be (w[17], w[18], offset); w[39] = hc_bytealign_be (w[16], w[17], offset); w[38] = hc_bytealign_be (w[15], w[16], offset); w[37] = hc_bytealign_be (w[14], w[15], offset); w[36] = hc_bytealign_be (w[13], w[14], offset); w[35] = hc_bytealign_be (w[12], w[13], offset); w[34] = hc_bytealign_be (w[11], w[12], offset); w[33] = hc_bytealign_be (w[10], w[11], offset); w[32] = hc_bytealign_be (w[ 9], w[10], offset); w[31] = hc_bytealign_be (w[ 8], w[ 9], offset); w[30] = 
hc_bytealign_be (w[ 7], w[ 8], offset); w[29] = hc_bytealign_be (w[ 6], w[ 7], offset); w[28] = hc_bytealign_be (w[ 5], w[ 6], offset); w[27] = hc_bytealign_be (w[ 4], w[ 5], offset); w[26] = hc_bytealign_be (w[ 3], w[ 4], offset); w[25] = hc_bytealign_be (w[ 2], w[ 3], offset); w[24] = hc_bytealign_be (w[ 1], w[ 2], offset); w[23] = hc_bytealign_be (w[ 0], w[ 1], offset); w[22] = hc_bytealign_be ( 0, w[ 0], offset); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_bytealign_be (w[39], w[40], offset); w[62] = hc_bytealign_be (w[38], w[39], offset); w[61] = hc_bytealign_be (w[37], w[38], offset); w[60] = hc_bytealign_be (w[36], w[37], offset); w[59] = hc_bytealign_be (w[35], w[36], offset); w[58] = hc_bytealign_be (w[34], w[35], offset); w[57] = hc_bytealign_be (w[33], w[34], offset); w[56] = hc_bytealign_be (w[32], w[33], offset); w[55] = hc_bytealign_be (w[31], w[32], offset); w[54] = hc_bytealign_be (w[30], w[31], offset); w[53] = hc_bytealign_be (w[29], w[30], offset); w[52] = hc_bytealign_be (w[28], w[29], offset); w[51] = hc_bytealign_be (w[27], w[28], offset); w[50] = hc_bytealign_be (w[26], w[27], offset); w[49] = hc_bytealign_be (w[25], w[26], offset); w[48] = hc_bytealign_be (w[24], w[25], offset); w[47] = hc_bytealign_be (w[23], w[24], offset); w[46] = hc_bytealign_be (w[22], w[23], offset); w[45] = hc_bytealign_be (w[21], w[22], offset); w[44] = hc_bytealign_be (w[20], w[21], offset); w[43] = hc_bytealign_be (w[19], w[20], offset); w[42] = hc_bytealign_be (w[18], w[19], offset); w[41] = hc_bytealign_be (w[17], w[18], offset); w[40] = hc_bytealign_be (w[16], w[17], offset); w[39] = hc_bytealign_be (w[15], w[16], offset); w[38] = hc_bytealign_be (w[14], w[15], offset); w[37] = hc_bytealign_be (w[13], w[14], offset); w[36] = hc_bytealign_be 
(w[12], w[13], offset); w[35] = hc_bytealign_be (w[11], w[12], offset); w[34] = hc_bytealign_be (w[10], w[11], offset); w[33] = hc_bytealign_be (w[ 9], w[10], offset); w[32] = hc_bytealign_be (w[ 8], w[ 9], offset); w[31] = hc_bytealign_be (w[ 7], w[ 8], offset); w[30] = hc_bytealign_be (w[ 6], w[ 7], offset); w[29] = hc_bytealign_be (w[ 5], w[ 6], offset); w[28] = hc_bytealign_be (w[ 4], w[ 5], offset); w[27] = hc_bytealign_be (w[ 3], w[ 4], offset); w[26] = hc_bytealign_be (w[ 2], w[ 3], offset); w[25] = hc_bytealign_be (w[ 1], w[ 2], offset); w[24] = hc_bytealign_be (w[ 0], w[ 1], offset); w[23] = hc_bytealign_be ( 0, w[ 0], offset); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_bytealign_be (w[38], w[39], offset); w[62] = hc_bytealign_be (w[37], w[38], offset); w[61] = hc_bytealign_be (w[36], w[37], offset); w[60] = hc_bytealign_be (w[35], w[36], offset); w[59] = hc_bytealign_be (w[34], w[35], offset); w[58] = hc_bytealign_be (w[33], w[34], offset); w[57] = hc_bytealign_be (w[32], w[33], offset); w[56] = hc_bytealign_be (w[31], w[32], offset); w[55] = hc_bytealign_be (w[30], w[31], offset); w[54] = hc_bytealign_be (w[29], w[30], offset); w[53] = hc_bytealign_be (w[28], w[29], offset); w[52] = hc_bytealign_be (w[27], w[28], offset); w[51] = hc_bytealign_be (w[26], w[27], offset); w[50] = hc_bytealign_be (w[25], w[26], offset); w[49] = hc_bytealign_be (w[24], w[25], offset); w[48] = hc_bytealign_be (w[23], w[24], offset); w[47] = hc_bytealign_be (w[22], w[23], offset); w[46] = hc_bytealign_be (w[21], w[22], offset); w[45] = hc_bytealign_be (w[20], w[21], offset); w[44] = hc_bytealign_be (w[19], w[20], offset); w[43] = hc_bytealign_be (w[18], w[19], offset); w[42] = hc_bytealign_be (w[17], w[18], offset); w[41] = hc_bytealign_be 
(w[16], w[17], offset); w[40] = hc_bytealign_be (w[15], w[16], offset); w[39] = hc_bytealign_be (w[14], w[15], offset); w[38] = hc_bytealign_be (w[13], w[14], offset); w[37] = hc_bytealign_be (w[12], w[13], offset); w[36] = hc_bytealign_be (w[11], w[12], offset); w[35] = hc_bytealign_be (w[10], w[11], offset); w[34] = hc_bytealign_be (w[ 9], w[10], offset); w[33] = hc_bytealign_be (w[ 8], w[ 9], offset); w[32] = hc_bytealign_be (w[ 7], w[ 8], offset); w[31] = hc_bytealign_be (w[ 6], w[ 7], offset); w[30] = hc_bytealign_be (w[ 5], w[ 6], offset); w[29] = hc_bytealign_be (w[ 4], w[ 5], offset); w[28] = hc_bytealign_be (w[ 3], w[ 4], offset); w[27] = hc_bytealign_be (w[ 2], w[ 3], offset); w[26] = hc_bytealign_be (w[ 1], w[ 2], offset); w[25] = hc_bytealign_be (w[ 0], w[ 1], offset); w[24] = hc_bytealign_be ( 0, w[ 0], offset); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_bytealign_be (w[37], w[38], offset); w[62] = hc_bytealign_be (w[36], w[37], offset); w[61] = hc_bytealign_be (w[35], w[36], offset); w[60] = hc_bytealign_be (w[34], w[35], offset); w[59] = hc_bytealign_be (w[33], w[34], offset); w[58] = hc_bytealign_be (w[32], w[33], offset); w[57] = hc_bytealign_be (w[31], w[32], offset); w[56] = hc_bytealign_be (w[30], w[31], offset); w[55] = hc_bytealign_be (w[29], w[30], offset); w[54] = hc_bytealign_be (w[28], w[29], offset); w[53] = hc_bytealign_be (w[27], w[28], offset); w[52] = hc_bytealign_be (w[26], w[27], offset); w[51] = hc_bytealign_be (w[25], w[26], offset); w[50] = hc_bytealign_be (w[24], w[25], offset); w[49] = hc_bytealign_be (w[23], w[24], offset); w[48] = hc_bytealign_be (w[22], w[23], offset); w[47] = hc_bytealign_be (w[21], w[22], offset); w[46] = hc_bytealign_be (w[20], w[21], offset); w[45] = 
hc_bytealign_be (w[19], w[20], offset); w[44] = hc_bytealign_be (w[18], w[19], offset); w[43] = hc_bytealign_be (w[17], w[18], offset); w[42] = hc_bytealign_be (w[16], w[17], offset); w[41] = hc_bytealign_be (w[15], w[16], offset); w[40] = hc_bytealign_be (w[14], w[15], offset); w[39] = hc_bytealign_be (w[13], w[14], offset); w[38] = hc_bytealign_be (w[12], w[13], offset); w[37] = hc_bytealign_be (w[11], w[12], offset); w[36] = hc_bytealign_be (w[10], w[11], offset); w[35] = hc_bytealign_be (w[ 9], w[10], offset); w[34] = hc_bytealign_be (w[ 8], w[ 9], offset); w[33] = hc_bytealign_be (w[ 7], w[ 8], offset); w[32] = hc_bytealign_be (w[ 6], w[ 7], offset); w[31] = hc_bytealign_be (w[ 5], w[ 6], offset); w[30] = hc_bytealign_be (w[ 4], w[ 5], offset); w[29] = hc_bytealign_be (w[ 3], w[ 4], offset); w[28] = hc_bytealign_be (w[ 2], w[ 3], offset); w[27] = hc_bytealign_be (w[ 1], w[ 2], offset); w[26] = hc_bytealign_be (w[ 0], w[ 1], offset); w[25] = hc_bytealign_be ( 0, w[ 0], offset); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_bytealign_be (w[36], w[37], offset); w[62] = hc_bytealign_be (w[35], w[36], offset); w[61] = hc_bytealign_be (w[34], w[35], offset); w[60] = hc_bytealign_be (w[33], w[34], offset); w[59] = hc_bytealign_be (w[32], w[33], offset); w[58] = hc_bytealign_be (w[31], w[32], offset); w[57] = hc_bytealign_be (w[30], w[31], offset); w[56] = hc_bytealign_be (w[29], w[30], offset); w[55] = hc_bytealign_be (w[28], w[29], offset); w[54] = hc_bytealign_be (w[27], w[28], offset); w[53] = hc_bytealign_be (w[26], w[27], offset); w[52] = hc_bytealign_be (w[25], w[26], offset); w[51] = hc_bytealign_be (w[24], w[25], offset); w[50] = hc_bytealign_be (w[23], w[24], offset); w[49] = hc_bytealign_be (w[22], w[23], 
offset); w[48] = hc_bytealign_be (w[21], w[22], offset); w[47] = hc_bytealign_be (w[20], w[21], offset); w[46] = hc_bytealign_be (w[19], w[20], offset); w[45] = hc_bytealign_be (w[18], w[19], offset); w[44] = hc_bytealign_be (w[17], w[18], offset); w[43] = hc_bytealign_be (w[16], w[17], offset); w[42] = hc_bytealign_be (w[15], w[16], offset); w[41] = hc_bytealign_be (w[14], w[15], offset); w[40] = hc_bytealign_be (w[13], w[14], offset); w[39] = hc_bytealign_be (w[12], w[13], offset); w[38] = hc_bytealign_be (w[11], w[12], offset); w[37] = hc_bytealign_be (w[10], w[11], offset); w[36] = hc_bytealign_be (w[ 9], w[10], offset); w[35] = hc_bytealign_be (w[ 8], w[ 9], offset); w[34] = hc_bytealign_be (w[ 7], w[ 8], offset); w[33] = hc_bytealign_be (w[ 6], w[ 7], offset); w[32] = hc_bytealign_be (w[ 5], w[ 6], offset); w[31] = hc_bytealign_be (w[ 4], w[ 5], offset); w[30] = hc_bytealign_be (w[ 3], w[ 4], offset); w[29] = hc_bytealign_be (w[ 2], w[ 3], offset); w[28] = hc_bytealign_be (w[ 1], w[ 2], offset); w[27] = hc_bytealign_be (w[ 0], w[ 1], offset); w[26] = hc_bytealign_be ( 0, w[ 0], offset); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_bytealign_be (w[35], w[36], offset); w[62] = hc_bytealign_be (w[34], w[35], offset); w[61] = hc_bytealign_be (w[33], w[34], offset); w[60] = hc_bytealign_be (w[32], w[33], offset); w[59] = hc_bytealign_be (w[31], w[32], offset); w[58] = hc_bytealign_be (w[30], w[31], offset); w[57] = hc_bytealign_be (w[29], w[30], offset); w[56] = hc_bytealign_be (w[28], w[29], offset); w[55] = hc_bytealign_be (w[27], w[28], offset); w[54] = hc_bytealign_be (w[26], w[27], offset); w[53] = hc_bytealign_be (w[25], w[26], offset); w[52] = hc_bytealign_be (w[24], w[25], offset); w[51] = 
hc_bytealign_be (w[23], w[24], offset); w[50] = hc_bytealign_be (w[22], w[23], offset); w[49] = hc_bytealign_be (w[21], w[22], offset); w[48] = hc_bytealign_be (w[20], w[21], offset); w[47] = hc_bytealign_be (w[19], w[20], offset); w[46] = hc_bytealign_be (w[18], w[19], offset); w[45] = hc_bytealign_be (w[17], w[18], offset); w[44] = hc_bytealign_be (w[16], w[17], offset); w[43] = hc_bytealign_be (w[15], w[16], offset); w[42] = hc_bytealign_be (w[14], w[15], offset); w[41] = hc_bytealign_be (w[13], w[14], offset); w[40] = hc_bytealign_be (w[12], w[13], offset); w[39] = hc_bytealign_be (w[11], w[12], offset); w[38] = hc_bytealign_be (w[10], w[11], offset); w[37] = hc_bytealign_be (w[ 9], w[10], offset); w[36] = hc_bytealign_be (w[ 8], w[ 9], offset); w[35] = hc_bytealign_be (w[ 7], w[ 8], offset); w[34] = hc_bytealign_be (w[ 6], w[ 7], offset); w[33] = hc_bytealign_be (w[ 5], w[ 6], offset); w[32] = hc_bytealign_be (w[ 4], w[ 5], offset); w[31] = hc_bytealign_be (w[ 3], w[ 4], offset); w[30] = hc_bytealign_be (w[ 2], w[ 3], offset); w[29] = hc_bytealign_be (w[ 1], w[ 2], offset); w[28] = hc_bytealign_be (w[ 0], w[ 1], offset); w[27] = hc_bytealign_be ( 0, w[ 0], offset); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_bytealign_be (w[34], w[35], offset); w[62] = hc_bytealign_be (w[33], w[34], offset); w[61] = hc_bytealign_be (w[32], w[33], offset); w[60] = hc_bytealign_be (w[31], w[32], offset); w[59] = hc_bytealign_be (w[30], w[31], offset); w[58] = hc_bytealign_be (w[29], w[30], offset); w[57] = hc_bytealign_be (w[28], w[29], offset); w[56] = hc_bytealign_be (w[27], w[28], offset); w[55] = hc_bytealign_be (w[26], w[27], offset); w[54] = hc_bytealign_be (w[25], w[26], offset); w[53] = 
hc_bytealign_be (w[24], w[25], offset); w[52] = hc_bytealign_be (w[23], w[24], offset); w[51] = hc_bytealign_be (w[22], w[23], offset); w[50] = hc_bytealign_be (w[21], w[22], offset); w[49] = hc_bytealign_be (w[20], w[21], offset); w[48] = hc_bytealign_be (w[19], w[20], offset); w[47] = hc_bytealign_be (w[18], w[19], offset); w[46] = hc_bytealign_be (w[17], w[18], offset); w[45] = hc_bytealign_be (w[16], w[17], offset); w[44] = hc_bytealign_be (w[15], w[16], offset); w[43] = hc_bytealign_be (w[14], w[15], offset); w[42] = hc_bytealign_be (w[13], w[14], offset); w[41] = hc_bytealign_be (w[12], w[13], offset); w[40] = hc_bytealign_be (w[11], w[12], offset); w[39] = hc_bytealign_be (w[10], w[11], offset); w[38] = hc_bytealign_be (w[ 9], w[10], offset); w[37] = hc_bytealign_be (w[ 8], w[ 9], offset); w[36] = hc_bytealign_be (w[ 7], w[ 8], offset); w[35] = hc_bytealign_be (w[ 6], w[ 7], offset); w[34] = hc_bytealign_be (w[ 5], w[ 6], offset); w[33] = hc_bytealign_be (w[ 4], w[ 5], offset); w[32] = hc_bytealign_be (w[ 3], w[ 4], offset); w[31] = hc_bytealign_be (w[ 2], w[ 3], offset); w[30] = hc_bytealign_be (w[ 1], w[ 2], offset); w[29] = hc_bytealign_be (w[ 0], w[ 1], offset); w[28] = hc_bytealign_be ( 0, w[ 0], offset); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_bytealign_be (w[33], w[34], offset); w[62] = hc_bytealign_be (w[32], w[33], offset); w[61] = hc_bytealign_be (w[31], w[32], offset); w[60] = hc_bytealign_be (w[30], w[31], offset); w[59] = hc_bytealign_be (w[29], w[30], offset); w[58] = hc_bytealign_be (w[28], w[29], offset); w[57] = hc_bytealign_be (w[27], w[28], offset); w[56] = hc_bytealign_be (w[26], w[27], offset); w[55] = hc_bytealign_be (w[25], w[26], offset); w[54] 
= hc_bytealign_be (w[24], w[25], offset); w[53] = hc_bytealign_be (w[23], w[24], offset); w[52] = hc_bytealign_be (w[22], w[23], offset); w[51] = hc_bytealign_be (w[21], w[22], offset); w[50] = hc_bytealign_be (w[20], w[21], offset); w[49] = hc_bytealign_be (w[19], w[20], offset); w[48] = hc_bytealign_be (w[18], w[19], offset); w[47] = hc_bytealign_be (w[17], w[18], offset); w[46] = hc_bytealign_be (w[16], w[17], offset); w[45] = hc_bytealign_be (w[15], w[16], offset); w[44] = hc_bytealign_be (w[14], w[15], offset); w[43] = hc_bytealign_be (w[13], w[14], offset); w[42] = hc_bytealign_be (w[12], w[13], offset); w[41] = hc_bytealign_be (w[11], w[12], offset); w[40] = hc_bytealign_be (w[10], w[11], offset); w[39] = hc_bytealign_be (w[ 9], w[10], offset); w[38] = hc_bytealign_be (w[ 8], w[ 9], offset); w[37] = hc_bytealign_be (w[ 7], w[ 8], offset); w[36] = hc_bytealign_be (w[ 6], w[ 7], offset); w[35] = hc_bytealign_be (w[ 5], w[ 6], offset); w[34] = hc_bytealign_be (w[ 4], w[ 5], offset); w[33] = hc_bytealign_be (w[ 3], w[ 4], offset); w[32] = hc_bytealign_be (w[ 2], w[ 3], offset); w[31] = hc_bytealign_be (w[ 1], w[ 2], offset); w[30] = hc_bytealign_be (w[ 0], w[ 1], offset); w[29] = hc_bytealign_be ( 0, w[ 0], offset); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_bytealign_be (w[32], w[33], offset); w[62] = hc_bytealign_be (w[31], w[32], offset); w[61] = hc_bytealign_be (w[30], w[31], offset); w[60] = hc_bytealign_be (w[29], w[30], offset); w[59] = hc_bytealign_be (w[28], w[29], offset); w[58] = hc_bytealign_be (w[27], w[28], offset); w[57] = hc_bytealign_be (w[26], w[27], offset); w[56] = hc_bytealign_be (w[25], w[26], offset); w[55] = hc_bytealign_be (w[24], w[25], 
offset); w[54] = hc_bytealign_be (w[23], w[24], offset); w[53] = hc_bytealign_be (w[22], w[23], offset); w[52] = hc_bytealign_be (w[21], w[22], offset); w[51] = hc_bytealign_be (w[20], w[21], offset); w[50] = hc_bytealign_be (w[19], w[20], offset); w[49] = hc_bytealign_be (w[18], w[19], offset); w[48] = hc_bytealign_be (w[17], w[18], offset); w[47] = hc_bytealign_be (w[16], w[17], offset); w[46] = hc_bytealign_be (w[15], w[16], offset); w[45] = hc_bytealign_be (w[14], w[15], offset); w[44] = hc_bytealign_be (w[13], w[14], offset); w[43] = hc_bytealign_be (w[12], w[13], offset); w[42] = hc_bytealign_be (w[11], w[12], offset); w[41] = hc_bytealign_be (w[10], w[11], offset); w[40] = hc_bytealign_be (w[ 9], w[10], offset); w[39] = hc_bytealign_be (w[ 8], w[ 9], offset); w[38] = hc_bytealign_be (w[ 7], w[ 8], offset); w[37] = hc_bytealign_be (w[ 6], w[ 7], offset); w[36] = hc_bytealign_be (w[ 5], w[ 6], offset); w[35] = hc_bytealign_be (w[ 4], w[ 5], offset); w[34] = hc_bytealign_be (w[ 3], w[ 4], offset); w[33] = hc_bytealign_be (w[ 2], w[ 3], offset); w[32] = hc_bytealign_be (w[ 1], w[ 2], offset); w[31] = hc_bytealign_be (w[ 0], w[ 1], offset); w[30] = hc_bytealign_be ( 0, w[ 0], offset); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_bytealign_be (w[31], w[32], offset); w[62] = hc_bytealign_be (w[30], w[31], offset); w[61] = hc_bytealign_be (w[29], w[30], offset); w[60] = hc_bytealign_be (w[28], w[29], offset); w[59] = hc_bytealign_be (w[27], w[28], offset); w[58] = hc_bytealign_be (w[26], w[27], offset); w[57] = hc_bytealign_be (w[25], w[26], offset); w[56] = hc_bytealign_be (w[24], w[25], offset); w[55] = hc_bytealign_be (w[23], w[24], offset); w[54] = 
hc_bytealign_be (w[22], w[23], offset); w[53] = hc_bytealign_be (w[21], w[22], offset); w[52] = hc_bytealign_be (w[20], w[21], offset); w[51] = hc_bytealign_be (w[19], w[20], offset); w[50] = hc_bytealign_be (w[18], w[19], offset); w[49] = hc_bytealign_be (w[17], w[18], offset); w[48] = hc_bytealign_be (w[16], w[17], offset); w[47] = hc_bytealign_be (w[15], w[16], offset); w[46] = hc_bytealign_be (w[14], w[15], offset); w[45] = hc_bytealign_be (w[13], w[14], offset); w[44] = hc_bytealign_be (w[12], w[13], offset); w[43] = hc_bytealign_be (w[11], w[12], offset); w[42] = hc_bytealign_be (w[10], w[11], offset); w[41] = hc_bytealign_be (w[ 9], w[10], offset); w[40] = hc_bytealign_be (w[ 8], w[ 9], offset); w[39] = hc_bytealign_be (w[ 7], w[ 8], offset); w[38] = hc_bytealign_be (w[ 6], w[ 7], offset); w[37] = hc_bytealign_be (w[ 5], w[ 6], offset); w[36] = hc_bytealign_be (w[ 4], w[ 5], offset); w[35] = hc_bytealign_be (w[ 3], w[ 4], offset); w[34] = hc_bytealign_be (w[ 2], w[ 3], offset); w[33] = hc_bytealign_be (w[ 1], w[ 2], offset); w[32] = hc_bytealign_be (w[ 0], w[ 1], offset); w[31] = hc_bytealign_be ( 0, w[ 0], offset); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_bytealign_be (w[30], w[31], offset); w[62] = hc_bytealign_be (w[29], w[30], offset); w[61] = hc_bytealign_be (w[28], w[29], offset); w[60] = hc_bytealign_be (w[27], w[28], offset); w[59] = hc_bytealign_be (w[26], w[27], offset); w[58] = hc_bytealign_be (w[25], w[26], offset); w[57] = hc_bytealign_be (w[24], w[25], offset); w[56] = hc_bytealign_be (w[23], w[24], offset); w[55] = hc_bytealign_be (w[22], w[23], offset); w[54] = hc_bytealign_be (w[21], w[22], offset); w[53] = 
hc_bytealign_be (w[20], w[21], offset); w[52] = hc_bytealign_be (w[19], w[20], offset); w[51] = hc_bytealign_be (w[18], w[19], offset); w[50] = hc_bytealign_be (w[17], w[18], offset); w[49] = hc_bytealign_be (w[16], w[17], offset); w[48] = hc_bytealign_be (w[15], w[16], offset); w[47] = hc_bytealign_be (w[14], w[15], offset); w[46] = hc_bytealign_be (w[13], w[14], offset); w[45] = hc_bytealign_be (w[12], w[13], offset); w[44] = hc_bytealign_be (w[11], w[12], offset); w[43] = hc_bytealign_be (w[10], w[11], offset); w[42] = hc_bytealign_be (w[ 9], w[10], offset); w[41] = hc_bytealign_be (w[ 8], w[ 9], offset); w[40] = hc_bytealign_be (w[ 7], w[ 8], offset); w[39] = hc_bytealign_be (w[ 6], w[ 7], offset); w[38] = hc_bytealign_be (w[ 5], w[ 6], offset); w[37] = hc_bytealign_be (w[ 4], w[ 5], offset); w[36] = hc_bytealign_be (w[ 3], w[ 4], offset); w[35] = hc_bytealign_be (w[ 2], w[ 3], offset); w[34] = hc_bytealign_be (w[ 1], w[ 2], offset); w[33] = hc_bytealign_be (w[ 0], w[ 1], offset); w[32] = hc_bytealign_be ( 0, w[ 0], offset); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_bytealign_be (w[29], w[30], offset); w[62] = hc_bytealign_be (w[28], w[29], offset); w[61] = hc_bytealign_be (w[27], w[28], offset); w[60] = hc_bytealign_be (w[26], w[27], offset); w[59] = hc_bytealign_be (w[25], w[26], offset); w[58] = hc_bytealign_be (w[24], w[25], offset); w[57] = hc_bytealign_be (w[23], w[24], offset); w[56] = hc_bytealign_be (w[22], w[23], offset); w[55] = hc_bytealign_be (w[21], w[22], offset); w[54] = hc_bytealign_be (w[20], w[21], offset); w[53] = hc_bytealign_be (w[19], w[20], offset); w[52] = hc_bytealign_be (w[18], w[19], offset); w[51] = 
hc_bytealign_be (w[17], w[18], offset); w[50] = hc_bytealign_be (w[16], w[17], offset); w[49] = hc_bytealign_be (w[15], w[16], offset); w[48] = hc_bytealign_be (w[14], w[15], offset); w[47] = hc_bytealign_be (w[13], w[14], offset); w[46] = hc_bytealign_be (w[12], w[13], offset); w[45] = hc_bytealign_be (w[11], w[12], offset); w[44] = hc_bytealign_be (w[10], w[11], offset); w[43] = hc_bytealign_be (w[ 9], w[10], offset); w[42] = hc_bytealign_be (w[ 8], w[ 9], offset); w[41] = hc_bytealign_be (w[ 7], w[ 8], offset); w[40] = hc_bytealign_be (w[ 6], w[ 7], offset); w[39] = hc_bytealign_be (w[ 5], w[ 6], offset); w[38] = hc_bytealign_be (w[ 4], w[ 5], offset); w[37] = hc_bytealign_be (w[ 3], w[ 4], offset); w[36] = hc_bytealign_be (w[ 2], w[ 3], offset); w[35] = hc_bytealign_be (w[ 1], w[ 2], offset); w[34] = hc_bytealign_be (w[ 0], w[ 1], offset); w[33] = hc_bytealign_be ( 0, w[ 0], offset); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_bytealign_be (w[28], w[29], offset); w[62] = hc_bytealign_be (w[27], w[28], offset); w[61] = hc_bytealign_be (w[26], w[27], offset); w[60] = hc_bytealign_be (w[25], w[26], offset); w[59] = hc_bytealign_be (w[24], w[25], offset); w[58] = hc_bytealign_be (w[23], w[24], offset); w[57] = hc_bytealign_be (w[22], w[23], offset); w[56] = hc_bytealign_be (w[21], w[22], offset); w[55] = hc_bytealign_be (w[20], w[21], offset); w[54] = hc_bytealign_be (w[19], w[20], offset); w[53] = hc_bytealign_be (w[18], w[19], offset); w[52] = hc_bytealign_be (w[17], w[18], offset); w[51] = hc_bytealign_be (w[16], w[17], offset); w[50] = hc_bytealign_be (w[15], w[16], offset); w[49] = hc_bytealign_be (w[14], w[15], offset); 
w[48] = hc_bytealign_be (w[13], w[14], offset); w[47] = hc_bytealign_be (w[12], w[13], offset); w[46] = hc_bytealign_be (w[11], w[12], offset); w[45] = hc_bytealign_be (w[10], w[11], offset); w[44] = hc_bytealign_be (w[ 9], w[10], offset); w[43] = hc_bytealign_be (w[ 8], w[ 9], offset); w[42] = hc_bytealign_be (w[ 7], w[ 8], offset); w[41] = hc_bytealign_be (w[ 6], w[ 7], offset); w[40] = hc_bytealign_be (w[ 5], w[ 6], offset); w[39] = hc_bytealign_be (w[ 4], w[ 5], offset); w[38] = hc_bytealign_be (w[ 3], w[ 4], offset); w[37] = hc_bytealign_be (w[ 2], w[ 3], offset); w[36] = hc_bytealign_be (w[ 1], w[ 2], offset); w[35] = hc_bytealign_be (w[ 0], w[ 1], offset); w[34] = hc_bytealign_be ( 0, w[ 0], offset); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_bytealign_be (w[27], w[28], offset); w[62] = hc_bytealign_be (w[26], w[27], offset); w[61] = hc_bytealign_be (w[25], w[26], offset); w[60] = hc_bytealign_be (w[24], w[25], offset); w[59] = hc_bytealign_be (w[23], w[24], offset); w[58] = hc_bytealign_be (w[22], w[23], offset); w[57] = hc_bytealign_be (w[21], w[22], offset); w[56] = hc_bytealign_be (w[20], w[21], offset); w[55] = hc_bytealign_be (w[19], w[20], offset); w[54] = hc_bytealign_be (w[18], w[19], offset); w[53] = hc_bytealign_be (w[17], w[18], offset); w[52] = hc_bytealign_be (w[16], w[17], offset); w[51] = hc_bytealign_be (w[15], w[16], offset); w[50] = hc_bytealign_be (w[14], w[15], offset); w[49] = hc_bytealign_be (w[13], w[14], offset); w[48] = hc_bytealign_be (w[12], w[13], offset); w[47] = hc_bytealign_be (w[11], w[12], offset); w[46] = hc_bytealign_be (w[10], w[11], offset); w[45] = hc_bytealign_be (w[ 
9], w[10], offset); w[44] = hc_bytealign_be (w[ 8], w[ 9], offset); w[43] = hc_bytealign_be (w[ 7], w[ 8], offset); w[42] = hc_bytealign_be (w[ 6], w[ 7], offset); w[41] = hc_bytealign_be (w[ 5], w[ 6], offset); w[40] = hc_bytealign_be (w[ 4], w[ 5], offset); w[39] = hc_bytealign_be (w[ 3], w[ 4], offset); w[38] = hc_bytealign_be (w[ 2], w[ 3], offset); w[37] = hc_bytealign_be (w[ 1], w[ 2], offset); w[36] = hc_bytealign_be (w[ 0], w[ 1], offset); w[35] = hc_bytealign_be ( 0, w[ 0], offset); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_bytealign_be (w[26], w[27], offset); w[62] = hc_bytealign_be (w[25], w[26], offset); w[61] = hc_bytealign_be (w[24], w[25], offset); w[60] = hc_bytealign_be (w[23], w[24], offset); w[59] = hc_bytealign_be (w[22], w[23], offset); w[58] = hc_bytealign_be (w[21], w[22], offset); w[57] = hc_bytealign_be (w[20], w[21], offset); w[56] = hc_bytealign_be (w[19], w[20], offset); w[55] = hc_bytealign_be (w[18], w[19], offset); w[54] = hc_bytealign_be (w[17], w[18], offset); w[53] = hc_bytealign_be (w[16], w[17], offset); w[52] = hc_bytealign_be (w[15], w[16], offset); w[51] = hc_bytealign_be (w[14], w[15], offset); w[50] = hc_bytealign_be (w[13], w[14], offset); w[49] = hc_bytealign_be (w[12], w[13], offset); w[48] = hc_bytealign_be (w[11], w[12], offset); w[47] = hc_bytealign_be (w[10], w[11], offset); w[46] = hc_bytealign_be (w[ 9], w[10], offset); w[45] = hc_bytealign_be (w[ 8], w[ 9], offset); w[44] = hc_bytealign_be (w[ 7], w[ 8], offset); w[43] = hc_bytealign_be (w[ 6], w[ 7], offset); w[42] = hc_bytealign_be (w[ 5], w[ 6], offset); w[41] = hc_bytealign_be (w[ 4], w[ 5], 
offset); w[40] = hc_bytealign_be (w[ 3], w[ 4], offset); w[39] = hc_bytealign_be (w[ 2], w[ 3], offset); w[38] = hc_bytealign_be (w[ 1], w[ 2], offset); w[37] = hc_bytealign_be (w[ 0], w[ 1], offset); w[36] = hc_bytealign_be ( 0, w[ 0], offset); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_bytealign_be (w[25], w[26], offset); w[62] = hc_bytealign_be (w[24], w[25], offset); w[61] = hc_bytealign_be (w[23], w[24], offset); w[60] = hc_bytealign_be (w[22], w[23], offset); w[59] = hc_bytealign_be (w[21], w[22], offset); w[58] = hc_bytealign_be (w[20], w[21], offset); w[57] = hc_bytealign_be (w[19], w[20], offset); w[56] = hc_bytealign_be (w[18], w[19], offset); w[55] = hc_bytealign_be (w[17], w[18], offset); w[54] = hc_bytealign_be (w[16], w[17], offset); w[53] = hc_bytealign_be (w[15], w[16], offset); w[52] = hc_bytealign_be (w[14], w[15], offset); w[51] = hc_bytealign_be (w[13], w[14], offset); w[50] = hc_bytealign_be (w[12], w[13], offset); w[49] = hc_bytealign_be (w[11], w[12], offset); w[48] = hc_bytealign_be (w[10], w[11], offset); w[47] = hc_bytealign_be (w[ 9], w[10], offset); w[46] = hc_bytealign_be (w[ 8], w[ 9], offset); w[45] = hc_bytealign_be (w[ 7], w[ 8], offset); w[44] = hc_bytealign_be (w[ 6], w[ 7], offset); w[43] = hc_bytealign_be (w[ 5], w[ 6], offset); w[42] = hc_bytealign_be (w[ 4], w[ 5], offset); w[41] = hc_bytealign_be (w[ 3], w[ 4], offset); w[40] = hc_bytealign_be (w[ 2], w[ 3], offset); w[39] = hc_bytealign_be (w[ 1], w[ 2], offset); w[38] = hc_bytealign_be (w[ 0], w[ 1], offset); w[37] = hc_bytealign_be ( 0, w[ 0], offset); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; 
w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_bytealign_be (w[24], w[25], offset); w[62] = hc_bytealign_be (w[23], w[24], offset); w[61] = hc_bytealign_be (w[22], w[23], offset); w[60] = hc_bytealign_be (w[21], w[22], offset); w[59] = hc_bytealign_be (w[20], w[21], offset); w[58] = hc_bytealign_be (w[19], w[20], offset); w[57] = hc_bytealign_be (w[18], w[19], offset); w[56] = hc_bytealign_be (w[17], w[18], offset); w[55] = hc_bytealign_be (w[16], w[17], offset); w[54] = hc_bytealign_be (w[15], w[16], offset); w[53] = hc_bytealign_be (w[14], w[15], offset); w[52] = hc_bytealign_be (w[13], w[14], offset); w[51] = hc_bytealign_be (w[12], w[13], offset); w[50] = hc_bytealign_be (w[11], w[12], offset); w[49] = hc_bytealign_be (w[10], w[11], offset); w[48] = hc_bytealign_be (w[ 9], w[10], offset); w[47] = hc_bytealign_be (w[ 8], w[ 9], offset); w[46] = hc_bytealign_be (w[ 7], w[ 8], offset); w[45] = hc_bytealign_be (w[ 6], w[ 7], offset); w[44] = hc_bytealign_be (w[ 5], w[ 6], offset); w[43] = hc_bytealign_be (w[ 4], w[ 5], offset); w[42] = hc_bytealign_be (w[ 3], w[ 4], offset); w[41] = hc_bytealign_be (w[ 2], w[ 3], offset); w[40] = hc_bytealign_be (w[ 1], w[ 2], offset); w[39] = hc_bytealign_be (w[ 0], w[ 1], offset); w[38] = hc_bytealign_be ( 0, w[ 0], offset); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; 
w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_bytealign_be (w[23], w[24], offset); w[62] = hc_bytealign_be (w[22], w[23], offset); w[61] = hc_bytealign_be (w[21], w[22], offset); w[60] = hc_bytealign_be (w[20], w[21], offset); w[59] = hc_bytealign_be (w[19], w[20], offset); w[58] = hc_bytealign_be (w[18], w[19], offset); w[57] = hc_bytealign_be (w[17], w[18], offset); w[56] = hc_bytealign_be (w[16], w[17], offset); w[55] = hc_bytealign_be (w[15], w[16], offset); w[54] = hc_bytealign_be (w[14], w[15], offset); w[53] = hc_bytealign_be (w[13], w[14], offset); w[52] = hc_bytealign_be (w[12], w[13], offset); w[51] = hc_bytealign_be (w[11], w[12], offset); w[50] = hc_bytealign_be (w[10], w[11], offset); w[49] = hc_bytealign_be (w[ 9], w[10], offset); w[48] = hc_bytealign_be (w[ 8], w[ 9], offset); w[47] = hc_bytealign_be (w[ 7], w[ 8], offset); w[46] = hc_bytealign_be (w[ 6], w[ 7], offset); w[45] = hc_bytealign_be (w[ 5], w[ 6], offset); w[44] = hc_bytealign_be (w[ 4], w[ 5], offset); w[43] = hc_bytealign_be (w[ 3], w[ 4], offset); w[42] = hc_bytealign_be (w[ 2], w[ 3], offset); w[41] = hc_bytealign_be (w[ 1], w[ 2], offset); w[40] = hc_bytealign_be (w[ 0], w[ 1], offset); w[39] = hc_bytealign_be ( 0, w[ 0], offset); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_bytealign_be (w[22], w[23], offset); w[62] = hc_bytealign_be (w[21], w[22], offset); w[61] = hc_bytealign_be (w[20], w[21], offset); w[60] = hc_bytealign_be (w[19], w[20], offset); w[59] = hc_bytealign_be (w[18], w[19], offset); w[58] = hc_bytealign_be (w[17], w[18], offset); w[57] = 
hc_bytealign_be (w[16], w[17], offset); w[56] = hc_bytealign_be (w[15], w[16], offset); w[55] = hc_bytealign_be (w[14], w[15], offset); w[54] = hc_bytealign_be (w[13], w[14], offset); w[53] = hc_bytealign_be (w[12], w[13], offset); w[52] = hc_bytealign_be (w[11], w[12], offset); w[51] = hc_bytealign_be (w[10], w[11], offset); w[50] = hc_bytealign_be (w[ 9], w[10], offset); w[49] = hc_bytealign_be (w[ 8], w[ 9], offset); w[48] = hc_bytealign_be (w[ 7], w[ 8], offset); w[47] = hc_bytealign_be (w[ 6], w[ 7], offset); w[46] = hc_bytealign_be (w[ 5], w[ 6], offset); w[45] = hc_bytealign_be (w[ 4], w[ 5], offset); w[44] = hc_bytealign_be (w[ 3], w[ 4], offset); w[43] = hc_bytealign_be (w[ 2], w[ 3], offset); w[42] = hc_bytealign_be (w[ 1], w[ 2], offset); w[41] = hc_bytealign_be (w[ 0], w[ 1], offset); w[40] = hc_bytealign_be ( 0, w[ 0], offset); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_bytealign_be (w[21], w[22], offset); w[62] = hc_bytealign_be (w[20], w[21], offset); w[61] = hc_bytealign_be (w[19], w[20], offset); w[60] = hc_bytealign_be (w[18], w[19], offset); w[59] = hc_bytealign_be (w[17], w[18], offset); w[58] = hc_bytealign_be (w[16], w[17], offset); w[57] = hc_bytealign_be (w[15], w[16], offset); w[56] = hc_bytealign_be (w[14], w[15], offset); w[55] = hc_bytealign_be (w[13], w[14], offset); w[54] = hc_bytealign_be (w[12], w[13], offset); w[53] = hc_bytealign_be (w[11], w[12], offset); w[52] = hc_bytealign_be (w[10], w[11], offset); w[51] = hc_bytealign_be (w[ 9], w[10], offset); w[50] = hc_bytealign_be (w[ 8], w[ 9], offset); w[49] = 
hc_bytealign_be (w[ 7], w[ 8], offset); w[48] = hc_bytealign_be (w[ 6], w[ 7], offset); w[47] = hc_bytealign_be (w[ 5], w[ 6], offset); w[46] = hc_bytealign_be (w[ 4], w[ 5], offset); w[45] = hc_bytealign_be (w[ 3], w[ 4], offset); w[44] = hc_bytealign_be (w[ 2], w[ 3], offset); w[43] = hc_bytealign_be (w[ 1], w[ 2], offset); w[42] = hc_bytealign_be (w[ 0], w[ 1], offset); w[41] = hc_bytealign_be ( 0, w[ 0], offset); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_bytealign_be (w[20], w[21], offset); w[62] = hc_bytealign_be (w[19], w[20], offset); w[61] = hc_bytealign_be (w[18], w[19], offset); w[60] = hc_bytealign_be (w[17], w[18], offset); w[59] = hc_bytealign_be (w[16], w[17], offset); w[58] = hc_bytealign_be (w[15], w[16], offset); w[57] = hc_bytealign_be (w[14], w[15], offset); w[56] = hc_bytealign_be (w[13], w[14], offset); w[55] = hc_bytealign_be (w[12], w[13], offset); w[54] = hc_bytealign_be (w[11], w[12], offset); w[53] = hc_bytealign_be (w[10], w[11], offset); w[52] = hc_bytealign_be (w[ 9], w[10], offset); w[51] = hc_bytealign_be (w[ 8], w[ 9], offset); w[50] = hc_bytealign_be (w[ 7], w[ 8], offset); w[49] = hc_bytealign_be (w[ 6], w[ 7], offset); w[48] = hc_bytealign_be (w[ 5], w[ 6], offset); w[47] = hc_bytealign_be (w[ 4], w[ 5], offset); w[46] = hc_bytealign_be (w[ 3], w[ 4], offset); w[45] = hc_bytealign_be (w[ 2], w[ 3], offset); w[44] = hc_bytealign_be (w[ 1], w[ 2], offset); w[43] = hc_bytealign_be (w[ 0], w[ 1], offset); w[42] = hc_bytealign_be ( 0, w[ 0], offset); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; 
w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_bytealign_be (w[19], w[20], offset); w[62] = hc_bytealign_be (w[18], w[19], offset); w[61] = hc_bytealign_be (w[17], w[18], offset); w[60] = hc_bytealign_be (w[16], w[17], offset); w[59] = hc_bytealign_be (w[15], w[16], offset); w[58] = hc_bytealign_be (w[14], w[15], offset); w[57] = hc_bytealign_be (w[13], w[14], offset); w[56] = hc_bytealign_be (w[12], w[13], offset); w[55] = hc_bytealign_be (w[11], w[12], offset); w[54] = hc_bytealign_be (w[10], w[11], offset); w[53] = hc_bytealign_be (w[ 9], w[10], offset); w[52] = hc_bytealign_be (w[ 8], w[ 9], offset); w[51] = hc_bytealign_be (w[ 7], w[ 8], offset); w[50] = hc_bytealign_be (w[ 6], w[ 7], offset); w[49] = hc_bytealign_be (w[ 5], w[ 6], offset); w[48] = hc_bytealign_be (w[ 4], w[ 5], offset); w[47] = hc_bytealign_be (w[ 3], w[ 4], offset); w[46] = hc_bytealign_be (w[ 2], w[ 3], offset); w[45] = hc_bytealign_be (w[ 1], w[ 2], offset); w[44] = hc_bytealign_be (w[ 0], w[ 1], offset); w[43] = hc_bytealign_be ( 0, w[ 0], offset); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_bytealign_be (w[18], w[19], offset); w[62] = hc_bytealign_be (w[17], 
w[18], offset); w[61] = hc_bytealign_be (w[16], w[17], offset); w[60] = hc_bytealign_be (w[15], w[16], offset); w[59] = hc_bytealign_be (w[14], w[15], offset); w[58] = hc_bytealign_be (w[13], w[14], offset); w[57] = hc_bytealign_be (w[12], w[13], offset); w[56] = hc_bytealign_be (w[11], w[12], offset); w[55] = hc_bytealign_be (w[10], w[11], offset); w[54] = hc_bytealign_be (w[ 9], w[10], offset); w[53] = hc_bytealign_be (w[ 8], w[ 9], offset); w[52] = hc_bytealign_be (w[ 7], w[ 8], offset); w[51] = hc_bytealign_be (w[ 6], w[ 7], offset); w[50] = hc_bytealign_be (w[ 5], w[ 6], offset); w[49] = hc_bytealign_be (w[ 4], w[ 5], offset); w[48] = hc_bytealign_be (w[ 3], w[ 4], offset); w[47] = hc_bytealign_be (w[ 2], w[ 3], offset); w[46] = hc_bytealign_be (w[ 1], w[ 2], offset); w[45] = hc_bytealign_be (w[ 0], w[ 1], offset); w[44] = hc_bytealign_be ( 0, w[ 0], offset); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_bytealign_be (w[17], w[18], offset); w[62] = hc_bytealign_be (w[16], w[17], offset); w[61] = hc_bytealign_be (w[15], w[16], offset); w[60] = hc_bytealign_be (w[14], w[15], offset); w[59] = hc_bytealign_be (w[13], w[14], offset); w[58] = hc_bytealign_be (w[12], w[13], offset); w[57] = hc_bytealign_be (w[11], w[12], offset); w[56] = hc_bytealign_be (w[10], w[11], offset); w[55] = hc_bytealign_be (w[ 9], w[10], offset); w[54] = hc_bytealign_be (w[ 8], w[ 9], offset); w[53] = hc_bytealign_be (w[ 7], w[ 8], offset); w[52] = hc_bytealign_be (w[ 6], w[ 7], offset); w[51] = hc_bytealign_be (w[ 5], w[ 6], 
offset); w[50] = hc_bytealign_be (w[ 4], w[ 5], offset); w[49] = hc_bytealign_be (w[ 3], w[ 4], offset); w[48] = hc_bytealign_be (w[ 2], w[ 3], offset); w[47] = hc_bytealign_be (w[ 1], w[ 2], offset); w[46] = hc_bytealign_be (w[ 0], w[ 1], offset); w[45] = hc_bytealign_be ( 0, w[ 0], offset); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_bytealign_be (w[16], w[17], offset); w[62] = hc_bytealign_be (w[15], w[16], offset); w[61] = hc_bytealign_be (w[14], w[15], offset); w[60] = hc_bytealign_be (w[13], w[14], offset); w[59] = hc_bytealign_be (w[12], w[13], offset); w[58] = hc_bytealign_be (w[11], w[12], offset); w[57] = hc_bytealign_be (w[10], w[11], offset); w[56] = hc_bytealign_be (w[ 9], w[10], offset); w[55] = hc_bytealign_be (w[ 8], w[ 9], offset); w[54] = hc_bytealign_be (w[ 7], w[ 8], offset); w[53] = hc_bytealign_be (w[ 6], w[ 7], offset); w[52] = hc_bytealign_be (w[ 5], w[ 6], offset); w[51] = hc_bytealign_be (w[ 4], w[ 5], offset); w[50] = hc_bytealign_be (w[ 3], w[ 4], offset); w[49] = hc_bytealign_be (w[ 2], w[ 3], offset); w[48] = hc_bytealign_be (w[ 1], w[ 2], offset); w[47] = hc_bytealign_be (w[ 0], w[ 1], offset); w[46] = hc_bytealign_be ( 0, w[ 0], offset); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; 
w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_bytealign_be (w[15], w[16], offset); w[62] = hc_bytealign_be (w[14], w[15], offset); w[61] = hc_bytealign_be (w[13], w[14], offset); w[60] = hc_bytealign_be (w[12], w[13], offset); w[59] = hc_bytealign_be (w[11], w[12], offset); w[58] = hc_bytealign_be (w[10], w[11], offset); w[57] = hc_bytealign_be (w[ 9], w[10], offset); w[56] = hc_bytealign_be (w[ 8], w[ 9], offset); w[55] = hc_bytealign_be (w[ 7], w[ 8], offset); w[54] = hc_bytealign_be (w[ 6], w[ 7], offset); w[53] = hc_bytealign_be (w[ 5], w[ 6], offset); w[52] = hc_bytealign_be (w[ 4], w[ 5], offset); w[51] = hc_bytealign_be (w[ 3], w[ 4], offset); w[50] = hc_bytealign_be (w[ 2], w[ 3], offset); w[49] = hc_bytealign_be (w[ 1], w[ 2], offset); w[48] = hc_bytealign_be (w[ 0], w[ 1], offset); w[47] = hc_bytealign_be ( 0, w[ 0], offset); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_bytealign_be (w[14], w[15], offset); w[62] = hc_bytealign_be (w[13], w[14], offset); w[61] = hc_bytealign_be (w[12], w[13], offset); w[60] = hc_bytealign_be (w[11], w[12], offset); w[59] = hc_bytealign_be (w[10], w[11], offset); w[58] = hc_bytealign_be (w[ 9], w[10], offset); w[57] = hc_bytealign_be (w[ 8], w[ 9], offset); w[56] = hc_bytealign_be (w[ 7], w[ 8], offset); w[55] = hc_bytealign_be (w[ 6], w[ 7], offset); w[54] = hc_bytealign_be (w[ 
5], w[ 6], offset); w[53] = hc_bytealign_be (w[ 4], w[ 5], offset); w[52] = hc_bytealign_be (w[ 3], w[ 4], offset); w[51] = hc_bytealign_be (w[ 2], w[ 3], offset); w[50] = hc_bytealign_be (w[ 1], w[ 2], offset); w[49] = hc_bytealign_be (w[ 0], w[ 1], offset); w[48] = hc_bytealign_be ( 0, w[ 0], offset); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_bytealign_be (w[13], w[14], offset); w[62] = hc_bytealign_be (w[12], w[13], offset); w[61] = hc_bytealign_be (w[11], w[12], offset); w[60] = hc_bytealign_be (w[10], w[11], offset); w[59] = hc_bytealign_be (w[ 9], w[10], offset); w[58] = hc_bytealign_be (w[ 8], w[ 9], offset); w[57] = hc_bytealign_be (w[ 7], w[ 8], offset); w[56] = hc_bytealign_be (w[ 6], w[ 7], offset); w[55] = hc_bytealign_be (w[ 5], w[ 6], offset); w[54] = hc_bytealign_be (w[ 4], w[ 5], offset); w[53] = hc_bytealign_be (w[ 3], w[ 4], offset); w[52] = hc_bytealign_be (w[ 2], w[ 3], offset); w[51] = hc_bytealign_be (w[ 1], w[ 2], offset); w[50] = hc_bytealign_be (w[ 0], w[ 1], offset); w[49] = hc_bytealign_be ( 0, w[ 0], offset); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 
9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_bytealign_be (w[12], w[13], offset); w[62] = hc_bytealign_be (w[11], w[12], offset); w[61] = hc_bytealign_be (w[10], w[11], offset); w[60] = hc_bytealign_be (w[ 9], w[10], offset); w[59] = hc_bytealign_be (w[ 8], w[ 9], offset); w[58] = hc_bytealign_be (w[ 7], w[ 8], offset); w[57] = hc_bytealign_be (w[ 6], w[ 7], offset); w[56] = hc_bytealign_be (w[ 5], w[ 6], offset); w[55] = hc_bytealign_be (w[ 4], w[ 5], offset); w[54] = hc_bytealign_be (w[ 3], w[ 4], offset); w[53] = hc_bytealign_be (w[ 2], w[ 3], offset); w[52] = hc_bytealign_be (w[ 1], w[ 2], offset); w[51] = hc_bytealign_be (w[ 0], w[ 1], offset); w[50] = hc_bytealign_be ( 0, w[ 0], offset); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_bytealign_be (w[11], w[12], offset); w[62] = hc_bytealign_be (w[10], w[11], offset); w[61] = hc_bytealign_be (w[ 9], w[10], offset); w[60] = hc_bytealign_be (w[ 8], w[ 9], offset); w[59] = hc_bytealign_be (w[ 7], w[ 8], offset); w[58] = hc_bytealign_be (w[ 6], w[ 7], offset); w[57] = hc_bytealign_be (w[ 5], w[ 6], offset); w[56] = hc_bytealign_be (w[ 4], w[ 5], offset); w[55] = hc_bytealign_be (w[ 3], w[ 4], offset); w[54] = hc_bytealign_be (w[ 2], w[ 3], offset); w[53] = hc_bytealign_be (w[ 1], w[ 2], offset); w[52] = hc_bytealign_be (w[ 0], w[ 1], offset); w[51] = hc_bytealign_be ( 0, w[ 0], offset); w[50] = 0; w[49] = 
0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_bytealign_be (w[10], w[11], offset); w[62] = hc_bytealign_be (w[ 9], w[10], offset); w[61] = hc_bytealign_be (w[ 8], w[ 9], offset); w[60] = hc_bytealign_be (w[ 7], w[ 8], offset); w[59] = hc_bytealign_be (w[ 6], w[ 7], offset); w[58] = hc_bytealign_be (w[ 5], w[ 6], offset); w[57] = hc_bytealign_be (w[ 4], w[ 5], offset); w[56] = hc_bytealign_be (w[ 3], w[ 4], offset); w[55] = hc_bytealign_be (w[ 2], w[ 3], offset); w[54] = hc_bytealign_be (w[ 1], w[ 2], offset); w[53] = hc_bytealign_be (w[ 0], w[ 1], offset); w[52] = hc_bytealign_be ( 0, w[ 0], offset); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_bytealign_be (w[ 9], w[10], offset); w[62] = hc_bytealign_be (w[ 8], w[ 9], offset); w[61] = hc_bytealign_be (w[ 7], w[ 8], offset); w[60] = hc_bytealign_be (w[ 6], w[ 7], offset); w[59] = hc_bytealign_be (w[ 5], w[ 6], offset); w[58] = hc_bytealign_be (w[ 4], w[ 5], 
offset); w[57] = hc_bytealign_be (w[ 3], w[ 4], offset); w[56] = hc_bytealign_be (w[ 2], w[ 3], offset); w[55] = hc_bytealign_be (w[ 1], w[ 2], offset); w[54] = hc_bytealign_be (w[ 0], w[ 1], offset); w[53] = hc_bytealign_be ( 0, w[ 0], offset); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_bytealign_be (w[ 8], w[ 9], offset); w[62] = hc_bytealign_be (w[ 7], w[ 8], offset); w[61] = hc_bytealign_be (w[ 6], w[ 7], offset); w[60] = hc_bytealign_be (w[ 5], w[ 6], offset); w[59] = hc_bytealign_be (w[ 4], w[ 5], offset); w[58] = hc_bytealign_be (w[ 3], w[ 4], offset); w[57] = hc_bytealign_be (w[ 2], w[ 3], offset); w[56] = hc_bytealign_be (w[ 1], w[ 2], offset); w[55] = hc_bytealign_be (w[ 0], w[ 1], offset); w[54] = hc_bytealign_be ( 0, w[ 0], offset); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_bytealign_be (w[ 7], w[ 8], offset); w[62] = 
hc_bytealign_be (w[ 6], w[ 7], offset); w[61] = hc_bytealign_be (w[ 5], w[ 6], offset); w[60] = hc_bytealign_be (w[ 4], w[ 5], offset); w[59] = hc_bytealign_be (w[ 3], w[ 4], offset); w[58] = hc_bytealign_be (w[ 2], w[ 3], offset); w[57] = hc_bytealign_be (w[ 1], w[ 2], offset); w[56] = hc_bytealign_be (w[ 0], w[ 1], offset); w[55] = hc_bytealign_be ( 0, w[ 0], offset); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_bytealign_be (w[ 6], w[ 7], offset); w[62] = hc_bytealign_be (w[ 5], w[ 6], offset); w[61] = hc_bytealign_be (w[ 4], w[ 5], offset); w[60] = hc_bytealign_be (w[ 3], w[ 4], offset); w[59] = hc_bytealign_be (w[ 2], w[ 3], offset); w[58] = hc_bytealign_be (w[ 1], w[ 2], offset); w[57] = hc_bytealign_be (w[ 0], w[ 1], offset); w[56] = hc_bytealign_be ( 0, w[ 0], offset); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; 
case 57: w[63] = hc_bytealign_be (w[ 5], w[ 6], offset); w[62] = hc_bytealign_be (w[ 4], w[ 5], offset); w[61] = hc_bytealign_be (w[ 3], w[ 4], offset); w[60] = hc_bytealign_be (w[ 2], w[ 3], offset); w[59] = hc_bytealign_be (w[ 1], w[ 2], offset); w[58] = hc_bytealign_be (w[ 0], w[ 1], offset); w[57] = hc_bytealign_be ( 0, w[ 0], offset); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_bytealign_be (w[ 4], w[ 5], offset); w[62] = hc_bytealign_be (w[ 3], w[ 4], offset); w[61] = hc_bytealign_be (w[ 2], w[ 3], offset); w[60] = hc_bytealign_be (w[ 1], w[ 2], offset); w[59] = hc_bytealign_be (w[ 0], w[ 1], offset); w[58] = hc_bytealign_be ( 0, w[ 0], offset); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_bytealign_be (w[ 3], w[ 4], offset); w[62] = hc_bytealign_be (w[ 
2], w[ 3], offset); w[61] = hc_bytealign_be (w[ 1], w[ 2], offset); w[60] = hc_bytealign_be (w[ 0], w[ 1], offset); w[59] = hc_bytealign_be ( 0, w[ 0], offset); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_bytealign_be (w[ 2], w[ 3], offset); w[62] = hc_bytealign_be (w[ 1], w[ 2], offset); w[61] = hc_bytealign_be (w[ 0], w[ 1], offset); w[60] = hc_bytealign_be ( 0, w[ 0], offset); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_bytealign_be (w[ 1], w[ 2], offset); w[62] = hc_bytealign_be (w[ 0], w[ 1], offset); w[61] = hc_bytealign_be ( 0, w[ 0], offset); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; 
w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_bytealign_be (w[ 0], w[ 1], offset); w[62] = hc_bytealign_be ( 0, w[ 0], offset); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_bytealign_be ( 0, w[ 0], offset); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 
4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w[63] = hc_byte_perm (w[63], w[62], selector); w[62] = hc_byte_perm (w[62], w[61], selector); w[61] = hc_byte_perm (w[61], w[60], selector); w[60] = hc_byte_perm (w[60], w[59], selector); w[59] = hc_byte_perm (w[59], w[58], selector); w[58] = hc_byte_perm (w[58], w[57], selector); w[57] = hc_byte_perm (w[57], w[56], selector); w[56] = hc_byte_perm (w[56], w[55], selector); w[55] = hc_byte_perm (w[55], w[54], selector); w[54] = hc_byte_perm (w[54], w[53], selector); w[53] = hc_byte_perm (w[53], w[52], selector); w[52] = hc_byte_perm (w[52], w[51], selector); w[51] = hc_byte_perm (w[51], w[50], selector); w[50] = hc_byte_perm (w[50], w[49], selector); w[49] = hc_byte_perm (w[49], w[48], selector); w[48] = hc_byte_perm (w[48], w[47], selector); w[47] = hc_byte_perm (w[47], w[46], selector); w[46] = hc_byte_perm (w[46], w[45], selector); w[45] = hc_byte_perm (w[45], w[44], selector); w[44] = hc_byte_perm (w[44], w[43], selector); w[43] = hc_byte_perm (w[43], w[42], selector); w[42] = hc_byte_perm (w[42], w[41], selector); w[41] = hc_byte_perm (w[41], w[40], selector); w[40] = hc_byte_perm (w[40], w[39], selector); w[39] = hc_byte_perm (w[39], w[38], selector); w[38] = hc_byte_perm (w[38], w[37], selector); w[37] = hc_byte_perm (w[37], w[36], selector); w[36] = hc_byte_perm (w[36], w[35], selector); w[35] = hc_byte_perm (w[35], w[34], selector); w[34] = hc_byte_perm (w[34], w[33], selector); w[33] = hc_byte_perm (w[33], w[32], selector); w[32] = hc_byte_perm (w[32], w[31], selector); w[31] = hc_byte_perm (w[31], w[30], selector); w[30] = hc_byte_perm (w[30], w[29], selector); w[29] 
= hc_byte_perm (w[29], w[28], selector); w[28] = hc_byte_perm (w[28], w[27], selector); w[27] = hc_byte_perm (w[27], w[26], selector); w[26] = hc_byte_perm (w[26], w[25], selector); w[25] = hc_byte_perm (w[25], w[24], selector); w[24] = hc_byte_perm (w[24], w[23], selector); w[23] = hc_byte_perm (w[23], w[22], selector); w[22] = hc_byte_perm (w[22], w[21], selector); w[21] = hc_byte_perm (w[21], w[20], selector); w[20] = hc_byte_perm (w[20], w[19], selector); w[19] = hc_byte_perm (w[19], w[18], selector); w[18] = hc_byte_perm (w[18], w[17], selector); w[17] = hc_byte_perm (w[17], w[16], selector); w[16] = hc_byte_perm (w[16], w[15], selector); w[15] = hc_byte_perm (w[15], w[14], selector); w[14] = hc_byte_perm (w[14], w[13], selector); w[13] = hc_byte_perm (w[13], w[12], selector); w[12] = hc_byte_perm (w[12], w[11], selector); w[11] = hc_byte_perm (w[11], w[10], selector); w[10] = hc_byte_perm (w[10], w[ 9], selector); w[ 9] = hc_byte_perm (w[ 9], w[ 8], selector); w[ 8] = hc_byte_perm (w[ 8], w[ 7], selector); w[ 7] = hc_byte_perm (w[ 7], w[ 6], selector); w[ 6] = hc_byte_perm (w[ 6], w[ 5], selector); w[ 5] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 4] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 3] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 2] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 1] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 0] = hc_byte_perm (w[ 0], 0, selector); break; case 1: w[63] = hc_byte_perm (w[62], w[61], selector); w[62] = hc_byte_perm (w[61], w[60], selector); w[61] = hc_byte_perm (w[60], w[59], selector); w[60] = hc_byte_perm (w[59], w[58], selector); w[59] = hc_byte_perm (w[58], w[57], selector); w[58] = hc_byte_perm (w[57], w[56], selector); w[57] = hc_byte_perm (w[56], w[55], selector); w[56] = hc_byte_perm (w[55], w[54], selector); w[55] = hc_byte_perm (w[54], w[53], selector); w[54] = hc_byte_perm (w[53], w[52], selector); w[53] = hc_byte_perm (w[52], w[51], selector); w[52] = hc_byte_perm (w[51], w[50], selector); w[51] = hc_byte_perm 
(w[50], w[49], selector); w[50] = hc_byte_perm (w[49], w[48], selector); w[49] = hc_byte_perm (w[48], w[47], selector); w[48] = hc_byte_perm (w[47], w[46], selector); w[47] = hc_byte_perm (w[46], w[45], selector); w[46] = hc_byte_perm (w[45], w[44], selector); w[45] = hc_byte_perm (w[44], w[43], selector); w[44] = hc_byte_perm (w[43], w[42], selector); w[43] = hc_byte_perm (w[42], w[41], selector); w[42] = hc_byte_perm (w[41], w[40], selector); w[41] = hc_byte_perm (w[40], w[39], selector); w[40] = hc_byte_perm (w[39], w[38], selector); w[39] = hc_byte_perm (w[38], w[37], selector); w[38] = hc_byte_perm (w[37], w[36], selector); w[37] = hc_byte_perm (w[36], w[35], selector); w[36] = hc_byte_perm (w[35], w[34], selector); w[35] = hc_byte_perm (w[34], w[33], selector); w[34] = hc_byte_perm (w[33], w[32], selector); w[33] = hc_byte_perm (w[32], w[31], selector); w[32] = hc_byte_perm (w[31], w[30], selector); w[31] = hc_byte_perm (w[30], w[29], selector); w[30] = hc_byte_perm (w[29], w[28], selector); w[29] = hc_byte_perm (w[28], w[27], selector); w[28] = hc_byte_perm (w[27], w[26], selector); w[27] = hc_byte_perm (w[26], w[25], selector); w[26] = hc_byte_perm (w[25], w[24], selector); w[25] = hc_byte_perm (w[24], w[23], selector); w[24] = hc_byte_perm (w[23], w[22], selector); w[23] = hc_byte_perm (w[22], w[21], selector); w[22] = hc_byte_perm (w[21], w[20], selector); w[21] = hc_byte_perm (w[20], w[19], selector); w[20] = hc_byte_perm (w[19], w[18], selector); w[19] = hc_byte_perm (w[18], w[17], selector); w[18] = hc_byte_perm (w[17], w[16], selector); w[17] = hc_byte_perm (w[16], w[15], selector); w[16] = hc_byte_perm (w[15], w[14], selector); w[15] = hc_byte_perm (w[14], w[13], selector); w[14] = hc_byte_perm (w[13], w[12], selector); w[13] = hc_byte_perm (w[12], w[11], selector); w[12] = hc_byte_perm (w[11], w[10], selector); w[11] = hc_byte_perm (w[10], w[ 9], selector); w[10] = hc_byte_perm (w[ 9], w[ 8], selector); w[ 9] = hc_byte_perm (w[ 8], w[ 7], selector); 
w[ 8] = hc_byte_perm (w[ 7], w[ 6], selector); w[ 7] = hc_byte_perm (w[ 6], w[ 5], selector); w[ 6] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 5] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 4] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 3] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 2] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 1] = hc_byte_perm (w[ 0], 0, selector); w[ 0] = 0; break; case 2: w[63] = hc_byte_perm (w[61], w[60], selector); w[62] = hc_byte_perm (w[60], w[59], selector); w[61] = hc_byte_perm (w[59], w[58], selector); w[60] = hc_byte_perm (w[58], w[57], selector); w[59] = hc_byte_perm (w[57], w[56], selector); w[58] = hc_byte_perm (w[56], w[55], selector); w[57] = hc_byte_perm (w[55], w[54], selector); w[56] = hc_byte_perm (w[54], w[53], selector); w[55] = hc_byte_perm (w[53], w[52], selector); w[54] = hc_byte_perm (w[52], w[51], selector); w[53] = hc_byte_perm (w[51], w[50], selector); w[52] = hc_byte_perm (w[50], w[49], selector); w[51] = hc_byte_perm (w[49], w[48], selector); w[50] = hc_byte_perm (w[48], w[47], selector); w[49] = hc_byte_perm (w[47], w[46], selector); w[48] = hc_byte_perm (w[46], w[45], selector); w[47] = hc_byte_perm (w[45], w[44], selector); w[46] = hc_byte_perm (w[44], w[43], selector); w[45] = hc_byte_perm (w[43], w[42], selector); w[44] = hc_byte_perm (w[42], w[41], selector); w[43] = hc_byte_perm (w[41], w[40], selector); w[42] = hc_byte_perm (w[40], w[39], selector); w[41] = hc_byte_perm (w[39], w[38], selector); w[40] = hc_byte_perm (w[38], w[37], selector); w[39] = hc_byte_perm (w[37], w[36], selector); w[38] = hc_byte_perm (w[36], w[35], selector); w[37] = hc_byte_perm (w[35], w[34], selector); w[36] = hc_byte_perm (w[34], w[33], selector); w[35] = hc_byte_perm (w[33], w[32], selector); w[34] = hc_byte_perm (w[32], w[31], selector); w[33] = hc_byte_perm (w[31], w[30], selector); w[32] = hc_byte_perm (w[30], w[29], selector); w[31] = hc_byte_perm (w[29], w[28], selector); w[30] = hc_byte_perm (w[28], w[27], selector); 
w[29] = hc_byte_perm (w[27], w[26], selector); w[28] = hc_byte_perm (w[26], w[25], selector); w[27] = hc_byte_perm (w[25], w[24], selector); w[26] = hc_byte_perm (w[24], w[23], selector); w[25] = hc_byte_perm (w[23], w[22], selector); w[24] = hc_byte_perm (w[22], w[21], selector); w[23] = hc_byte_perm (w[21], w[20], selector); w[22] = hc_byte_perm (w[20], w[19], selector); w[21] = hc_byte_perm (w[19], w[18], selector); w[20] = hc_byte_perm (w[18], w[17], selector); w[19] = hc_byte_perm (w[17], w[16], selector); w[18] = hc_byte_perm (w[16], w[15], selector); w[17] = hc_byte_perm (w[15], w[14], selector); w[16] = hc_byte_perm (w[14], w[13], selector); w[15] = hc_byte_perm (w[13], w[12], selector); w[14] = hc_byte_perm (w[12], w[11], selector); w[13] = hc_byte_perm (w[11], w[10], selector); w[12] = hc_byte_perm (w[10], w[ 9], selector); w[11] = hc_byte_perm (w[ 9], w[ 8], selector); w[10] = hc_byte_perm (w[ 8], w[ 7], selector); w[ 9] = hc_byte_perm (w[ 7], w[ 6], selector); w[ 8] = hc_byte_perm (w[ 6], w[ 5], selector); w[ 7] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 6] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 5] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 4] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 3] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 2] = hc_byte_perm (w[ 0], 0, selector); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_byte_perm (w[60], w[59], selector); w[62] = hc_byte_perm (w[59], w[58], selector); w[61] = hc_byte_perm (w[58], w[57], selector); w[60] = hc_byte_perm (w[57], w[56], selector); w[59] = hc_byte_perm (w[56], w[55], selector); w[58] = hc_byte_perm (w[55], w[54], selector); w[57] = hc_byte_perm (w[54], w[53], selector); w[56] = hc_byte_perm (w[53], w[52], selector); w[55] = hc_byte_perm (w[52], w[51], selector); w[54] = hc_byte_perm (w[51], w[50], selector); w[53] = hc_byte_perm (w[50], w[49], selector); w[52] = hc_byte_perm (w[49], w[48], selector); w[51] = hc_byte_perm (w[48], w[47], selector); w[50] = hc_byte_perm (w[47], w[46], 
selector); w[49] = hc_byte_perm (w[46], w[45], selector); w[48] = hc_byte_perm (w[45], w[44], selector); w[47] = hc_byte_perm (w[44], w[43], selector); w[46] = hc_byte_perm (w[43], w[42], selector); w[45] = hc_byte_perm (w[42], w[41], selector); w[44] = hc_byte_perm (w[41], w[40], selector); w[43] = hc_byte_perm (w[40], w[39], selector); w[42] = hc_byte_perm (w[39], w[38], selector); w[41] = hc_byte_perm (w[38], w[37], selector); w[40] = hc_byte_perm (w[37], w[36], selector); w[39] = hc_byte_perm (w[36], w[35], selector); w[38] = hc_byte_perm (w[35], w[34], selector); w[37] = hc_byte_perm (w[34], w[33], selector); w[36] = hc_byte_perm (w[33], w[32], selector); w[35] = hc_byte_perm (w[32], w[31], selector); w[34] = hc_byte_perm (w[31], w[30], selector); w[33] = hc_byte_perm (w[30], w[29], selector); w[32] = hc_byte_perm (w[29], w[28], selector); w[31] = hc_byte_perm (w[28], w[27], selector); w[30] = hc_byte_perm (w[27], w[26], selector); w[29] = hc_byte_perm (w[26], w[25], selector); w[28] = hc_byte_perm (w[25], w[24], selector); w[27] = hc_byte_perm (w[24], w[23], selector); w[26] = hc_byte_perm (w[23], w[22], selector); w[25] = hc_byte_perm (w[22], w[21], selector); w[24] = hc_byte_perm (w[21], w[20], selector); w[23] = hc_byte_perm (w[20], w[19], selector); w[22] = hc_byte_perm (w[19], w[18], selector); w[21] = hc_byte_perm (w[18], w[17], selector); w[20] = hc_byte_perm (w[17], w[16], selector); w[19] = hc_byte_perm (w[16], w[15], selector); w[18] = hc_byte_perm (w[15], w[14], selector); w[17] = hc_byte_perm (w[14], w[13], selector); w[16] = hc_byte_perm (w[13], w[12], selector); w[15] = hc_byte_perm (w[12], w[11], selector); w[14] = hc_byte_perm (w[11], w[10], selector); w[13] = hc_byte_perm (w[10], w[ 9], selector); w[12] = hc_byte_perm (w[ 9], w[ 8], selector); w[11] = hc_byte_perm (w[ 8], w[ 7], selector); w[10] = hc_byte_perm (w[ 7], w[ 6], selector); w[ 9] = hc_byte_perm (w[ 6], w[ 5], selector); w[ 8] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 7] = 
hc_byte_perm (w[ 4], w[ 3], selector); w[ 6] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 5] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 4] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 3] = hc_byte_perm (w[ 0], 0, selector); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_byte_perm (w[59], w[58], selector); w[62] = hc_byte_perm (w[58], w[57], selector); w[61] = hc_byte_perm (w[57], w[56], selector); w[60] = hc_byte_perm (w[56], w[55], selector); w[59] = hc_byte_perm (w[55], w[54], selector); w[58] = hc_byte_perm (w[54], w[53], selector); w[57] = hc_byte_perm (w[53], w[52], selector); w[56] = hc_byte_perm (w[52], w[51], selector); w[55] = hc_byte_perm (w[51], w[50], selector); w[54] = hc_byte_perm (w[50], w[49], selector); w[53] = hc_byte_perm (w[49], w[48], selector); w[52] = hc_byte_perm (w[48], w[47], selector); w[51] = hc_byte_perm (w[47], w[46], selector); w[50] = hc_byte_perm (w[46], w[45], selector); w[49] = hc_byte_perm (w[45], w[44], selector); w[48] = hc_byte_perm (w[44], w[43], selector); w[47] = hc_byte_perm (w[43], w[42], selector); w[46] = hc_byte_perm (w[42], w[41], selector); w[45] = hc_byte_perm (w[41], w[40], selector); w[44] = hc_byte_perm (w[40], w[39], selector); w[43] = hc_byte_perm (w[39], w[38], selector); w[42] = hc_byte_perm (w[38], w[37], selector); w[41] = hc_byte_perm (w[37], w[36], selector); w[40] = hc_byte_perm (w[36], w[35], selector); w[39] = hc_byte_perm (w[35], w[34], selector); w[38] = hc_byte_perm (w[34], w[33], selector); w[37] = hc_byte_perm (w[33], w[32], selector); w[36] = hc_byte_perm (w[32], w[31], selector); w[35] = hc_byte_perm (w[31], w[30], selector); w[34] = hc_byte_perm (w[30], w[29], selector); w[33] = hc_byte_perm (w[29], w[28], selector); w[32] = hc_byte_perm (w[28], w[27], selector); w[31] = hc_byte_perm (w[27], w[26], selector); w[30] = hc_byte_perm (w[26], w[25], selector); w[29] = hc_byte_perm (w[25], w[24], selector); w[28] = hc_byte_perm (w[24], w[23], selector); w[27] = hc_byte_perm (w[23], w[22], 
selector); w[26] = hc_byte_perm (w[22], w[21], selector); w[25] = hc_byte_perm (w[21], w[20], selector); w[24] = hc_byte_perm (w[20], w[19], selector); w[23] = hc_byte_perm (w[19], w[18], selector); w[22] = hc_byte_perm (w[18], w[17], selector); w[21] = hc_byte_perm (w[17], w[16], selector); w[20] = hc_byte_perm (w[16], w[15], selector); w[19] = hc_byte_perm (w[15], w[14], selector); w[18] = hc_byte_perm (w[14], w[13], selector); w[17] = hc_byte_perm (w[13], w[12], selector); w[16] = hc_byte_perm (w[12], w[11], selector); w[15] = hc_byte_perm (w[11], w[10], selector); w[14] = hc_byte_perm (w[10], w[ 9], selector); w[13] = hc_byte_perm (w[ 9], w[ 8], selector); w[12] = hc_byte_perm (w[ 8], w[ 7], selector); w[11] = hc_byte_perm (w[ 7], w[ 6], selector); w[10] = hc_byte_perm (w[ 6], w[ 5], selector); w[ 9] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 8] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 7] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 6] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 5] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 4] = hc_byte_perm (w[ 0], 0, selector); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_byte_perm (w[58], w[57], selector); w[62] = hc_byte_perm (w[57], w[56], selector); w[61] = hc_byte_perm (w[56], w[55], selector); w[60] = hc_byte_perm (w[55], w[54], selector); w[59] = hc_byte_perm (w[54], w[53], selector); w[58] = hc_byte_perm (w[53], w[52], selector); w[57] = hc_byte_perm (w[52], w[51], selector); w[56] = hc_byte_perm (w[51], w[50], selector); w[55] = hc_byte_perm (w[50], w[49], selector); w[54] = hc_byte_perm (w[49], w[48], selector); w[53] = hc_byte_perm (w[48], w[47], selector); w[52] = hc_byte_perm (w[47], w[46], selector); w[51] = hc_byte_perm (w[46], w[45], selector); w[50] = hc_byte_perm (w[45], w[44], selector); w[49] = hc_byte_perm (w[44], w[43], selector); w[48] = hc_byte_perm (w[43], w[42], selector); w[47] = hc_byte_perm (w[42], w[41], selector); w[46] = hc_byte_perm (w[41], w[40], selector); w[45] 
= hc_byte_perm (w[40], w[39], selector); w[44] = hc_byte_perm (w[39], w[38], selector); w[43] = hc_byte_perm (w[38], w[37], selector); w[42] = hc_byte_perm (w[37], w[36], selector); w[41] = hc_byte_perm (w[36], w[35], selector); w[40] = hc_byte_perm (w[35], w[34], selector); w[39] = hc_byte_perm (w[34], w[33], selector); w[38] = hc_byte_perm (w[33], w[32], selector); w[37] = hc_byte_perm (w[32], w[31], selector); w[36] = hc_byte_perm (w[31], w[30], selector); w[35] = hc_byte_perm (w[30], w[29], selector); w[34] = hc_byte_perm (w[29], w[28], selector); w[33] = hc_byte_perm (w[28], w[27], selector); w[32] = hc_byte_perm (w[27], w[26], selector); w[31] = hc_byte_perm (w[26], w[25], selector); w[30] = hc_byte_perm (w[25], w[24], selector); w[29] = hc_byte_perm (w[24], w[23], selector); w[28] = hc_byte_perm (w[23], w[22], selector); w[27] = hc_byte_perm (w[22], w[21], selector); w[26] = hc_byte_perm (w[21], w[20], selector); w[25] = hc_byte_perm (w[20], w[19], selector); w[24] = hc_byte_perm (w[19], w[18], selector); w[23] = hc_byte_perm (w[18], w[17], selector); w[22] = hc_byte_perm (w[17], w[16], selector); w[21] = hc_byte_perm (w[16], w[15], selector); w[20] = hc_byte_perm (w[15], w[14], selector); w[19] = hc_byte_perm (w[14], w[13], selector); w[18] = hc_byte_perm (w[13], w[12], selector); w[17] = hc_byte_perm (w[12], w[11], selector); w[16] = hc_byte_perm (w[11], w[10], selector); w[15] = hc_byte_perm (w[10], w[ 9], selector); w[14] = hc_byte_perm (w[ 9], w[ 8], selector); w[13] = hc_byte_perm (w[ 8], w[ 7], selector); w[12] = hc_byte_perm (w[ 7], w[ 6], selector); w[11] = hc_byte_perm (w[ 6], w[ 5], selector); w[10] = hc_byte_perm (w[ 5], w[ 4], selector); w[ 9] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 8] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 7] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 6] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 5] = hc_byte_perm (w[ 0], 0, selector); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = 
hc_byte_perm (w[57], w[56], selector); w[62] = hc_byte_perm (w[56], w[55], selector); w[61] = hc_byte_perm (w[55], w[54], selector); w[60] = hc_byte_perm (w[54], w[53], selector); w[59] = hc_byte_perm (w[53], w[52], selector); w[58] = hc_byte_perm (w[52], w[51], selector); w[57] = hc_byte_perm (w[51], w[50], selector); w[56] = hc_byte_perm (w[50], w[49], selector); w[55] = hc_byte_perm (w[49], w[48], selector); w[54] = hc_byte_perm (w[48], w[47], selector); w[53] = hc_byte_perm (w[47], w[46], selector); w[52] = hc_byte_perm (w[46], w[45], selector); w[51] = hc_byte_perm (w[45], w[44], selector); w[50] = hc_byte_perm (w[44], w[43], selector); w[49] = hc_byte_perm (w[43], w[42], selector); w[48] = hc_byte_perm (w[42], w[41], selector); w[47] = hc_byte_perm (w[41], w[40], selector); w[46] = hc_byte_perm (w[40], w[39], selector); w[45] = hc_byte_perm (w[39], w[38], selector); w[44] = hc_byte_perm (w[38], w[37], selector); w[43] = hc_byte_perm (w[37], w[36], selector); w[42] = hc_byte_perm (w[36], w[35], selector); w[41] = hc_byte_perm (w[35], w[34], selector); w[40] = hc_byte_perm (w[34], w[33], selector); w[39] = hc_byte_perm (w[33], w[32], selector); w[38] = hc_byte_perm (w[32], w[31], selector); w[37] = hc_byte_perm (w[31], w[30], selector); w[36] = hc_byte_perm (w[30], w[29], selector); w[35] = hc_byte_perm (w[29], w[28], selector); w[34] = hc_byte_perm (w[28], w[27], selector); w[33] = hc_byte_perm (w[27], w[26], selector); w[32] = hc_byte_perm (w[26], w[25], selector); w[31] = hc_byte_perm (w[25], w[24], selector); w[30] = hc_byte_perm (w[24], w[23], selector); w[29] = hc_byte_perm (w[23], w[22], selector); w[28] = hc_byte_perm (w[22], w[21], selector); w[27] = hc_byte_perm (w[21], w[20], selector); w[26] = hc_byte_perm (w[20], w[19], selector); w[25] = hc_byte_perm (w[19], w[18], selector); w[24] = hc_byte_perm (w[18], w[17], selector); w[23] = hc_byte_perm (w[17], w[16], selector); w[22] = hc_byte_perm (w[16], w[15], selector); w[21] = hc_byte_perm (w[15], 
w[14], selector); w[20] = hc_byte_perm (w[14], w[13], selector); w[19] = hc_byte_perm (w[13], w[12], selector); w[18] = hc_byte_perm (w[12], w[11], selector); w[17] = hc_byte_perm (w[11], w[10], selector); w[16] = hc_byte_perm (w[10], w[ 9], selector); w[15] = hc_byte_perm (w[ 9], w[ 8], selector); w[14] = hc_byte_perm (w[ 8], w[ 7], selector); w[13] = hc_byte_perm (w[ 7], w[ 6], selector); w[12] = hc_byte_perm (w[ 6], w[ 5], selector); w[11] = hc_byte_perm (w[ 5], w[ 4], selector); w[10] = hc_byte_perm (w[ 4], w[ 3], selector); w[ 9] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 8] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 7] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 6] = hc_byte_perm (w[ 0], 0, selector); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_byte_perm (w[56], w[55], selector); w[62] = hc_byte_perm (w[55], w[54], selector); w[61] = hc_byte_perm (w[54], w[53], selector); w[60] = hc_byte_perm (w[53], w[52], selector); w[59] = hc_byte_perm (w[52], w[51], selector); w[58] = hc_byte_perm (w[51], w[50], selector); w[57] = hc_byte_perm (w[50], w[49], selector); w[56] = hc_byte_perm (w[49], w[48], selector); w[55] = hc_byte_perm (w[48], w[47], selector); w[54] = hc_byte_perm (w[47], w[46], selector); w[53] = hc_byte_perm (w[46], w[45], selector); w[52] = hc_byte_perm (w[45], w[44], selector); w[51] = hc_byte_perm (w[44], w[43], selector); w[50] = hc_byte_perm (w[43], w[42], selector); w[49] = hc_byte_perm (w[42], w[41], selector); w[48] = hc_byte_perm (w[41], w[40], selector); w[47] = hc_byte_perm (w[40], w[39], selector); w[46] = hc_byte_perm (w[39], w[38], selector); w[45] = hc_byte_perm (w[38], w[37], selector); w[44] = hc_byte_perm (w[37], w[36], selector); w[43] = hc_byte_perm (w[36], w[35], selector); w[42] = hc_byte_perm (w[35], w[34], selector); w[41] = hc_byte_perm (w[34], w[33], selector); w[40] = hc_byte_perm (w[33], w[32], selector); w[39] = hc_byte_perm (w[32], w[31], selector); w[38] = hc_byte_perm 
(w[31], w[30], selector); w[37] = hc_byte_perm (w[30], w[29], selector); w[36] = hc_byte_perm (w[29], w[28], selector); w[35] = hc_byte_perm (w[28], w[27], selector); w[34] = hc_byte_perm (w[27], w[26], selector); w[33] = hc_byte_perm (w[26], w[25], selector); w[32] = hc_byte_perm (w[25], w[24], selector); w[31] = hc_byte_perm (w[24], w[23], selector); w[30] = hc_byte_perm (w[23], w[22], selector); w[29] = hc_byte_perm (w[22], w[21], selector); w[28] = hc_byte_perm (w[21], w[20], selector); w[27] = hc_byte_perm (w[20], w[19], selector); w[26] = hc_byte_perm (w[19], w[18], selector); w[25] = hc_byte_perm (w[18], w[17], selector); w[24] = hc_byte_perm (w[17], w[16], selector); w[23] = hc_byte_perm (w[16], w[15], selector); w[22] = hc_byte_perm (w[15], w[14], selector); w[21] = hc_byte_perm (w[14], w[13], selector); w[20] = hc_byte_perm (w[13], w[12], selector); w[19] = hc_byte_perm (w[12], w[11], selector); w[18] = hc_byte_perm (w[11], w[10], selector); w[17] = hc_byte_perm (w[10], w[ 9], selector); w[16] = hc_byte_perm (w[ 9], w[ 8], selector); w[15] = hc_byte_perm (w[ 8], w[ 7], selector); w[14] = hc_byte_perm (w[ 7], w[ 6], selector); w[13] = hc_byte_perm (w[ 6], w[ 5], selector); w[12] = hc_byte_perm (w[ 5], w[ 4], selector); w[11] = hc_byte_perm (w[ 4], w[ 3], selector); w[10] = hc_byte_perm (w[ 3], w[ 2], selector); w[ 9] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 8] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 7] = hc_byte_perm (w[ 0], 0, selector); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_byte_perm (w[55], w[54], selector); w[62] = hc_byte_perm (w[54], w[53], selector); w[61] = hc_byte_perm (w[53], w[52], selector); w[60] = hc_byte_perm (w[52], w[51], selector); w[59] = hc_byte_perm (w[51], w[50], selector); w[58] = hc_byte_perm (w[50], w[49], selector); w[57] = hc_byte_perm (w[49], w[48], selector); w[56] = hc_byte_perm (w[48], w[47], selector); w[55] = hc_byte_perm (w[47], w[46], selector); w[54] 
= hc_byte_perm (w[46], w[45], selector); w[53] = hc_byte_perm (w[45], w[44], selector); w[52] = hc_byte_perm (w[44], w[43], selector); w[51] = hc_byte_perm (w[43], w[42], selector); w[50] = hc_byte_perm (w[42], w[41], selector); w[49] = hc_byte_perm (w[41], w[40], selector); w[48] = hc_byte_perm (w[40], w[39], selector); w[47] = hc_byte_perm (w[39], w[38], selector); w[46] = hc_byte_perm (w[38], w[37], selector); w[45] = hc_byte_perm (w[37], w[36], selector); w[44] = hc_byte_perm (w[36], w[35], selector); w[43] = hc_byte_perm (w[35], w[34], selector); w[42] = hc_byte_perm (w[34], w[33], selector); w[41] = hc_byte_perm (w[33], w[32], selector); w[40] = hc_byte_perm (w[32], w[31], selector); w[39] = hc_byte_perm (w[31], w[30], selector); w[38] = hc_byte_perm (w[30], w[29], selector); w[37] = hc_byte_perm (w[29], w[28], selector); w[36] = hc_byte_perm (w[28], w[27], selector); w[35] = hc_byte_perm (w[27], w[26], selector); w[34] = hc_byte_perm (w[26], w[25], selector); w[33] = hc_byte_perm (w[25], w[24], selector); w[32] = hc_byte_perm (w[24], w[23], selector); w[31] = hc_byte_perm (w[23], w[22], selector); w[30] = hc_byte_perm (w[22], w[21], selector); w[29] = hc_byte_perm (w[21], w[20], selector); w[28] = hc_byte_perm (w[20], w[19], selector); w[27] = hc_byte_perm (w[19], w[18], selector); w[26] = hc_byte_perm (w[18], w[17], selector); w[25] = hc_byte_perm (w[17], w[16], selector); w[24] = hc_byte_perm (w[16], w[15], selector); w[23] = hc_byte_perm (w[15], w[14], selector); w[22] = hc_byte_perm (w[14], w[13], selector); w[21] = hc_byte_perm (w[13], w[12], selector); w[20] = hc_byte_perm (w[12], w[11], selector); w[19] = hc_byte_perm (w[11], w[10], selector); w[18] = hc_byte_perm (w[10], w[ 9], selector); w[17] = hc_byte_perm (w[ 9], w[ 8], selector); w[16] = hc_byte_perm (w[ 8], w[ 7], selector); w[15] = hc_byte_perm (w[ 7], w[ 6], selector); w[14] = hc_byte_perm (w[ 6], w[ 5], selector); w[13] = hc_byte_perm (w[ 5], w[ 4], selector); w[12] = hc_byte_perm (w[ 4], w[ 
3], selector); w[11] = hc_byte_perm (w[ 3], w[ 2], selector); w[10] = hc_byte_perm (w[ 2], w[ 1], selector); w[ 9] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 8] = hc_byte_perm (w[ 0], 0, selector); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_byte_perm (w[54], w[53], selector); w[62] = hc_byte_perm (w[53], w[52], selector); w[61] = hc_byte_perm (w[52], w[51], selector); w[60] = hc_byte_perm (w[51], w[50], selector); w[59] = hc_byte_perm (w[50], w[49], selector); w[58] = hc_byte_perm (w[49], w[48], selector); w[57] = hc_byte_perm (w[48], w[47], selector); w[56] = hc_byte_perm (w[47], w[46], selector); w[55] = hc_byte_perm (w[46], w[45], selector); w[54] = hc_byte_perm (w[45], w[44], selector); w[53] = hc_byte_perm (w[44], w[43], selector); w[52] = hc_byte_perm (w[43], w[42], selector); w[51] = hc_byte_perm (w[42], w[41], selector); w[50] = hc_byte_perm (w[41], w[40], selector); w[49] = hc_byte_perm (w[40], w[39], selector); w[48] = hc_byte_perm (w[39], w[38], selector); w[47] = hc_byte_perm (w[38], w[37], selector); w[46] = hc_byte_perm (w[37], w[36], selector); w[45] = hc_byte_perm (w[36], w[35], selector); w[44] = hc_byte_perm (w[35], w[34], selector); w[43] = hc_byte_perm (w[34], w[33], selector); w[42] = hc_byte_perm (w[33], w[32], selector); w[41] = hc_byte_perm (w[32], w[31], selector); w[40] = hc_byte_perm (w[31], w[30], selector); w[39] = hc_byte_perm (w[30], w[29], selector); w[38] = hc_byte_perm (w[29], w[28], selector); w[37] = hc_byte_perm (w[28], w[27], selector); w[36] = hc_byte_perm (w[27], w[26], selector); w[35] = hc_byte_perm (w[26], w[25], selector); w[34] = hc_byte_perm (w[25], w[24], selector); w[33] = hc_byte_perm (w[24], w[23], selector); w[32] = hc_byte_perm (w[23], w[22], selector); w[31] = hc_byte_perm (w[22], w[21], selector); w[30] = hc_byte_perm (w[21], w[20], selector); w[29] = hc_byte_perm (w[20], w[19], selector); w[28] = hc_byte_perm (w[19], w[18], selector); w[27] 
= hc_byte_perm (w[18], w[17], selector); w[26] = hc_byte_perm (w[17], w[16], selector); w[25] = hc_byte_perm (w[16], w[15], selector); w[24] = hc_byte_perm (w[15], w[14], selector); w[23] = hc_byte_perm (w[14], w[13], selector); w[22] = hc_byte_perm (w[13], w[12], selector); w[21] = hc_byte_perm (w[12], w[11], selector); w[20] = hc_byte_perm (w[11], w[10], selector); w[19] = hc_byte_perm (w[10], w[ 9], selector); w[18] = hc_byte_perm (w[ 9], w[ 8], selector); w[17] = hc_byte_perm (w[ 8], w[ 7], selector); w[16] = hc_byte_perm (w[ 7], w[ 6], selector); w[15] = hc_byte_perm (w[ 6], w[ 5], selector); w[14] = hc_byte_perm (w[ 5], w[ 4], selector); w[13] = hc_byte_perm (w[ 4], w[ 3], selector); w[12] = hc_byte_perm (w[ 3], w[ 2], selector); w[11] = hc_byte_perm (w[ 2], w[ 1], selector); w[10] = hc_byte_perm (w[ 1], w[ 0], selector); w[ 9] = hc_byte_perm (w[ 0], 0, selector); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_byte_perm (w[53], w[52], selector); w[62] = hc_byte_perm (w[52], w[51], selector); w[61] = hc_byte_perm (w[51], w[50], selector); w[60] = hc_byte_perm (w[50], w[49], selector); w[59] = hc_byte_perm (w[49], w[48], selector); w[58] = hc_byte_perm (w[48], w[47], selector); w[57] = hc_byte_perm (w[47], w[46], selector); w[56] = hc_byte_perm (w[46], w[45], selector); w[55] = hc_byte_perm (w[45], w[44], selector); w[54] = hc_byte_perm (w[44], w[43], selector); w[53] = hc_byte_perm (w[43], w[42], selector); w[52] = hc_byte_perm (w[42], w[41], selector); w[51] = hc_byte_perm (w[41], w[40], selector); w[50] = hc_byte_perm (w[40], w[39], selector); w[49] = hc_byte_perm (w[39], w[38], selector); w[48] = hc_byte_perm (w[38], w[37], selector); w[47] = hc_byte_perm (w[37], w[36], selector); w[46] = hc_byte_perm (w[36], w[35], selector); w[45] = hc_byte_perm (w[35], w[34], selector); w[44] = hc_byte_perm (w[34], w[33], selector); w[43] = hc_byte_perm (w[33], w[32], selector); w[42] = 
hc_byte_perm (w[32], w[31], selector); w[41] = hc_byte_perm (w[31], w[30], selector); w[40] = hc_byte_perm (w[30], w[29], selector); w[39] = hc_byte_perm (w[29], w[28], selector); w[38] = hc_byte_perm (w[28], w[27], selector); w[37] = hc_byte_perm (w[27], w[26], selector); w[36] = hc_byte_perm (w[26], w[25], selector); w[35] = hc_byte_perm (w[25], w[24], selector); w[34] = hc_byte_perm (w[24], w[23], selector); w[33] = hc_byte_perm (w[23], w[22], selector); w[32] = hc_byte_perm (w[22], w[21], selector); w[31] = hc_byte_perm (w[21], w[20], selector); w[30] = hc_byte_perm (w[20], w[19], selector); w[29] = hc_byte_perm (w[19], w[18], selector); w[28] = hc_byte_perm (w[18], w[17], selector); w[27] = hc_byte_perm (w[17], w[16], selector); w[26] = hc_byte_perm (w[16], w[15], selector); w[25] = hc_byte_perm (w[15], w[14], selector); w[24] = hc_byte_perm (w[14], w[13], selector); w[23] = hc_byte_perm (w[13], w[12], selector); w[22] = hc_byte_perm (w[12], w[11], selector); w[21] = hc_byte_perm (w[11], w[10], selector); w[20] = hc_byte_perm (w[10], w[ 9], selector); w[19] = hc_byte_perm (w[ 9], w[ 8], selector); w[18] = hc_byte_perm (w[ 8], w[ 7], selector); w[17] = hc_byte_perm (w[ 7], w[ 6], selector); w[16] = hc_byte_perm (w[ 6], w[ 5], selector); w[15] = hc_byte_perm (w[ 5], w[ 4], selector); w[14] = hc_byte_perm (w[ 4], w[ 3], selector); w[13] = hc_byte_perm (w[ 3], w[ 2], selector); w[12] = hc_byte_perm (w[ 2], w[ 1], selector); w[11] = hc_byte_perm (w[ 1], w[ 0], selector); w[10] = hc_byte_perm (w[ 0], 0, selector); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_byte_perm (w[52], w[51], selector); w[62] = hc_byte_perm (w[51], w[50], selector); w[61] = hc_byte_perm (w[50], w[49], selector); w[60] = hc_byte_perm (w[49], w[48], selector); w[59] = hc_byte_perm (w[48], w[47], selector); w[58] = hc_byte_perm (w[47], w[46], selector); w[57] = hc_byte_perm (w[46], w[45], selector); w[56] 
= hc_byte_perm (w[45], w[44], selector); w[55] = hc_byte_perm (w[44], w[43], selector); w[54] = hc_byte_perm (w[43], w[42], selector); w[53] = hc_byte_perm (w[42], w[41], selector); w[52] = hc_byte_perm (w[41], w[40], selector); w[51] = hc_byte_perm (w[40], w[39], selector); w[50] = hc_byte_perm (w[39], w[38], selector); w[49] = hc_byte_perm (w[38], w[37], selector); w[48] = hc_byte_perm (w[37], w[36], selector); w[47] = hc_byte_perm (w[36], w[35], selector); w[46] = hc_byte_perm (w[35], w[34], selector); w[45] = hc_byte_perm (w[34], w[33], selector); w[44] = hc_byte_perm (w[33], w[32], selector); w[43] = hc_byte_perm (w[32], w[31], selector); w[42] = hc_byte_perm (w[31], w[30], selector); w[41] = hc_byte_perm (w[30], w[29], selector); w[40] = hc_byte_perm (w[29], w[28], selector); w[39] = hc_byte_perm (w[28], w[27], selector); w[38] = hc_byte_perm (w[27], w[26], selector); w[37] = hc_byte_perm (w[26], w[25], selector); w[36] = hc_byte_perm (w[25], w[24], selector); w[35] = hc_byte_perm (w[24], w[23], selector); w[34] = hc_byte_perm (w[23], w[22], selector); w[33] = hc_byte_perm (w[22], w[21], selector); w[32] = hc_byte_perm (w[21], w[20], selector); w[31] = hc_byte_perm (w[20], w[19], selector); w[30] = hc_byte_perm (w[19], w[18], selector); w[29] = hc_byte_perm (w[18], w[17], selector); w[28] = hc_byte_perm (w[17], w[16], selector); w[27] = hc_byte_perm (w[16], w[15], selector); w[26] = hc_byte_perm (w[15], w[14], selector); w[25] = hc_byte_perm (w[14], w[13], selector); w[24] = hc_byte_perm (w[13], w[12], selector); w[23] = hc_byte_perm (w[12], w[11], selector); w[22] = hc_byte_perm (w[11], w[10], selector); w[21] = hc_byte_perm (w[10], w[ 9], selector); w[20] = hc_byte_perm (w[ 9], w[ 8], selector); w[19] = hc_byte_perm (w[ 8], w[ 7], selector); w[18] = hc_byte_perm (w[ 7], w[ 6], selector); w[17] = hc_byte_perm (w[ 6], w[ 5], selector); w[16] = hc_byte_perm (w[ 5], w[ 4], selector); w[15] = hc_byte_perm (w[ 4], w[ 3], selector); w[14] = hc_byte_perm (w[ 3], w[ 
2], selector); w[13] = hc_byte_perm (w[ 2], w[ 1], selector); w[12] = hc_byte_perm (w[ 1], w[ 0], selector); w[11] = hc_byte_perm (w[ 0], 0, selector); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_byte_perm (w[51], w[50], selector); w[62] = hc_byte_perm (w[50], w[49], selector); w[61] = hc_byte_perm (w[49], w[48], selector); w[60] = hc_byte_perm (w[48], w[47], selector); w[59] = hc_byte_perm (w[47], w[46], selector); w[58] = hc_byte_perm (w[46], w[45], selector); w[57] = hc_byte_perm (w[45], w[44], selector); w[56] = hc_byte_perm (w[44], w[43], selector); w[55] = hc_byte_perm (w[43], w[42], selector); w[54] = hc_byte_perm (w[42], w[41], selector); w[53] = hc_byte_perm (w[41], w[40], selector); w[52] = hc_byte_perm (w[40], w[39], selector); w[51] = hc_byte_perm (w[39], w[38], selector); w[50] = hc_byte_perm (w[38], w[37], selector); w[49] = hc_byte_perm (w[37], w[36], selector); w[48] = hc_byte_perm (w[36], w[35], selector); w[47] = hc_byte_perm (w[35], w[34], selector); w[46] = hc_byte_perm (w[34], w[33], selector); w[45] = hc_byte_perm (w[33], w[32], selector); w[44] = hc_byte_perm (w[32], w[31], selector); w[43] = hc_byte_perm (w[31], w[30], selector); w[42] = hc_byte_perm (w[30], w[29], selector); w[41] = hc_byte_perm (w[29], w[28], selector); w[40] = hc_byte_perm (w[28], w[27], selector); w[39] = hc_byte_perm (w[27], w[26], selector); w[38] = hc_byte_perm (w[26], w[25], selector); w[37] = hc_byte_perm (w[25], w[24], selector); w[36] = hc_byte_perm (w[24], w[23], selector); w[35] = hc_byte_perm (w[23], w[22], selector); w[34] = hc_byte_perm (w[22], w[21], selector); w[33] = hc_byte_perm (w[21], w[20], selector); w[32] = hc_byte_perm (w[20], w[19], selector); w[31] = hc_byte_perm (w[19], w[18], selector); w[30] = hc_byte_perm (w[18], w[17], selector); w[29] = hc_byte_perm (w[17], w[16], selector); w[28] = hc_byte_perm (w[16], w[15], selector); w[27] = 
hc_byte_perm (w[15], w[14], selector); w[26] = hc_byte_perm (w[14], w[13], selector); w[25] = hc_byte_perm (w[13], w[12], selector); w[24] = hc_byte_perm (w[12], w[11], selector); w[23] = hc_byte_perm (w[11], w[10], selector); w[22] = hc_byte_perm (w[10], w[ 9], selector); w[21] = hc_byte_perm (w[ 9], w[ 8], selector); w[20] = hc_byte_perm (w[ 8], w[ 7], selector); w[19] = hc_byte_perm (w[ 7], w[ 6], selector); w[18] = hc_byte_perm (w[ 6], w[ 5], selector); w[17] = hc_byte_perm (w[ 5], w[ 4], selector); w[16] = hc_byte_perm (w[ 4], w[ 3], selector); w[15] = hc_byte_perm (w[ 3], w[ 2], selector); w[14] = hc_byte_perm (w[ 2], w[ 1], selector); w[13] = hc_byte_perm (w[ 1], w[ 0], selector); w[12] = hc_byte_perm (w[ 0], 0, selector); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_byte_perm (w[50], w[49], selector); w[62] = hc_byte_perm (w[49], w[48], selector); w[61] = hc_byte_perm (w[48], w[47], selector); w[60] = hc_byte_perm (w[47], w[46], selector); w[59] = hc_byte_perm (w[46], w[45], selector); w[58] = hc_byte_perm (w[45], w[44], selector); w[57] = hc_byte_perm (w[44], w[43], selector); w[56] = hc_byte_perm (w[43], w[42], selector); w[55] = hc_byte_perm (w[42], w[41], selector); w[54] = hc_byte_perm (w[41], w[40], selector); w[53] = hc_byte_perm (w[40], w[39], selector); w[52] = hc_byte_perm (w[39], w[38], selector); w[51] = hc_byte_perm (w[38], w[37], selector); w[50] = hc_byte_perm (w[37], w[36], selector); w[49] = hc_byte_perm (w[36], w[35], selector); w[48] = hc_byte_perm (w[35], w[34], selector); w[47] = hc_byte_perm (w[34], w[33], selector); w[46] = hc_byte_perm (w[33], w[32], selector); w[45] = hc_byte_perm (w[32], w[31], selector); w[44] = hc_byte_perm (w[31], w[30], selector); w[43] = hc_byte_perm (w[30], w[29], selector); w[42] = hc_byte_perm (w[29], w[28], selector); w[41] = hc_byte_perm (w[28], w[27], selector); w[40] = hc_byte_perm (w[27], 
w[26], selector); w[39] = hc_byte_perm (w[26], w[25], selector); w[38] = hc_byte_perm (w[25], w[24], selector); w[37] = hc_byte_perm (w[24], w[23], selector); w[36] = hc_byte_perm (w[23], w[22], selector); w[35] = hc_byte_perm (w[22], w[21], selector); w[34] = hc_byte_perm (w[21], w[20], selector); w[33] = hc_byte_perm (w[20], w[19], selector); w[32] = hc_byte_perm (w[19], w[18], selector); w[31] = hc_byte_perm (w[18], w[17], selector); w[30] = hc_byte_perm (w[17], w[16], selector); w[29] = hc_byte_perm (w[16], w[15], selector); w[28] = hc_byte_perm (w[15], w[14], selector); w[27] = hc_byte_perm (w[14], w[13], selector); w[26] = hc_byte_perm (w[13], w[12], selector); w[25] = hc_byte_perm (w[12], w[11], selector); w[24] = hc_byte_perm (w[11], w[10], selector); w[23] = hc_byte_perm (w[10], w[ 9], selector); w[22] = hc_byte_perm (w[ 9], w[ 8], selector); w[21] = hc_byte_perm (w[ 8], w[ 7], selector); w[20] = hc_byte_perm (w[ 7], w[ 6], selector); w[19] = hc_byte_perm (w[ 6], w[ 5], selector); w[18] = hc_byte_perm (w[ 5], w[ 4], selector); w[17] = hc_byte_perm (w[ 4], w[ 3], selector); w[16] = hc_byte_perm (w[ 3], w[ 2], selector); w[15] = hc_byte_perm (w[ 2], w[ 1], selector); w[14] = hc_byte_perm (w[ 1], w[ 0], selector); w[13] = hc_byte_perm (w[ 0], 0, selector); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = hc_byte_perm (w[49], w[48], selector); w[62] = hc_byte_perm (w[48], w[47], selector); w[61] = hc_byte_perm (w[47], w[46], selector); w[60] = hc_byte_perm (w[46], w[45], selector); w[59] = hc_byte_perm (w[45], w[44], selector); w[58] = hc_byte_perm (w[44], w[43], selector); w[57] = hc_byte_perm (w[43], w[42], selector); w[56] = hc_byte_perm (w[42], w[41], selector); w[55] = hc_byte_perm (w[41], w[40], selector); w[54] = hc_byte_perm (w[40], w[39], selector); w[53] = hc_byte_perm (w[39], w[38], selector); w[52] = hc_byte_perm (w[38], w[37], 
selector); w[51] = hc_byte_perm (w[37], w[36], selector); w[50] = hc_byte_perm (w[36], w[35], selector); w[49] = hc_byte_perm (w[35], w[34], selector); w[48] = hc_byte_perm (w[34], w[33], selector); w[47] = hc_byte_perm (w[33], w[32], selector); w[46] = hc_byte_perm (w[32], w[31], selector); w[45] = hc_byte_perm (w[31], w[30], selector); w[44] = hc_byte_perm (w[30], w[29], selector); w[43] = hc_byte_perm (w[29], w[28], selector); w[42] = hc_byte_perm (w[28], w[27], selector); w[41] = hc_byte_perm (w[27], w[26], selector); w[40] = hc_byte_perm (w[26], w[25], selector); w[39] = hc_byte_perm (w[25], w[24], selector); w[38] = hc_byte_perm (w[24], w[23], selector); w[37] = hc_byte_perm (w[23], w[22], selector); w[36] = hc_byte_perm (w[22], w[21], selector); w[35] = hc_byte_perm (w[21], w[20], selector); w[34] = hc_byte_perm (w[20], w[19], selector); w[33] = hc_byte_perm (w[19], w[18], selector); w[32] = hc_byte_perm (w[18], w[17], selector); w[31] = hc_byte_perm (w[17], w[16], selector); w[30] = hc_byte_perm (w[16], w[15], selector); w[29] = hc_byte_perm (w[15], w[14], selector); w[28] = hc_byte_perm (w[14], w[13], selector); w[27] = hc_byte_perm (w[13], w[12], selector); w[26] = hc_byte_perm (w[12], w[11], selector); w[25] = hc_byte_perm (w[11], w[10], selector); w[24] = hc_byte_perm (w[10], w[ 9], selector); w[23] = hc_byte_perm (w[ 9], w[ 8], selector); w[22] = hc_byte_perm (w[ 8], w[ 7], selector); w[21] = hc_byte_perm (w[ 7], w[ 6], selector); w[20] = hc_byte_perm (w[ 6], w[ 5], selector); w[19] = hc_byte_perm (w[ 5], w[ 4], selector); w[18] = hc_byte_perm (w[ 4], w[ 3], selector); w[17] = hc_byte_perm (w[ 3], w[ 2], selector); w[16] = hc_byte_perm (w[ 2], w[ 1], selector); w[15] = hc_byte_perm (w[ 1], w[ 0], selector); w[14] = hc_byte_perm (w[ 0], 0, selector); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_byte_perm (w[48], w[47], 
selector); w[62] = hc_byte_perm (w[47], w[46], selector); w[61] = hc_byte_perm (w[46], w[45], selector); w[60] = hc_byte_perm (w[45], w[44], selector); w[59] = hc_byte_perm (w[44], w[43], selector); w[58] = hc_byte_perm (w[43], w[42], selector); w[57] = hc_byte_perm (w[42], w[41], selector); w[56] = hc_byte_perm (w[41], w[40], selector); w[55] = hc_byte_perm (w[40], w[39], selector); w[54] = hc_byte_perm (w[39], w[38], selector); w[53] = hc_byte_perm (w[38], w[37], selector); w[52] = hc_byte_perm (w[37], w[36], selector); w[51] = hc_byte_perm (w[36], w[35], selector); w[50] = hc_byte_perm (w[35], w[34], selector); w[49] = hc_byte_perm (w[34], w[33], selector); w[48] = hc_byte_perm (w[33], w[32], selector); w[47] = hc_byte_perm (w[32], w[31], selector); w[46] = hc_byte_perm (w[31], w[30], selector); w[45] = hc_byte_perm (w[30], w[29], selector); w[44] = hc_byte_perm (w[29], w[28], selector); w[43] = hc_byte_perm (w[28], w[27], selector); w[42] = hc_byte_perm (w[27], w[26], selector); w[41] = hc_byte_perm (w[26], w[25], selector); w[40] = hc_byte_perm (w[25], w[24], selector); w[39] = hc_byte_perm (w[24], w[23], selector); w[38] = hc_byte_perm (w[23], w[22], selector); w[37] = hc_byte_perm (w[22], w[21], selector); w[36] = hc_byte_perm (w[21], w[20], selector); w[35] = hc_byte_perm (w[20], w[19], selector); w[34] = hc_byte_perm (w[19], w[18], selector); w[33] = hc_byte_perm (w[18], w[17], selector); w[32] = hc_byte_perm (w[17], w[16], selector); w[31] = hc_byte_perm (w[16], w[15], selector); w[30] = hc_byte_perm (w[15], w[14], selector); w[29] = hc_byte_perm (w[14], w[13], selector); w[28] = hc_byte_perm (w[13], w[12], selector); w[27] = hc_byte_perm (w[12], w[11], selector); w[26] = hc_byte_perm (w[11], w[10], selector); w[25] = hc_byte_perm (w[10], w[ 9], selector); w[24] = hc_byte_perm (w[ 9], w[ 8], selector); w[23] = hc_byte_perm (w[ 8], w[ 7], selector); w[22] = hc_byte_perm (w[ 7], w[ 6], selector); w[21] = hc_byte_perm (w[ 6], w[ 5], selector); w[20] = 
hc_byte_perm (w[ 5], w[ 4], selector); w[19] = hc_byte_perm (w[ 4], w[ 3], selector); w[18] = hc_byte_perm (w[ 3], w[ 2], selector); w[17] = hc_byte_perm (w[ 2], w[ 1], selector); w[16] = hc_byte_perm (w[ 1], w[ 0], selector); w[15] = hc_byte_perm (w[ 0], 0, selector); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_byte_perm (w[47], w[46], selector); w[62] = hc_byte_perm (w[46], w[45], selector); w[61] = hc_byte_perm (w[45], w[44], selector); w[60] = hc_byte_perm (w[44], w[43], selector); w[59] = hc_byte_perm (w[43], w[42], selector); w[58] = hc_byte_perm (w[42], w[41], selector); w[57] = hc_byte_perm (w[41], w[40], selector); w[56] = hc_byte_perm (w[40], w[39], selector); w[55] = hc_byte_perm (w[39], w[38], selector); w[54] = hc_byte_perm (w[38], w[37], selector); w[53] = hc_byte_perm (w[37], w[36], selector); w[52] = hc_byte_perm (w[36], w[35], selector); w[51] = hc_byte_perm (w[35], w[34], selector); w[50] = hc_byte_perm (w[34], w[33], selector); w[49] = hc_byte_perm (w[33], w[32], selector); w[48] = hc_byte_perm (w[32], w[31], selector); w[47] = hc_byte_perm (w[31], w[30], selector); w[46] = hc_byte_perm (w[30], w[29], selector); w[45] = hc_byte_perm (w[29], w[28], selector); w[44] = hc_byte_perm (w[28], w[27], selector); w[43] = hc_byte_perm (w[27], w[26], selector); w[42] = hc_byte_perm (w[26], w[25], selector); w[41] = hc_byte_perm (w[25], w[24], selector); w[40] = hc_byte_perm (w[24], w[23], selector); w[39] = hc_byte_perm (w[23], w[22], selector); w[38] = hc_byte_perm (w[22], w[21], selector); w[37] = hc_byte_perm (w[21], w[20], selector); w[36] = hc_byte_perm (w[20], w[19], selector); w[35] = hc_byte_perm (w[19], w[18], selector); w[34] = hc_byte_perm (w[18], w[17], selector); w[33] = hc_byte_perm (w[17], w[16], selector); w[32] = hc_byte_perm (w[16], w[15], selector); w[31] = hc_byte_perm (w[15], w[14], 
selector); w[30] = hc_byte_perm (w[14], w[13], selector); w[29] = hc_byte_perm (w[13], w[12], selector); w[28] = hc_byte_perm (w[12], w[11], selector); w[27] = hc_byte_perm (w[11], w[10], selector); w[26] = hc_byte_perm (w[10], w[ 9], selector); w[25] = hc_byte_perm (w[ 9], w[ 8], selector); w[24] = hc_byte_perm (w[ 8], w[ 7], selector); w[23] = hc_byte_perm (w[ 7], w[ 6], selector); w[22] = hc_byte_perm (w[ 6], w[ 5], selector); w[21] = hc_byte_perm (w[ 5], w[ 4], selector); w[20] = hc_byte_perm (w[ 4], w[ 3], selector); w[19] = hc_byte_perm (w[ 3], w[ 2], selector); w[18] = hc_byte_perm (w[ 2], w[ 1], selector); w[17] = hc_byte_perm (w[ 1], w[ 0], selector); w[16] = hc_byte_perm (w[ 0], 0, selector); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_byte_perm (w[46], w[45], selector); w[62] = hc_byte_perm (w[45], w[44], selector); w[61] = hc_byte_perm (w[44], w[43], selector); w[60] = hc_byte_perm (w[43], w[42], selector); w[59] = hc_byte_perm (w[42], w[41], selector); w[58] = hc_byte_perm (w[41], w[40], selector); w[57] = hc_byte_perm (w[40], w[39], selector); w[56] = hc_byte_perm (w[39], w[38], selector); w[55] = hc_byte_perm (w[38], w[37], selector); w[54] = hc_byte_perm (w[37], w[36], selector); w[53] = hc_byte_perm (w[36], w[35], selector); w[52] = hc_byte_perm (w[35], w[34], selector); w[51] = hc_byte_perm (w[34], w[33], selector); w[50] = hc_byte_perm (w[33], w[32], selector); w[49] = hc_byte_perm (w[32], w[31], selector); w[48] = hc_byte_perm (w[31], w[30], selector); w[47] = hc_byte_perm (w[30], w[29], selector); w[46] = hc_byte_perm (w[29], w[28], selector); w[45] = hc_byte_perm (w[28], w[27], selector); w[44] = hc_byte_perm (w[27], w[26], selector); w[43] = hc_byte_perm (w[26], w[25], selector); w[42] = hc_byte_perm (w[25], w[24], selector); w[41] = hc_byte_perm (w[24], w[23], selector); w[40] = 
hc_byte_perm (w[23], w[22], selector); w[39] = hc_byte_perm (w[22], w[21], selector); w[38] = hc_byte_perm (w[21], w[20], selector); w[37] = hc_byte_perm (w[20], w[19], selector); w[36] = hc_byte_perm (w[19], w[18], selector); w[35] = hc_byte_perm (w[18], w[17], selector); w[34] = hc_byte_perm (w[17], w[16], selector); w[33] = hc_byte_perm (w[16], w[15], selector); w[32] = hc_byte_perm (w[15], w[14], selector); w[31] = hc_byte_perm (w[14], w[13], selector); w[30] = hc_byte_perm (w[13], w[12], selector); w[29] = hc_byte_perm (w[12], w[11], selector); w[28] = hc_byte_perm (w[11], w[10], selector); w[27] = hc_byte_perm (w[10], w[ 9], selector); w[26] = hc_byte_perm (w[ 9], w[ 8], selector); w[25] = hc_byte_perm (w[ 8], w[ 7], selector); w[24] = hc_byte_perm (w[ 7], w[ 6], selector); w[23] = hc_byte_perm (w[ 6], w[ 5], selector); w[22] = hc_byte_perm (w[ 5], w[ 4], selector); w[21] = hc_byte_perm (w[ 4], w[ 3], selector); w[20] = hc_byte_perm (w[ 3], w[ 2], selector); w[19] = hc_byte_perm (w[ 2], w[ 1], selector); w[18] = hc_byte_perm (w[ 1], w[ 0], selector); w[17] = hc_byte_perm (w[ 0], 0, selector); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_byte_perm (w[45], w[44], selector); w[62] = hc_byte_perm (w[44], w[43], selector); w[61] = hc_byte_perm (w[43], w[42], selector); w[60] = hc_byte_perm (w[42], w[41], selector); w[59] = hc_byte_perm (w[41], w[40], selector); w[58] = hc_byte_perm (w[40], w[39], selector); w[57] = hc_byte_perm (w[39], w[38], selector); w[56] = hc_byte_perm (w[38], w[37], selector); w[55] = hc_byte_perm (w[37], w[36], selector); w[54] = hc_byte_perm (w[36], w[35], selector); w[53] = hc_byte_perm (w[35], w[34], selector); w[52] = hc_byte_perm (w[34], w[33], selector); w[51] = hc_byte_perm (w[33], w[32], selector); w[50] = hc_byte_perm (w[32], w[31], selector); w[49] = hc_byte_perm 
(w[31], w[30], selector); w[48] = hc_byte_perm (w[30], w[29], selector); w[47] = hc_byte_perm (w[29], w[28], selector); w[46] = hc_byte_perm (w[28], w[27], selector); w[45] = hc_byte_perm (w[27], w[26], selector); w[44] = hc_byte_perm (w[26], w[25], selector); w[43] = hc_byte_perm (w[25], w[24], selector); w[42] = hc_byte_perm (w[24], w[23], selector); w[41] = hc_byte_perm (w[23], w[22], selector); w[40] = hc_byte_perm (w[22], w[21], selector); w[39] = hc_byte_perm (w[21], w[20], selector); w[38] = hc_byte_perm (w[20], w[19], selector); w[37] = hc_byte_perm (w[19], w[18], selector); w[36] = hc_byte_perm (w[18], w[17], selector); w[35] = hc_byte_perm (w[17], w[16], selector); w[34] = hc_byte_perm (w[16], w[15], selector); w[33] = hc_byte_perm (w[15], w[14], selector); w[32] = hc_byte_perm (w[14], w[13], selector); w[31] = hc_byte_perm (w[13], w[12], selector); w[30] = hc_byte_perm (w[12], w[11], selector); w[29] = hc_byte_perm (w[11], w[10], selector); w[28] = hc_byte_perm (w[10], w[ 9], selector); w[27] = hc_byte_perm (w[ 9], w[ 8], selector); w[26] = hc_byte_perm (w[ 8], w[ 7], selector); w[25] = hc_byte_perm (w[ 7], w[ 6], selector); w[24] = hc_byte_perm (w[ 6], w[ 5], selector); w[23] = hc_byte_perm (w[ 5], w[ 4], selector); w[22] = hc_byte_perm (w[ 4], w[ 3], selector); w[21] = hc_byte_perm (w[ 3], w[ 2], selector); w[20] = hc_byte_perm (w[ 2], w[ 1], selector); w[19] = hc_byte_perm (w[ 1], w[ 0], selector); w[18] = hc_byte_perm (w[ 0], 0, selector); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_byte_perm (w[44], w[43], selector); w[62] = hc_byte_perm (w[43], w[42], selector); w[61] = hc_byte_perm (w[42], w[41], selector); w[60] = hc_byte_perm (w[41], w[40], selector); w[59] = hc_byte_perm (w[40], w[39], selector); w[58] = hc_byte_perm (w[39], w[38], selector); w[57] = hc_byte_perm 
(w[38], w[37], selector); w[56] = hc_byte_perm (w[37], w[36], selector); w[55] = hc_byte_perm (w[36], w[35], selector); w[54] = hc_byte_perm (w[35], w[34], selector); w[53] = hc_byte_perm (w[34], w[33], selector); w[52] = hc_byte_perm (w[33], w[32], selector); w[51] = hc_byte_perm (w[32], w[31], selector); w[50] = hc_byte_perm (w[31], w[30], selector); w[49] = hc_byte_perm (w[30], w[29], selector); w[48] = hc_byte_perm (w[29], w[28], selector); w[47] = hc_byte_perm (w[28], w[27], selector); w[46] = hc_byte_perm (w[27], w[26], selector); w[45] = hc_byte_perm (w[26], w[25], selector); w[44] = hc_byte_perm (w[25], w[24], selector); w[43] = hc_byte_perm (w[24], w[23], selector); w[42] = hc_byte_perm (w[23], w[22], selector); w[41] = hc_byte_perm (w[22], w[21], selector); w[40] = hc_byte_perm (w[21], w[20], selector); w[39] = hc_byte_perm (w[20], w[19], selector); w[38] = hc_byte_perm (w[19], w[18], selector); w[37] = hc_byte_perm (w[18], w[17], selector); w[36] = hc_byte_perm (w[17], w[16], selector); w[35] = hc_byte_perm (w[16], w[15], selector); w[34] = hc_byte_perm (w[15], w[14], selector); w[33] = hc_byte_perm (w[14], w[13], selector); w[32] = hc_byte_perm (w[13], w[12], selector); w[31] = hc_byte_perm (w[12], w[11], selector); w[30] = hc_byte_perm (w[11], w[10], selector); w[29] = hc_byte_perm (w[10], w[ 9], selector); w[28] = hc_byte_perm (w[ 9], w[ 8], selector); w[27] = hc_byte_perm (w[ 8], w[ 7], selector); w[26] = hc_byte_perm (w[ 7], w[ 6], selector); w[25] = hc_byte_perm (w[ 6], w[ 5], selector); w[24] = hc_byte_perm (w[ 5], w[ 4], selector); w[23] = hc_byte_perm (w[ 4], w[ 3], selector); w[22] = hc_byte_perm (w[ 3], w[ 2], selector); w[21] = hc_byte_perm (w[ 2], w[ 1], selector); w[20] = hc_byte_perm (w[ 1], w[ 0], selector); w[19] = hc_byte_perm (w[ 0], 0, selector); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 
1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_byte_perm (w[43], w[42], selector); w[62] = hc_byte_perm (w[42], w[41], selector); w[61] = hc_byte_perm (w[41], w[40], selector); w[60] = hc_byte_perm (w[40], w[39], selector); w[59] = hc_byte_perm (w[39], w[38], selector); w[58] = hc_byte_perm (w[38], w[37], selector); w[57] = hc_byte_perm (w[37], w[36], selector); w[56] = hc_byte_perm (w[36], w[35], selector); w[55] = hc_byte_perm (w[35], w[34], selector); w[54] = hc_byte_perm (w[34], w[33], selector); w[53] = hc_byte_perm (w[33], w[32], selector); w[52] = hc_byte_perm (w[32], w[31], selector); w[51] = hc_byte_perm (w[31], w[30], selector); w[50] = hc_byte_perm (w[30], w[29], selector); w[49] = hc_byte_perm (w[29], w[28], selector); w[48] = hc_byte_perm (w[28], w[27], selector); w[47] = hc_byte_perm (w[27], w[26], selector); w[46] = hc_byte_perm (w[26], w[25], selector); w[45] = hc_byte_perm (w[25], w[24], selector); w[44] = hc_byte_perm (w[24], w[23], selector); w[43] = hc_byte_perm (w[23], w[22], selector); w[42] = hc_byte_perm (w[22], w[21], selector); w[41] = hc_byte_perm (w[21], w[20], selector); w[40] = hc_byte_perm (w[20], w[19], selector); w[39] = hc_byte_perm (w[19], w[18], selector); w[38] = hc_byte_perm (w[18], w[17], selector); w[37] = hc_byte_perm (w[17], w[16], selector); w[36] = hc_byte_perm (w[16], w[15], selector); w[35] = hc_byte_perm (w[15], w[14], selector); w[34] = hc_byte_perm (w[14], w[13], selector); w[33] = hc_byte_perm (w[13], w[12], selector); w[32] = hc_byte_perm (w[12], w[11], selector); w[31] = hc_byte_perm (w[11], w[10], selector); w[30] = hc_byte_perm (w[10], w[ 9], selector); w[29] = hc_byte_perm (w[ 9], w[ 8], selector); w[28] = hc_byte_perm (w[ 8], w[ 7], selector); w[27] = hc_byte_perm (w[ 7], w[ 6], selector); w[26] = hc_byte_perm (w[ 6], w[ 5], selector); w[25] = hc_byte_perm (w[ 5], w[ 4], selector); w[24] = hc_byte_perm (w[ 4], w[ 3], selector); w[23] = hc_byte_perm (w[ 3], w[ 2], selector); w[22] = hc_byte_perm (w[ 2], w[ 1], 
selector); w[21] = hc_byte_perm (w[ 1], w[ 0], selector); w[20] = hc_byte_perm (w[ 0], 0, selector); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_byte_perm (w[42], w[41], selector); w[62] = hc_byte_perm (w[41], w[40], selector); w[61] = hc_byte_perm (w[40], w[39], selector); w[60] = hc_byte_perm (w[39], w[38], selector); w[59] = hc_byte_perm (w[38], w[37], selector); w[58] = hc_byte_perm (w[37], w[36], selector); w[57] = hc_byte_perm (w[36], w[35], selector); w[56] = hc_byte_perm (w[35], w[34], selector); w[55] = hc_byte_perm (w[34], w[33], selector); w[54] = hc_byte_perm (w[33], w[32], selector); w[53] = hc_byte_perm (w[32], w[31], selector); w[52] = hc_byte_perm (w[31], w[30], selector); w[51] = hc_byte_perm (w[30], w[29], selector); w[50] = hc_byte_perm (w[29], w[28], selector); w[49] = hc_byte_perm (w[28], w[27], selector); w[48] = hc_byte_perm (w[27], w[26], selector); w[47] = hc_byte_perm (w[26], w[25], selector); w[46] = hc_byte_perm (w[25], w[24], selector); w[45] = hc_byte_perm (w[24], w[23], selector); w[44] = hc_byte_perm (w[23], w[22], selector); w[43] = hc_byte_perm (w[22], w[21], selector); w[42] = hc_byte_perm (w[21], w[20], selector); w[41] = hc_byte_perm (w[20], w[19], selector); w[40] = hc_byte_perm (w[19], w[18], selector); w[39] = hc_byte_perm (w[18], w[17], selector); w[38] = hc_byte_perm (w[17], w[16], selector); w[37] = hc_byte_perm (w[16], w[15], selector); w[36] = hc_byte_perm (w[15], w[14], selector); w[35] = hc_byte_perm (w[14], w[13], selector); w[34] = hc_byte_perm (w[13], w[12], selector); w[33] = hc_byte_perm (w[12], w[11], selector); w[32] = hc_byte_perm (w[11], w[10], selector); w[31] = hc_byte_perm (w[10], w[ 9], selector); w[30] = hc_byte_perm (w[ 9], w[ 8], selector); w[29] = hc_byte_perm (w[ 8], w[ 7], selector); w[28] = 
hc_byte_perm (w[ 7], w[ 6], selector); w[27] = hc_byte_perm (w[ 6], w[ 5], selector); w[26] = hc_byte_perm (w[ 5], w[ 4], selector); w[25] = hc_byte_perm (w[ 4], w[ 3], selector); w[24] = hc_byte_perm (w[ 3], w[ 2], selector); w[23] = hc_byte_perm (w[ 2], w[ 1], selector); w[22] = hc_byte_perm (w[ 1], w[ 0], selector); w[21] = hc_byte_perm (w[ 0], 0, selector); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_byte_perm (w[41], w[40], selector); w[62] = hc_byte_perm (w[40], w[39], selector); w[61] = hc_byte_perm (w[39], w[38], selector); w[60] = hc_byte_perm (w[38], w[37], selector); w[59] = hc_byte_perm (w[37], w[36], selector); w[58] = hc_byte_perm (w[36], w[35], selector); w[57] = hc_byte_perm (w[35], w[34], selector); w[56] = hc_byte_perm (w[34], w[33], selector); w[55] = hc_byte_perm (w[33], w[32], selector); w[54] = hc_byte_perm (w[32], w[31], selector); w[53] = hc_byte_perm (w[31], w[30], selector); w[52] = hc_byte_perm (w[30], w[29], selector); w[51] = hc_byte_perm (w[29], w[28], selector); w[50] = hc_byte_perm (w[28], w[27], selector); w[49] = hc_byte_perm (w[27], w[26], selector); w[48] = hc_byte_perm (w[26], w[25], selector); w[47] = hc_byte_perm (w[25], w[24], selector); w[46] = hc_byte_perm (w[24], w[23], selector); w[45] = hc_byte_perm (w[23], w[22], selector); w[44] = hc_byte_perm (w[22], w[21], selector); w[43] = hc_byte_perm (w[21], w[20], selector); w[42] = hc_byte_perm (w[20], w[19], selector); w[41] = hc_byte_perm (w[19], w[18], selector); w[40] = hc_byte_perm (w[18], w[17], selector); w[39] = hc_byte_perm (w[17], w[16], selector); w[38] = hc_byte_perm (w[16], w[15], selector); w[37] = hc_byte_perm (w[15], w[14], selector); w[36] = hc_byte_perm (w[14], w[13], selector); w[35] = hc_byte_perm (w[13], w[12], selector); w[34] = hc_byte_perm 
(w[12], w[11], selector); w[33] = hc_byte_perm (w[11], w[10], selector); w[32] = hc_byte_perm (w[10], w[ 9], selector); w[31] = hc_byte_perm (w[ 9], w[ 8], selector); w[30] = hc_byte_perm (w[ 8], w[ 7], selector); w[29] = hc_byte_perm (w[ 7], w[ 6], selector); w[28] = hc_byte_perm (w[ 6], w[ 5], selector); w[27] = hc_byte_perm (w[ 5], w[ 4], selector); w[26] = hc_byte_perm (w[ 4], w[ 3], selector); w[25] = hc_byte_perm (w[ 3], w[ 2], selector); w[24] = hc_byte_perm (w[ 2], w[ 1], selector); w[23] = hc_byte_perm (w[ 1], w[ 0], selector); w[22] = hc_byte_perm (w[ 0], 0, selector); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_byte_perm (w[40], w[39], selector); w[62] = hc_byte_perm (w[39], w[38], selector); w[61] = hc_byte_perm (w[38], w[37], selector); w[60] = hc_byte_perm (w[37], w[36], selector); w[59] = hc_byte_perm (w[36], w[35], selector); w[58] = hc_byte_perm (w[35], w[34], selector); w[57] = hc_byte_perm (w[34], w[33], selector); w[56] = hc_byte_perm (w[33], w[32], selector); w[55] = hc_byte_perm (w[32], w[31], selector); w[54] = hc_byte_perm (w[31], w[30], selector); w[53] = hc_byte_perm (w[30], w[29], selector); w[52] = hc_byte_perm (w[29], w[28], selector); w[51] = hc_byte_perm (w[28], w[27], selector); w[50] = hc_byte_perm (w[27], w[26], selector); w[49] = hc_byte_perm (w[26], w[25], selector); w[48] = hc_byte_perm (w[25], w[24], selector); w[47] = hc_byte_perm (w[24], w[23], selector); w[46] = hc_byte_perm (w[23], w[22], selector); w[45] = hc_byte_perm (w[22], w[21], selector); w[44] = hc_byte_perm (w[21], w[20], selector); w[43] = hc_byte_perm (w[20], w[19], selector); w[42] = hc_byte_perm (w[19], w[18], selector); w[41] = hc_byte_perm (w[18], w[17], selector); w[40] = hc_byte_perm (w[17], w[16], selector); w[39] = hc_byte_perm 
(w[16], w[15], selector); w[38] = hc_byte_perm (w[15], w[14], selector); w[37] = hc_byte_perm (w[14], w[13], selector); w[36] = hc_byte_perm (w[13], w[12], selector); w[35] = hc_byte_perm (w[12], w[11], selector); w[34] = hc_byte_perm (w[11], w[10], selector); w[33] = hc_byte_perm (w[10], w[ 9], selector); w[32] = hc_byte_perm (w[ 9], w[ 8], selector); w[31] = hc_byte_perm (w[ 8], w[ 7], selector); w[30] = hc_byte_perm (w[ 7], w[ 6], selector); w[29] = hc_byte_perm (w[ 6], w[ 5], selector); w[28] = hc_byte_perm (w[ 5], w[ 4], selector); w[27] = hc_byte_perm (w[ 4], w[ 3], selector); w[26] = hc_byte_perm (w[ 3], w[ 2], selector); w[25] = hc_byte_perm (w[ 2], w[ 1], selector); w[24] = hc_byte_perm (w[ 1], w[ 0], selector); w[23] = hc_byte_perm (w[ 0], 0, selector); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_byte_perm (w[39], w[38], selector); w[62] = hc_byte_perm (w[38], w[37], selector); w[61] = hc_byte_perm (w[37], w[36], selector); w[60] = hc_byte_perm (w[36], w[35], selector); w[59] = hc_byte_perm (w[35], w[34], selector); w[58] = hc_byte_perm (w[34], w[33], selector); w[57] = hc_byte_perm (w[33], w[32], selector); w[56] = hc_byte_perm (w[32], w[31], selector); w[55] = hc_byte_perm (w[31], w[30], selector); w[54] = hc_byte_perm (w[30], w[29], selector); w[53] = hc_byte_perm (w[29], w[28], selector); w[52] = hc_byte_perm (w[28], w[27], selector); w[51] = hc_byte_perm (w[27], w[26], selector); w[50] = hc_byte_perm (w[26], w[25], selector); w[49] = hc_byte_perm (w[25], w[24], selector); w[48] = hc_byte_perm (w[24], w[23], selector); w[47] = hc_byte_perm (w[23], w[22], selector); w[46] = hc_byte_perm (w[22], w[21], selector); w[45] = hc_byte_perm (w[21], w[20], selector); w[44] = hc_byte_perm (w[20], w[19], selector); w[43] = 
hc_byte_perm (w[19], w[18], selector); w[42] = hc_byte_perm (w[18], w[17], selector); w[41] = hc_byte_perm (w[17], w[16], selector); w[40] = hc_byte_perm (w[16], w[15], selector); w[39] = hc_byte_perm (w[15], w[14], selector); w[38] = hc_byte_perm (w[14], w[13], selector); w[37] = hc_byte_perm (w[13], w[12], selector); w[36] = hc_byte_perm (w[12], w[11], selector); w[35] = hc_byte_perm (w[11], w[10], selector); w[34] = hc_byte_perm (w[10], w[ 9], selector); w[33] = hc_byte_perm (w[ 9], w[ 8], selector); w[32] = hc_byte_perm (w[ 8], w[ 7], selector); w[31] = hc_byte_perm (w[ 7], w[ 6], selector); w[30] = hc_byte_perm (w[ 6], w[ 5], selector); w[29] = hc_byte_perm (w[ 5], w[ 4], selector); w[28] = hc_byte_perm (w[ 4], w[ 3], selector); w[27] = hc_byte_perm (w[ 3], w[ 2], selector); w[26] = hc_byte_perm (w[ 2], w[ 1], selector); w[25] = hc_byte_perm (w[ 1], w[ 0], selector); w[24] = hc_byte_perm (w[ 0], 0, selector); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_byte_perm (w[38], w[37], selector); w[62] = hc_byte_perm (w[37], w[36], selector); w[61] = hc_byte_perm (w[36], w[35], selector); w[60] = hc_byte_perm (w[35], w[34], selector); w[59] = hc_byte_perm (w[34], w[33], selector); w[58] = hc_byte_perm (w[33], w[32], selector); w[57] = hc_byte_perm (w[32], w[31], selector); w[56] = hc_byte_perm (w[31], w[30], selector); w[55] = hc_byte_perm (w[30], w[29], selector); w[54] = hc_byte_perm (w[29], w[28], selector); w[53] = hc_byte_perm (w[28], w[27], selector); w[52] = hc_byte_perm (w[27], w[26], selector); w[51] = hc_byte_perm (w[26], w[25], selector); w[50] = hc_byte_perm (w[25], w[24], selector); w[49] = hc_byte_perm (w[24], w[23], selector); w[48] = hc_byte_perm (w[23], w[22], selector); w[47] = hc_byte_perm (w[22], w[21], 
selector); w[46] = hc_byte_perm (w[21], w[20], selector); w[45] = hc_byte_perm (w[20], w[19], selector); w[44] = hc_byte_perm (w[19], w[18], selector); w[43] = hc_byte_perm (w[18], w[17], selector); w[42] = hc_byte_perm (w[17], w[16], selector); w[41] = hc_byte_perm (w[16], w[15], selector); w[40] = hc_byte_perm (w[15], w[14], selector); w[39] = hc_byte_perm (w[14], w[13], selector); w[38] = hc_byte_perm (w[13], w[12], selector); w[37] = hc_byte_perm (w[12], w[11], selector); w[36] = hc_byte_perm (w[11], w[10], selector); w[35] = hc_byte_perm (w[10], w[ 9], selector); w[34] = hc_byte_perm (w[ 9], w[ 8], selector); w[33] = hc_byte_perm (w[ 8], w[ 7], selector); w[32] = hc_byte_perm (w[ 7], w[ 6], selector); w[31] = hc_byte_perm (w[ 6], w[ 5], selector); w[30] = hc_byte_perm (w[ 5], w[ 4], selector); w[29] = hc_byte_perm (w[ 4], w[ 3], selector); w[28] = hc_byte_perm (w[ 3], w[ 2], selector); w[27] = hc_byte_perm (w[ 2], w[ 1], selector); w[26] = hc_byte_perm (w[ 1], w[ 0], selector); w[25] = hc_byte_perm (w[ 0], 0, selector); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_byte_perm (w[37], w[36], selector); w[62] = hc_byte_perm (w[36], w[35], selector); w[61] = hc_byte_perm (w[35], w[34], selector); w[60] = hc_byte_perm (w[34], w[33], selector); w[59] = hc_byte_perm (w[33], w[32], selector); w[58] = hc_byte_perm (w[32], w[31], selector); w[57] = hc_byte_perm (w[31], w[30], selector); w[56] = hc_byte_perm (w[30], w[29], selector); w[55] = hc_byte_perm (w[29], w[28], selector); w[54] = hc_byte_perm (w[28], w[27], selector); w[53] = hc_byte_perm (w[27], w[26], selector); w[52] = hc_byte_perm (w[26], w[25], selector); w[51] = hc_byte_perm (w[25], w[24], selector); w[50] = hc_byte_perm (w[24], w[23], selector); w[49] = 
hc_byte_perm (w[23], w[22], selector); w[48] = hc_byte_perm (w[22], w[21], selector); w[47] = hc_byte_perm (w[21], w[20], selector); w[46] = hc_byte_perm (w[20], w[19], selector); w[45] = hc_byte_perm (w[19], w[18], selector); w[44] = hc_byte_perm (w[18], w[17], selector); w[43] = hc_byte_perm (w[17], w[16], selector); w[42] = hc_byte_perm (w[16], w[15], selector); w[41] = hc_byte_perm (w[15], w[14], selector); w[40] = hc_byte_perm (w[14], w[13], selector); w[39] = hc_byte_perm (w[13], w[12], selector); w[38] = hc_byte_perm (w[12], w[11], selector); w[37] = hc_byte_perm (w[11], w[10], selector); w[36] = hc_byte_perm (w[10], w[ 9], selector); w[35] = hc_byte_perm (w[ 9], w[ 8], selector); w[34] = hc_byte_perm (w[ 8], w[ 7], selector); w[33] = hc_byte_perm (w[ 7], w[ 6], selector); w[32] = hc_byte_perm (w[ 6], w[ 5], selector); w[31] = hc_byte_perm (w[ 5], w[ 4], selector); w[30] = hc_byte_perm (w[ 4], w[ 3], selector); w[29] = hc_byte_perm (w[ 3], w[ 2], selector); w[28] = hc_byte_perm (w[ 2], w[ 1], selector); w[27] = hc_byte_perm (w[ 1], w[ 0], selector); w[26] = hc_byte_perm (w[ 0], 0, selector); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_byte_perm (w[36], w[35], selector); w[62] = hc_byte_perm (w[35], w[34], selector); w[61] = hc_byte_perm (w[34], w[33], selector); w[60] = hc_byte_perm (w[33], w[32], selector); w[59] = hc_byte_perm (w[32], w[31], selector); w[58] = hc_byte_perm (w[31], w[30], selector); w[57] = hc_byte_perm (w[30], w[29], selector); w[56] = hc_byte_perm (w[29], w[28], selector); w[55] = hc_byte_perm (w[28], w[27], selector); w[54] = hc_byte_perm (w[27], w[26], selector); w[53] = hc_byte_perm (w[26], w[25], selector); w[52] = hc_byte_perm (w[25], w[24], selector); w[51] = 
hc_byte_perm (w[24], w[23], selector); w[50] = hc_byte_perm (w[23], w[22], selector); w[49] = hc_byte_perm (w[22], w[21], selector); w[48] = hc_byte_perm (w[21], w[20], selector); w[47] = hc_byte_perm (w[20], w[19], selector); w[46] = hc_byte_perm (w[19], w[18], selector); w[45] = hc_byte_perm (w[18], w[17], selector); w[44] = hc_byte_perm (w[17], w[16], selector); w[43] = hc_byte_perm (w[16], w[15], selector); w[42] = hc_byte_perm (w[15], w[14], selector); w[41] = hc_byte_perm (w[14], w[13], selector); w[40] = hc_byte_perm (w[13], w[12], selector); w[39] = hc_byte_perm (w[12], w[11], selector); w[38] = hc_byte_perm (w[11], w[10], selector); w[37] = hc_byte_perm (w[10], w[ 9], selector); w[36] = hc_byte_perm (w[ 9], w[ 8], selector); w[35] = hc_byte_perm (w[ 8], w[ 7], selector); w[34] = hc_byte_perm (w[ 7], w[ 6], selector); w[33] = hc_byte_perm (w[ 6], w[ 5], selector); w[32] = hc_byte_perm (w[ 5], w[ 4], selector); w[31] = hc_byte_perm (w[ 4], w[ 3], selector); w[30] = hc_byte_perm (w[ 3], w[ 2], selector); w[29] = hc_byte_perm (w[ 2], w[ 1], selector); w[28] = hc_byte_perm (w[ 1], w[ 0], selector); w[27] = hc_byte_perm (w[ 0], 0, selector); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_byte_perm (w[35], w[34], selector); w[62] = hc_byte_perm (w[34], w[33], selector); w[61] = hc_byte_perm (w[33], w[32], selector); w[60] = hc_byte_perm (w[32], w[31], selector); w[59] = hc_byte_perm (w[31], w[30], selector); w[58] = hc_byte_perm (w[30], w[29], selector); w[57] = hc_byte_perm (w[29], w[28], selector); w[56] = hc_byte_perm (w[28], w[27], selector); w[55] = hc_byte_perm (w[27], w[26], selector); w[54] = hc_byte_perm (w[26], w[25], selector); w[53] = hc_byte_perm (w[25], w[24], selector); w[52] 
= hc_byte_perm (w[24], w[23], selector); w[51] = hc_byte_perm (w[23], w[22], selector); w[50] = hc_byte_perm (w[22], w[21], selector); w[49] = hc_byte_perm (w[21], w[20], selector); w[48] = hc_byte_perm (w[20], w[19], selector); w[47] = hc_byte_perm (w[19], w[18], selector); w[46] = hc_byte_perm (w[18], w[17], selector); w[45] = hc_byte_perm (w[17], w[16], selector); w[44] = hc_byte_perm (w[16], w[15], selector); w[43] = hc_byte_perm (w[15], w[14], selector); w[42] = hc_byte_perm (w[14], w[13], selector); w[41] = hc_byte_perm (w[13], w[12], selector); w[40] = hc_byte_perm (w[12], w[11], selector); w[39] = hc_byte_perm (w[11], w[10], selector); w[38] = hc_byte_perm (w[10], w[ 9], selector); w[37] = hc_byte_perm (w[ 9], w[ 8], selector); w[36] = hc_byte_perm (w[ 8], w[ 7], selector); w[35] = hc_byte_perm (w[ 7], w[ 6], selector); w[34] = hc_byte_perm (w[ 6], w[ 5], selector); w[33] = hc_byte_perm (w[ 5], w[ 4], selector); w[32] = hc_byte_perm (w[ 4], w[ 3], selector); w[31] = hc_byte_perm (w[ 3], w[ 2], selector); w[30] = hc_byte_perm (w[ 2], w[ 1], selector); w[29] = hc_byte_perm (w[ 1], w[ 0], selector); w[28] = hc_byte_perm (w[ 0], 0, selector); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_byte_perm (w[34], w[33], selector); w[62] = hc_byte_perm (w[33], w[32], selector); w[61] = hc_byte_perm (w[32], w[31], selector); w[60] = hc_byte_perm (w[31], w[30], selector); w[59] = hc_byte_perm (w[30], w[29], selector); w[58] = hc_byte_perm (w[29], w[28], selector); w[57] = hc_byte_perm (w[28], w[27], selector); w[56] = hc_byte_perm (w[27], w[26], selector); w[55] = hc_byte_perm (w[26], w[25], selector); w[54] = hc_byte_perm (w[25], w[24], selector); w[53] = hc_byte_perm (w[24], w[23], 
selector); w[52] = hc_byte_perm (w[23], w[22], selector); w[51] = hc_byte_perm (w[22], w[21], selector); w[50] = hc_byte_perm (w[21], w[20], selector); w[49] = hc_byte_perm (w[20], w[19], selector); w[48] = hc_byte_perm (w[19], w[18], selector); w[47] = hc_byte_perm (w[18], w[17], selector); w[46] = hc_byte_perm (w[17], w[16], selector); w[45] = hc_byte_perm (w[16], w[15], selector); w[44] = hc_byte_perm (w[15], w[14], selector); w[43] = hc_byte_perm (w[14], w[13], selector); w[42] = hc_byte_perm (w[13], w[12], selector); w[41] = hc_byte_perm (w[12], w[11], selector); w[40] = hc_byte_perm (w[11], w[10], selector); w[39] = hc_byte_perm (w[10], w[ 9], selector); w[38] = hc_byte_perm (w[ 9], w[ 8], selector); w[37] = hc_byte_perm (w[ 8], w[ 7], selector); w[36] = hc_byte_perm (w[ 7], w[ 6], selector); w[35] = hc_byte_perm (w[ 6], w[ 5], selector); w[34] = hc_byte_perm (w[ 5], w[ 4], selector); w[33] = hc_byte_perm (w[ 4], w[ 3], selector); w[32] = hc_byte_perm (w[ 3], w[ 2], selector); w[31] = hc_byte_perm (w[ 2], w[ 1], selector); w[30] = hc_byte_perm (w[ 1], w[ 0], selector); w[29] = hc_byte_perm (w[ 0], 0, selector); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_byte_perm (w[33], w[32], selector); w[62] = hc_byte_perm (w[32], w[31], selector); w[61] = hc_byte_perm (w[31], w[30], selector); w[60] = hc_byte_perm (w[30], w[29], selector); w[59] = hc_byte_perm (w[29], w[28], selector); w[58] = hc_byte_perm (w[28], w[27], selector); w[57] = hc_byte_perm (w[27], w[26], selector); w[56] = hc_byte_perm (w[26], w[25], selector); w[55] = hc_byte_perm (w[25], w[24], selector); w[54] = hc_byte_perm (w[24], w[23], selector); w[53] = hc_byte_perm (w[23], w[22], selector); w[52] = 
hc_byte_perm (w[22], w[21], selector); w[51] = hc_byte_perm (w[21], w[20], selector); w[50] = hc_byte_perm (w[20], w[19], selector); w[49] = hc_byte_perm (w[19], w[18], selector); w[48] = hc_byte_perm (w[18], w[17], selector); w[47] = hc_byte_perm (w[17], w[16], selector); w[46] = hc_byte_perm (w[16], w[15], selector); w[45] = hc_byte_perm (w[15], w[14], selector); w[44] = hc_byte_perm (w[14], w[13], selector); w[43] = hc_byte_perm (w[13], w[12], selector); w[42] = hc_byte_perm (w[12], w[11], selector); w[41] = hc_byte_perm (w[11], w[10], selector); w[40] = hc_byte_perm (w[10], w[ 9], selector); w[39] = hc_byte_perm (w[ 9], w[ 8], selector); w[38] = hc_byte_perm (w[ 8], w[ 7], selector); w[37] = hc_byte_perm (w[ 7], w[ 6], selector); w[36] = hc_byte_perm (w[ 6], w[ 5], selector); w[35] = hc_byte_perm (w[ 5], w[ 4], selector); w[34] = hc_byte_perm (w[ 4], w[ 3], selector); w[33] = hc_byte_perm (w[ 3], w[ 2], selector); w[32] = hc_byte_perm (w[ 2], w[ 1], selector); w[31] = hc_byte_perm (w[ 1], w[ 0], selector); w[30] = hc_byte_perm (w[ 0], 0, selector); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_byte_perm (w[32], w[31], selector); w[62] = hc_byte_perm (w[31], w[30], selector); w[61] = hc_byte_perm (w[30], w[29], selector); w[60] = hc_byte_perm (w[29], w[28], selector); w[59] = hc_byte_perm (w[28], w[27], selector); w[58] = hc_byte_perm (w[27], w[26], selector); w[57] = hc_byte_perm (w[26], w[25], selector); w[56] = hc_byte_perm (w[25], w[24], selector); w[55] = hc_byte_perm (w[24], w[23], selector); w[54] = hc_byte_perm (w[23], w[22], selector); w[53] = hc_byte_perm (w[22], w[21], selector); w[52] = hc_byte_perm (w[21], w[20], selector); w[51] = hc_byte_perm 
(w[20], w[19], selector); w[50] = hc_byte_perm (w[19], w[18], selector); w[49] = hc_byte_perm (w[18], w[17], selector); w[48] = hc_byte_perm (w[17], w[16], selector); w[47] = hc_byte_perm (w[16], w[15], selector); w[46] = hc_byte_perm (w[15], w[14], selector); w[45] = hc_byte_perm (w[14], w[13], selector); w[44] = hc_byte_perm (w[13], w[12], selector); w[43] = hc_byte_perm (w[12], w[11], selector); w[42] = hc_byte_perm (w[11], w[10], selector); w[41] = hc_byte_perm (w[10], w[ 9], selector); w[40] = hc_byte_perm (w[ 9], w[ 8], selector); w[39] = hc_byte_perm (w[ 8], w[ 7], selector); w[38] = hc_byte_perm (w[ 7], w[ 6], selector); w[37] = hc_byte_perm (w[ 6], w[ 5], selector); w[36] = hc_byte_perm (w[ 5], w[ 4], selector); w[35] = hc_byte_perm (w[ 4], w[ 3], selector); w[34] = hc_byte_perm (w[ 3], w[ 2], selector); w[33] = hc_byte_perm (w[ 2], w[ 1], selector); w[32] = hc_byte_perm (w[ 1], w[ 0], selector); w[31] = hc_byte_perm (w[ 0], 0, selector); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_byte_perm (w[31], w[30], selector); w[62] = hc_byte_perm (w[30], w[29], selector); w[61] = hc_byte_perm (w[29], w[28], selector); w[60] = hc_byte_perm (w[28], w[27], selector); w[59] = hc_byte_perm (w[27], w[26], selector); w[58] = hc_byte_perm (w[26], w[25], selector); w[57] = hc_byte_perm (w[25], w[24], selector); w[56] = hc_byte_perm (w[24], w[23], selector); w[55] = hc_byte_perm (w[23], w[22], selector); w[54] = hc_byte_perm (w[22], w[21], selector); w[53] = hc_byte_perm (w[21], w[20], selector); w[52] = hc_byte_perm (w[20], w[19], selector); w[51] = hc_byte_perm (w[19], w[18], selector); w[50] = hc_byte_perm (w[18], w[17], selector); w[49] = hc_byte_perm 
(w[17], w[16], selector); w[48] = hc_byte_perm (w[16], w[15], selector); w[47] = hc_byte_perm (w[15], w[14], selector); w[46] = hc_byte_perm (w[14], w[13], selector); w[45] = hc_byte_perm (w[13], w[12], selector); w[44] = hc_byte_perm (w[12], w[11], selector); w[43] = hc_byte_perm (w[11], w[10], selector); w[42] = hc_byte_perm (w[10], w[ 9], selector); w[41] = hc_byte_perm (w[ 9], w[ 8], selector); w[40] = hc_byte_perm (w[ 8], w[ 7], selector); w[39] = hc_byte_perm (w[ 7], w[ 6], selector); w[38] = hc_byte_perm (w[ 6], w[ 5], selector); w[37] = hc_byte_perm (w[ 5], w[ 4], selector); w[36] = hc_byte_perm (w[ 4], w[ 3], selector); w[35] = hc_byte_perm (w[ 3], w[ 2], selector); w[34] = hc_byte_perm (w[ 2], w[ 1], selector); w[33] = hc_byte_perm (w[ 1], w[ 0], selector); w[32] = hc_byte_perm (w[ 0], 0, selector); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_byte_perm (w[30], w[29], selector); w[62] = hc_byte_perm (w[29], w[28], selector); w[61] = hc_byte_perm (w[28], w[27], selector); w[60] = hc_byte_perm (w[27], w[26], selector); w[59] = hc_byte_perm (w[26], w[25], selector); w[58] = hc_byte_perm (w[25], w[24], selector); w[57] = hc_byte_perm (w[24], w[23], selector); w[56] = hc_byte_perm (w[23], w[22], selector); w[55] = hc_byte_perm (w[22], w[21], selector); w[54] = hc_byte_perm (w[21], w[20], selector); w[53] = hc_byte_perm (w[20], w[19], selector); w[52] = hc_byte_perm (w[19], w[18], selector); w[51] = hc_byte_perm (w[18], w[17], selector); w[50] = hc_byte_perm (w[17], w[16], selector); w[49] = hc_byte_perm (w[16], w[15], selector); w[48] = hc_byte_perm (w[15], w[14], selector); w[47] = hc_byte_perm (w[14], w[13], selector); w[46] = 
hc_byte_perm (w[13], w[12], selector); w[45] = hc_byte_perm (w[12], w[11], selector); w[44] = hc_byte_perm (w[11], w[10], selector); w[43] = hc_byte_perm (w[10], w[ 9], selector); w[42] = hc_byte_perm (w[ 9], w[ 8], selector); w[41] = hc_byte_perm (w[ 8], w[ 7], selector); w[40] = hc_byte_perm (w[ 7], w[ 6], selector); w[39] = hc_byte_perm (w[ 6], w[ 5], selector); w[38] = hc_byte_perm (w[ 5], w[ 4], selector); w[37] = hc_byte_perm (w[ 4], w[ 3], selector); w[36] = hc_byte_perm (w[ 3], w[ 2], selector); w[35] = hc_byte_perm (w[ 2], w[ 1], selector); w[34] = hc_byte_perm (w[ 1], w[ 0], selector); w[33] = hc_byte_perm (w[ 0], 0, selector); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_byte_perm (w[29], w[28], selector); w[62] = hc_byte_perm (w[28], w[27], selector); w[61] = hc_byte_perm (w[27], w[26], selector); w[60] = hc_byte_perm (w[26], w[25], selector); w[59] = hc_byte_perm (w[25], w[24], selector); w[58] = hc_byte_perm (w[24], w[23], selector); w[57] = hc_byte_perm (w[23], w[22], selector); w[56] = hc_byte_perm (w[22], w[21], selector); w[55] = hc_byte_perm (w[21], w[20], selector); w[54] = hc_byte_perm (w[20], w[19], selector); w[53] = hc_byte_perm (w[19], w[18], selector); w[52] = hc_byte_perm (w[18], w[17], selector); w[51] = hc_byte_perm (w[17], w[16], selector); w[50] = hc_byte_perm (w[16], w[15], selector); w[49] = hc_byte_perm (w[15], w[14], selector); w[48] = hc_byte_perm (w[14], w[13], selector); w[47] = hc_byte_perm (w[13], w[12], selector); w[46] = hc_byte_perm (w[12], w[11], selector); w[45] = hc_byte_perm (w[11], w[10], selector); w[44] = hc_byte_perm (w[10], w[ 9], selector); w[43] = hc_byte_perm (w[ 9], w[ 
8], selector); w[42] = hc_byte_perm (w[ 8], w[ 7], selector); w[41] = hc_byte_perm (w[ 7], w[ 6], selector); w[40] = hc_byte_perm (w[ 6], w[ 5], selector); w[39] = hc_byte_perm (w[ 5], w[ 4], selector); w[38] = hc_byte_perm (w[ 4], w[ 3], selector); w[37] = hc_byte_perm (w[ 3], w[ 2], selector); w[36] = hc_byte_perm (w[ 2], w[ 1], selector); w[35] = hc_byte_perm (w[ 1], w[ 0], selector); w[34] = hc_byte_perm (w[ 0], 0, selector); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_byte_perm (w[28], w[27], selector); w[62] = hc_byte_perm (w[27], w[26], selector); w[61] = hc_byte_perm (w[26], w[25], selector); w[60] = hc_byte_perm (w[25], w[24], selector); w[59] = hc_byte_perm (w[24], w[23], selector); w[58] = hc_byte_perm (w[23], w[22], selector); w[57] = hc_byte_perm (w[22], w[21], selector); w[56] = hc_byte_perm (w[21], w[20], selector); w[55] = hc_byte_perm (w[20], w[19], selector); w[54] = hc_byte_perm (w[19], w[18], selector); w[53] = hc_byte_perm (w[18], w[17], selector); w[52] = hc_byte_perm (w[17], w[16], selector); w[51] = hc_byte_perm (w[16], w[15], selector); w[50] = hc_byte_perm (w[15], w[14], selector); w[49] = hc_byte_perm (w[14], w[13], selector); w[48] = hc_byte_perm (w[13], w[12], selector); w[47] = hc_byte_perm (w[12], w[11], selector); w[46] = hc_byte_perm (w[11], w[10], selector); w[45] = hc_byte_perm (w[10], w[ 9], selector); w[44] = hc_byte_perm (w[ 9], w[ 8], selector); w[43] = hc_byte_perm (w[ 8], w[ 7], selector); w[42] = hc_byte_perm (w[ 7], w[ 6], selector); w[41] = hc_byte_perm (w[ 6], w[ 5], selector); w[40] = hc_byte_perm (w[ 5], w[ 4], selector); w[39] = hc_byte_perm (w[ 4], w[ 3], selector); 
w[38] = hc_byte_perm (w[ 3], w[ 2], selector); w[37] = hc_byte_perm (w[ 2], w[ 1], selector); w[36] = hc_byte_perm (w[ 1], w[ 0], selector); w[35] = hc_byte_perm (w[ 0], 0, selector); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_byte_perm (w[27], w[26], selector); w[62] = hc_byte_perm (w[26], w[25], selector); w[61] = hc_byte_perm (w[25], w[24], selector); w[60] = hc_byte_perm (w[24], w[23], selector); w[59] = hc_byte_perm (w[23], w[22], selector); w[58] = hc_byte_perm (w[22], w[21], selector); w[57] = hc_byte_perm (w[21], w[20], selector); w[56] = hc_byte_perm (w[20], w[19], selector); w[55] = hc_byte_perm (w[19], w[18], selector); w[54] = hc_byte_perm (w[18], w[17], selector); w[53] = hc_byte_perm (w[17], w[16], selector); w[52] = hc_byte_perm (w[16], w[15], selector); w[51] = hc_byte_perm (w[15], w[14], selector); w[50] = hc_byte_perm (w[14], w[13], selector); w[49] = hc_byte_perm (w[13], w[12], selector); w[48] = hc_byte_perm (w[12], w[11], selector); w[47] = hc_byte_perm (w[11], w[10], selector); w[46] = hc_byte_perm (w[10], w[ 9], selector); w[45] = hc_byte_perm (w[ 9], w[ 8], selector); w[44] = hc_byte_perm (w[ 8], w[ 7], selector); w[43] = hc_byte_perm (w[ 7], w[ 6], selector); w[42] = hc_byte_perm (w[ 6], w[ 5], selector); w[41] = hc_byte_perm (w[ 5], w[ 4], selector); w[40] = hc_byte_perm (w[ 4], w[ 3], selector); w[39] = hc_byte_perm (w[ 3], w[ 2], selector); w[38] = hc_byte_perm (w[ 2], w[ 1], selector); w[37] = hc_byte_perm (w[ 1], w[ 0], selector); w[36] = hc_byte_perm (w[ 0], 0, selector); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; 
w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_byte_perm (w[26], w[25], selector); w[62] = hc_byte_perm (w[25], w[24], selector); w[61] = hc_byte_perm (w[24], w[23], selector); w[60] = hc_byte_perm (w[23], w[22], selector); w[59] = hc_byte_perm (w[22], w[21], selector); w[58] = hc_byte_perm (w[21], w[20], selector); w[57] = hc_byte_perm (w[20], w[19], selector); w[56] = hc_byte_perm (w[19], w[18], selector); w[55] = hc_byte_perm (w[18], w[17], selector); w[54] = hc_byte_perm (w[17], w[16], selector); w[53] = hc_byte_perm (w[16], w[15], selector); w[52] = hc_byte_perm (w[15], w[14], selector); w[51] = hc_byte_perm (w[14], w[13], selector); w[50] = hc_byte_perm (w[13], w[12], selector); w[49] = hc_byte_perm (w[12], w[11], selector); w[48] = hc_byte_perm (w[11], w[10], selector); w[47] = hc_byte_perm (w[10], w[ 9], selector); w[46] = hc_byte_perm (w[ 9], w[ 8], selector); w[45] = hc_byte_perm (w[ 8], w[ 7], selector); w[44] = hc_byte_perm (w[ 7], w[ 6], selector); w[43] = hc_byte_perm (w[ 6], w[ 5], selector); w[42] = hc_byte_perm (w[ 5], w[ 4], selector); w[41] = hc_byte_perm (w[ 4], w[ 3], selector); w[40] = hc_byte_perm (w[ 3], w[ 2], selector); w[39] = hc_byte_perm (w[ 2], w[ 1], selector); w[38] = hc_byte_perm (w[ 1], w[ 0], selector); w[37] = hc_byte_perm (w[ 0], 0, selector); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 
38: w[63] = hc_byte_perm (w[25], w[24], selector); w[62] = hc_byte_perm (w[24], w[23], selector); w[61] = hc_byte_perm (w[23], w[22], selector); w[60] = hc_byte_perm (w[22], w[21], selector); w[59] = hc_byte_perm (w[21], w[20], selector); w[58] = hc_byte_perm (w[20], w[19], selector); w[57] = hc_byte_perm (w[19], w[18], selector); w[56] = hc_byte_perm (w[18], w[17], selector); w[55] = hc_byte_perm (w[17], w[16], selector); w[54] = hc_byte_perm (w[16], w[15], selector); w[53] = hc_byte_perm (w[15], w[14], selector); w[52] = hc_byte_perm (w[14], w[13], selector); w[51] = hc_byte_perm (w[13], w[12], selector); w[50] = hc_byte_perm (w[12], w[11], selector); w[49] = hc_byte_perm (w[11], w[10], selector); w[48] = hc_byte_perm (w[10], w[ 9], selector); w[47] = hc_byte_perm (w[ 9], w[ 8], selector); w[46] = hc_byte_perm (w[ 8], w[ 7], selector); w[45] = hc_byte_perm (w[ 7], w[ 6], selector); w[44] = hc_byte_perm (w[ 6], w[ 5], selector); w[43] = hc_byte_perm (w[ 5], w[ 4], selector); w[42] = hc_byte_perm (w[ 4], w[ 3], selector); w[41] = hc_byte_perm (w[ 3], w[ 2], selector); w[40] = hc_byte_perm (w[ 2], w[ 1], selector); w[39] = hc_byte_perm (w[ 1], w[ 0], selector); w[38] = hc_byte_perm (w[ 0], 0, selector); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_byte_perm (w[24], w[23], selector); w[62] = hc_byte_perm (w[23], w[22], selector); w[61] = hc_byte_perm (w[22], w[21], selector); w[60] = hc_byte_perm (w[21], w[20], selector); w[59] = hc_byte_perm (w[20], w[19], selector); w[58] = hc_byte_perm (w[19], w[18], selector); w[57] = hc_byte_perm (w[18], w[17], selector); w[56] = 
hc_byte_perm (w[17], w[16], selector); w[55] = hc_byte_perm (w[16], w[15], selector); w[54] = hc_byte_perm (w[15], w[14], selector); w[53] = hc_byte_perm (w[14], w[13], selector); w[52] = hc_byte_perm (w[13], w[12], selector); w[51] = hc_byte_perm (w[12], w[11], selector); w[50] = hc_byte_perm (w[11], w[10], selector); w[49] = hc_byte_perm (w[10], w[ 9], selector); w[48] = hc_byte_perm (w[ 9], w[ 8], selector); w[47] = hc_byte_perm (w[ 8], w[ 7], selector); w[46] = hc_byte_perm (w[ 7], w[ 6], selector); w[45] = hc_byte_perm (w[ 6], w[ 5], selector); w[44] = hc_byte_perm (w[ 5], w[ 4], selector); w[43] = hc_byte_perm (w[ 4], w[ 3], selector); w[42] = hc_byte_perm (w[ 3], w[ 2], selector); w[41] = hc_byte_perm (w[ 2], w[ 1], selector); w[40] = hc_byte_perm (w[ 1], w[ 0], selector); w[39] = hc_byte_perm (w[ 0], 0, selector); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_byte_perm (w[23], w[22], selector); w[62] = hc_byte_perm (w[22], w[21], selector); w[61] = hc_byte_perm (w[21], w[20], selector); w[60] = hc_byte_perm (w[20], w[19], selector); w[59] = hc_byte_perm (w[19], w[18], selector); w[58] = hc_byte_perm (w[18], w[17], selector); w[57] = hc_byte_perm (w[17], w[16], selector); w[56] = hc_byte_perm (w[16], w[15], selector); w[55] = hc_byte_perm (w[15], w[14], selector); w[54] = hc_byte_perm (w[14], w[13], selector); w[53] = hc_byte_perm (w[13], w[12], selector); w[52] = hc_byte_perm (w[12], w[11], selector); w[51] = hc_byte_perm (w[11], w[10], selector); w[50] = hc_byte_perm (w[10], w[ 9], selector); w[49] = hc_byte_perm (w[ 9], w[ 8], selector); w[48] = 
hc_byte_perm (w[ 8], w[ 7], selector); w[47] = hc_byte_perm (w[ 7], w[ 6], selector); w[46] = hc_byte_perm (w[ 6], w[ 5], selector); w[45] = hc_byte_perm (w[ 5], w[ 4], selector); w[44] = hc_byte_perm (w[ 4], w[ 3], selector); w[43] = hc_byte_perm (w[ 3], w[ 2], selector); w[42] = hc_byte_perm (w[ 2], w[ 1], selector); w[41] = hc_byte_perm (w[ 1], w[ 0], selector); w[40] = hc_byte_perm (w[ 0], 0, selector); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_byte_perm (w[22], w[21], selector); w[62] = hc_byte_perm (w[21], w[20], selector); w[61] = hc_byte_perm (w[20], w[19], selector); w[60] = hc_byte_perm (w[19], w[18], selector); w[59] = hc_byte_perm (w[18], w[17], selector); w[58] = hc_byte_perm (w[17], w[16], selector); w[57] = hc_byte_perm (w[16], w[15], selector); w[56] = hc_byte_perm (w[15], w[14], selector); w[55] = hc_byte_perm (w[14], w[13], selector); w[54] = hc_byte_perm (w[13], w[12], selector); w[53] = hc_byte_perm (w[12], w[11], selector); w[52] = hc_byte_perm (w[11], w[10], selector); w[51] = hc_byte_perm (w[10], w[ 9], selector); w[50] = hc_byte_perm (w[ 9], w[ 8], selector); w[49] = hc_byte_perm (w[ 8], w[ 7], selector); w[48] = hc_byte_perm (w[ 7], w[ 6], selector); w[47] = hc_byte_perm (w[ 6], w[ 5], selector); w[46] = hc_byte_perm (w[ 5], w[ 4], selector); w[45] = hc_byte_perm (w[ 4], w[ 3], selector); w[44] = hc_byte_perm (w[ 3], w[ 2], selector); w[43] = hc_byte_perm (w[ 2], w[ 1], selector); w[42] = hc_byte_perm (w[ 1], w[ 0], selector); w[41] = hc_byte_perm (w[ 0], 0, selector); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; 
w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_byte_perm (w[21], w[20], selector); w[62] = hc_byte_perm (w[20], w[19], selector); w[61] = hc_byte_perm (w[19], w[18], selector); w[60] = hc_byte_perm (w[18], w[17], selector); w[59] = hc_byte_perm (w[17], w[16], selector); w[58] = hc_byte_perm (w[16], w[15], selector); w[57] = hc_byte_perm (w[15], w[14], selector); w[56] = hc_byte_perm (w[14], w[13], selector); w[55] = hc_byte_perm (w[13], w[12], selector); w[54] = hc_byte_perm (w[12], w[11], selector); w[53] = hc_byte_perm (w[11], w[10], selector); w[52] = hc_byte_perm (w[10], w[ 9], selector); w[51] = hc_byte_perm (w[ 9], w[ 8], selector); w[50] = hc_byte_perm (w[ 8], w[ 7], selector); w[49] = hc_byte_perm (w[ 7], w[ 6], selector); w[48] = hc_byte_perm (w[ 6], w[ 5], selector); w[47] = hc_byte_perm (w[ 5], w[ 4], selector); w[46] = hc_byte_perm (w[ 4], w[ 3], selector); w[45] = hc_byte_perm (w[ 3], w[ 2], selector); w[44] = hc_byte_perm (w[ 2], w[ 1], selector); w[43] = hc_byte_perm (w[ 1], w[ 0], selector); w[42] = hc_byte_perm (w[ 0], 0, selector); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_byte_perm (w[20], w[19], selector); w[62] = hc_byte_perm (w[19], 
w[18], selector); w[61] = hc_byte_perm (w[18], w[17], selector); w[60] = hc_byte_perm (w[17], w[16], selector); w[59] = hc_byte_perm (w[16], w[15], selector); w[58] = hc_byte_perm (w[15], w[14], selector); w[57] = hc_byte_perm (w[14], w[13], selector); w[56] = hc_byte_perm (w[13], w[12], selector); w[55] = hc_byte_perm (w[12], w[11], selector); w[54] = hc_byte_perm (w[11], w[10], selector); w[53] = hc_byte_perm (w[10], w[ 9], selector); w[52] = hc_byte_perm (w[ 9], w[ 8], selector); w[51] = hc_byte_perm (w[ 8], w[ 7], selector); w[50] = hc_byte_perm (w[ 7], w[ 6], selector); w[49] = hc_byte_perm (w[ 6], w[ 5], selector); w[48] = hc_byte_perm (w[ 5], w[ 4], selector); w[47] = hc_byte_perm (w[ 4], w[ 3], selector); w[46] = hc_byte_perm (w[ 3], w[ 2], selector); w[45] = hc_byte_perm (w[ 2], w[ 1], selector); w[44] = hc_byte_perm (w[ 1], w[ 0], selector); w[43] = hc_byte_perm (w[ 0], 0, selector); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_byte_perm (w[19], w[18], selector); w[62] = hc_byte_perm (w[18], w[17], selector); w[61] = hc_byte_perm (w[17], w[16], selector); w[60] = hc_byte_perm (w[16], w[15], selector); w[59] = hc_byte_perm (w[15], w[14], selector); w[58] = hc_byte_perm (w[14], w[13], selector); w[57] = hc_byte_perm (w[13], w[12], selector); w[56] = hc_byte_perm (w[12], w[11], selector); w[55] = hc_byte_perm (w[11], w[10], selector); w[54] = hc_byte_perm (w[10], w[ 9], selector); w[53] = hc_byte_perm (w[ 9], w[ 8], selector); w[52] = hc_byte_perm (w[ 8], w[ 7], selector); w[51] = hc_byte_perm (w[ 7], w[ 6], 
selector); w[50] = hc_byte_perm (w[ 6], w[ 5], selector); w[49] = hc_byte_perm (w[ 5], w[ 4], selector); w[48] = hc_byte_perm (w[ 4], w[ 3], selector); w[47] = hc_byte_perm (w[ 3], w[ 2], selector); w[46] = hc_byte_perm (w[ 2], w[ 1], selector); w[45] = hc_byte_perm (w[ 1], w[ 0], selector); w[44] = hc_byte_perm (w[ 0], 0, selector); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_byte_perm (w[18], w[17], selector); w[62] = hc_byte_perm (w[17], w[16], selector); w[61] = hc_byte_perm (w[16], w[15], selector); w[60] = hc_byte_perm (w[15], w[14], selector); w[59] = hc_byte_perm (w[14], w[13], selector); w[58] = hc_byte_perm (w[13], w[12], selector); w[57] = hc_byte_perm (w[12], w[11], selector); w[56] = hc_byte_perm (w[11], w[10], selector); w[55] = hc_byte_perm (w[10], w[ 9], selector); w[54] = hc_byte_perm (w[ 9], w[ 8], selector); w[53] = hc_byte_perm (w[ 8], w[ 7], selector); w[52] = hc_byte_perm (w[ 7], w[ 6], selector); w[51] = hc_byte_perm (w[ 6], w[ 5], selector); w[50] = hc_byte_perm (w[ 5], w[ 4], selector); w[49] = hc_byte_perm (w[ 4], w[ 3], selector); w[48] = hc_byte_perm (w[ 3], w[ 2], selector); w[47] = hc_byte_perm (w[ 2], w[ 1], selector); w[46] = hc_byte_perm (w[ 1], w[ 0], selector); w[45] = hc_byte_perm (w[ 0], 0, selector); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; 
w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_byte_perm (w[17], w[16], selector); w[62] = hc_byte_perm (w[16], w[15], selector); w[61] = hc_byte_perm (w[15], w[14], selector); w[60] = hc_byte_perm (w[14], w[13], selector); w[59] = hc_byte_perm (w[13], w[12], selector); w[58] = hc_byte_perm (w[12], w[11], selector); w[57] = hc_byte_perm (w[11], w[10], selector); w[56] = hc_byte_perm (w[10], w[ 9], selector); w[55] = hc_byte_perm (w[ 9], w[ 8], selector); w[54] = hc_byte_perm (w[ 8], w[ 7], selector); w[53] = hc_byte_perm (w[ 7], w[ 6], selector); w[52] = hc_byte_perm (w[ 6], w[ 5], selector); w[51] = hc_byte_perm (w[ 5], w[ 4], selector); w[50] = hc_byte_perm (w[ 4], w[ 3], selector); w[49] = hc_byte_perm (w[ 3], w[ 2], selector); w[48] = hc_byte_perm (w[ 2], w[ 1], selector); w[47] = hc_byte_perm (w[ 1], w[ 0], selector); w[46] = hc_byte_perm (w[ 0], 0, selector); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_byte_perm (w[16], w[15], selector); w[62] = hc_byte_perm (w[15], w[14], selector); w[61] = hc_byte_perm (w[14], w[13], selector); w[60] = hc_byte_perm (w[13], w[12], selector); w[59] = hc_byte_perm (w[12], w[11], selector); w[58] = hc_byte_perm (w[11], w[10], selector); w[57] = hc_byte_perm (w[10], w[ 9], selector); w[56] = hc_byte_perm (w[ 9], w[ 8], selector); w[55] = hc_byte_perm 
(w[ 8], w[ 7], selector); w[54] = hc_byte_perm (w[ 7], w[ 6], selector); w[53] = hc_byte_perm (w[ 6], w[ 5], selector); w[52] = hc_byte_perm (w[ 5], w[ 4], selector); w[51] = hc_byte_perm (w[ 4], w[ 3], selector); w[50] = hc_byte_perm (w[ 3], w[ 2], selector); w[49] = hc_byte_perm (w[ 2], w[ 1], selector); w[48] = hc_byte_perm (w[ 1], w[ 0], selector); w[47] = hc_byte_perm (w[ 0], 0, selector); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_byte_perm (w[15], w[14], selector); w[62] = hc_byte_perm (w[14], w[13], selector); w[61] = hc_byte_perm (w[13], w[12], selector); w[60] = hc_byte_perm (w[12], w[11], selector); w[59] = hc_byte_perm (w[11], w[10], selector); w[58] = hc_byte_perm (w[10], w[ 9], selector); w[57] = hc_byte_perm (w[ 9], w[ 8], selector); w[56] = hc_byte_perm (w[ 8], w[ 7], selector); w[55] = hc_byte_perm (w[ 7], w[ 6], selector); w[54] = hc_byte_perm (w[ 6], w[ 5], selector); w[53] = hc_byte_perm (w[ 5], w[ 4], selector); w[52] = hc_byte_perm (w[ 4], w[ 3], selector); w[51] = hc_byte_perm (w[ 3], w[ 2], selector); w[50] = hc_byte_perm (w[ 2], w[ 1], selector); w[49] = hc_byte_perm (w[ 1], w[ 0], selector); w[48] = hc_byte_perm (w[ 0], 0, selector); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; 
w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_byte_perm (w[14], w[13], selector); w[62] = hc_byte_perm (w[13], w[12], selector); w[61] = hc_byte_perm (w[12], w[11], selector); w[60] = hc_byte_perm (w[11], w[10], selector); w[59] = hc_byte_perm (w[10], w[ 9], selector); w[58] = hc_byte_perm (w[ 9], w[ 8], selector); w[57] = hc_byte_perm (w[ 8], w[ 7], selector); w[56] = hc_byte_perm (w[ 7], w[ 6], selector); w[55] = hc_byte_perm (w[ 6], w[ 5], selector); w[54] = hc_byte_perm (w[ 5], w[ 4], selector); w[53] = hc_byte_perm (w[ 4], w[ 3], selector); w[52] = hc_byte_perm (w[ 3], w[ 2], selector); w[51] = hc_byte_perm (w[ 2], w[ 1], selector); w[50] = hc_byte_perm (w[ 1], w[ 0], selector); w[49] = hc_byte_perm (w[ 0], 0, selector); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_byte_perm (w[13], w[12], selector); w[62] = hc_byte_perm (w[12], w[11], selector); w[61] = hc_byte_perm (w[11], w[10], selector); w[60] = hc_byte_perm (w[10], w[ 9], selector); w[59] = hc_byte_perm (w[ 9], w[ 8], selector); w[58] = hc_byte_perm (w[ 8], w[ 7], selector); w[57] = hc_byte_perm (w[ 7], w[ 6], selector); w[56] = hc_byte_perm (w[ 6], w[ 5], selector); w[55] = hc_byte_perm (w[ 5], w[ 4], selector); w[54] = hc_byte_perm (w[ 4], w[ 3], selector); w[53] = hc_byte_perm (w[ 3], w[ 2], selector); 
w[52] = hc_byte_perm (w[ 2], w[ 1], selector); w[51] = hc_byte_perm (w[ 1], w[ 0], selector); w[50] = hc_byte_perm (w[ 0], 0, selector); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_byte_perm (w[12], w[11], selector); w[62] = hc_byte_perm (w[11], w[10], selector); w[61] = hc_byte_perm (w[10], w[ 9], selector); w[60] = hc_byte_perm (w[ 9], w[ 8], selector); w[59] = hc_byte_perm (w[ 8], w[ 7], selector); w[58] = hc_byte_perm (w[ 7], w[ 6], selector); w[57] = hc_byte_perm (w[ 6], w[ 5], selector); w[56] = hc_byte_perm (w[ 5], w[ 4], selector); w[55] = hc_byte_perm (w[ 4], w[ 3], selector); w[54] = hc_byte_perm (w[ 3], w[ 2], selector); w[53] = hc_byte_perm (w[ 2], w[ 1], selector); w[52] = hc_byte_perm (w[ 1], w[ 0], selector); w[51] = hc_byte_perm (w[ 0], 0, selector); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_byte_perm (w[11], w[10], selector); w[62] = hc_byte_perm (w[10], w[ 9], selector); w[61] = 
hc_byte_perm (w[ 9], w[ 8], selector); w[60] = hc_byte_perm (w[ 8], w[ 7], selector); w[59] = hc_byte_perm (w[ 7], w[ 6], selector); w[58] = hc_byte_perm (w[ 6], w[ 5], selector); w[57] = hc_byte_perm (w[ 5], w[ 4], selector); w[56] = hc_byte_perm (w[ 4], w[ 3], selector); w[55] = hc_byte_perm (w[ 3], w[ 2], selector); w[54] = hc_byte_perm (w[ 2], w[ 1], selector); w[53] = hc_byte_perm (w[ 1], w[ 0], selector); w[52] = hc_byte_perm (w[ 0], 0, selector); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_byte_perm (w[10], w[ 9], selector); w[62] = hc_byte_perm (w[ 9], w[ 8], selector); w[61] = hc_byte_perm (w[ 8], w[ 7], selector); w[60] = hc_byte_perm (w[ 7], w[ 6], selector); w[59] = hc_byte_perm (w[ 6], w[ 5], selector); w[58] = hc_byte_perm (w[ 5], w[ 4], selector); w[57] = hc_byte_perm (w[ 4], w[ 3], selector); w[56] = hc_byte_perm (w[ 3], w[ 2], selector); w[55] = hc_byte_perm (w[ 2], w[ 1], selector); w[54] = hc_byte_perm (w[ 1], w[ 0], selector); w[53] = hc_byte_perm (w[ 0], 0, selector); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; 
w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_byte_perm (w[ 9], w[ 8], selector); w[62] = hc_byte_perm (w[ 8], w[ 7], selector); w[61] = hc_byte_perm (w[ 7], w[ 6], selector); w[60] = hc_byte_perm (w[ 6], w[ 5], selector); w[59] = hc_byte_perm (w[ 5], w[ 4], selector); w[58] = hc_byte_perm (w[ 4], w[ 3], selector); w[57] = hc_byte_perm (w[ 3], w[ 2], selector); w[56] = hc_byte_perm (w[ 2], w[ 1], selector); w[55] = hc_byte_perm (w[ 1], w[ 0], selector); w[54] = hc_byte_perm (w[ 0], 0, selector); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_byte_perm (w[ 8], w[ 7], selector); w[62] = hc_byte_perm (w[ 7], w[ 6], selector); w[61] = hc_byte_perm (w[ 6], w[ 5], selector); w[60] = hc_byte_perm (w[ 5], w[ 4], selector); w[59] = hc_byte_perm (w[ 4], w[ 3], selector); w[58] = hc_byte_perm (w[ 3], w[ 2], selector); w[57] = hc_byte_perm (w[ 2], w[ 1], selector); w[56] = hc_byte_perm (w[ 1], w[ 0], selector); w[55] = hc_byte_perm (w[ 0], 0, selector); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; 
w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_byte_perm (w[ 7], w[ 6], selector); w[62] = hc_byte_perm (w[ 6], w[ 5], selector); w[61] = hc_byte_perm (w[ 5], w[ 4], selector); w[60] = hc_byte_perm (w[ 4], w[ 3], selector); w[59] = hc_byte_perm (w[ 3], w[ 2], selector); w[58] = hc_byte_perm (w[ 2], w[ 1], selector); w[57] = hc_byte_perm (w[ 1], w[ 0], selector); w[56] = hc_byte_perm (w[ 0], 0, selector); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_byte_perm (w[ 6], w[ 5], selector); w[62] = hc_byte_perm (w[ 5], w[ 4], selector); w[61] = hc_byte_perm (w[ 4], w[ 3], selector); w[60] = hc_byte_perm (w[ 3], w[ 2], selector); w[59] = hc_byte_perm (w[ 2], w[ 1], selector); w[58] = hc_byte_perm (w[ 1], w[ 0], selector); w[57] = hc_byte_perm (w[ 0], 0, selector); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] 
= 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_byte_perm (w[ 5], w[ 4], selector); w[62] = hc_byte_perm (w[ 4], w[ 3], selector); w[61] = hc_byte_perm (w[ 3], w[ 2], selector); w[60] = hc_byte_perm (w[ 2], w[ 1], selector); w[59] = hc_byte_perm (w[ 1], w[ 0], selector); w[58] = hc_byte_perm (w[ 0], 0, selector); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_byte_perm (w[ 4], w[ 3], selector); w[62] = hc_byte_perm (w[ 3], w[ 2], selector); w[61] = hc_byte_perm (w[ 2], w[ 1], selector); w[60] = hc_byte_perm (w[ 1], w[ 0], selector); w[59] = hc_byte_perm (w[ 0], 0, selector); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; 
w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_byte_perm (w[ 3], w[ 2], selector); w[62] = hc_byte_perm (w[ 2], w[ 1], selector); w[61] = hc_byte_perm (w[ 1], w[ 0], selector); w[60] = hc_byte_perm (w[ 0], 0, selector); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_byte_perm (w[ 2], w[ 1], selector); w[62] = hc_byte_perm (w[ 1], w[ 0], selector); w[61] = hc_byte_perm (w[ 0], 0, selector); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_byte_perm (w[ 1], w[ 0], selector); w[62] = hc_byte_perm (w[ 0], 0, selector); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; 
w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_byte_perm (w[ 0], 0, selector); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif } /** * vector functions as scalar (for outer loop usage) */ DECLSPEC void truncate_block_4x4_le_S (PRIVATE_AS u32 *w0, const u32 len) { switch (len) { case 0: w0[0] = 0; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 1: w0[0] &= 0x000000ff; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 2: w0[0] &= 0x0000ffff; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 3: w0[0] &= 0x00ffffff; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 4: w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 5: w0[1] &= 0x000000ff; w0[2] = 0; w0[3] = 0; break; case 6: w0[1] &= 0x0000ffff; w0[2] = 0; w0[3] = 0; break; case 7: w0[1] &= 0x00ffffff; w0[2] = 0; w0[3] = 0; break; case 8: w0[2] = 0; w0[3] = 0; break; case 9: w0[2] 
&= 0x000000ff; w0[3] = 0; break; case 10: w0[2] &= 0x0000ffff; w0[3] = 0; break; case 11: w0[2] &= 0x00ffffff; w0[3] = 0; break; case 12: w0[3] = 0; break; case 13: w0[3] &= 0x000000ff; break; case 14: w0[3] &= 0x0000ffff; break; case 15: w0[3] &= 0x00ffffff; break; } } DECLSPEC void truncate_block_4x4_be_S (PRIVATE_AS u32 *w0, const u32 len) { switch (len) { case 0: w0[0] = 0; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 1: w0[0] &= 0xff000000; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 2: w0[0] &= 0xffff0000; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 3: w0[0] &= 0xffffff00; w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 4: w0[1] = 0; w0[2] = 0; w0[3] = 0; break; case 5: w0[1] &= 0xff000000; w0[2] = 0; w0[3] = 0; break; case 6: w0[1] &= 0xffff0000; w0[2] = 0; w0[3] = 0; break; case 7: w0[1] &= 0xffffff00; w0[2] = 0; w0[3] = 0; break; case 8: w0[2] = 0; w0[3] = 0; break; case 9: w0[2] &= 0xff000000; w0[3] = 0; break; case 10: w0[2] &= 0xffff0000; w0[3] = 0; break; case 11: w0[2] &= 0xffffff00; w0[3] = 0; break; case 12: w0[3] = 0; break; case 13: w0[3] &= 0xff000000; break; case 14: w0[3] &= 0xffff0000; break; case 15: w0[3] &= 0xffffff00; break; } } DECLSPEC void truncate_block_16x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 len) { switch (len) { case 0: w0[0] = 0; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 1: w0[0] &= 0x000000ff; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 2: w0[0] &= 0x0000ffff; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 3: w0[0] &= 0x00ffffff; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 
0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 4: w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 5: w0[1] &= 0x000000ff; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 6: w0[1] &= 0x0000ffff; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 7: w0[1] &= 0x00ffffff; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 8: w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 9: w0[2] &= 0x000000ff; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 10: w0[2] &= 0x0000ffff; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 11: w0[2] &= 0x00ffffff; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 12: w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 13: w0[3] &= 0x000000ff; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 14: w0[3] &= 0x0000ffff; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 
0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 15: w0[3] &= 0x00ffffff; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 16: w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 17: w1[0] &= 0x000000ff; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 18: w1[0] &= 0x0000ffff; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 19: w1[0] &= 0x00ffffff; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 20: w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 21: w1[1] &= 0x000000ff; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 22: w1[1] &= 0x0000ffff; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 23: w1[1] &= 0x00ffffff; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 24: w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 25: w1[2] &= 0x000000ff; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 26: w1[2] &= 0x0000ffff; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 27: w1[2] &= 0x00ffffff; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 28: w1[3] = 0; 
w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 29: w1[3] &= 0x000000ff; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 30: w1[3] &= 0x0000ffff; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 31: w1[3] &= 0x00ffffff; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 32: w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 33: w2[0] &= 0x000000ff; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 34: w2[0] &= 0x0000ffff; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 35: w2[0] &= 0x00ffffff; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 36: w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 37: w2[1] &= 0x000000ff; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 38: w2[1] &= 0x0000ffff; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 39: w2[1] &= 0x00ffffff; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 40: w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 41: w2[2] &= 0x000000ff; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 42: w2[2] &= 0x0000ffff; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 43: w2[2] &= 0x00ffffff; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 44: w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 45: w2[3] &= 0x000000ff; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 46: w2[3] &= 0x0000ffff; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 47: w2[3] &= 0x00ffffff; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 48: 
w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 49: w3[0] &= 0x000000ff; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 50: w3[0] &= 0x0000ffff; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 51: w3[0] &= 0x00ffffff; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 52: w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 53: w3[1] &= 0x000000ff; w3[2] = 0; w3[3] = 0; break; case 54: w3[1] &= 0x0000ffff; w3[2] = 0; w3[3] = 0; break; case 55: w3[1] &= 0x00ffffff; w3[2] = 0; w3[3] = 0; break; case 56: w3[2] = 0; w3[3] = 0; break; case 57: w3[2] &= 0x000000ff; w3[3] = 0; break; case 58: w3[2] &= 0x0000ffff; w3[3] = 0; break; case 59: w3[2] &= 0x00ffffff; w3[3] = 0; break; case 60: w3[3] = 0; break; case 61: w3[3] &= 0x000000ff; break; case 62: w3[3] &= 0x0000ffff; break; case 63: w3[3] &= 0x00ffffff; break; } } DECLSPEC void truncate_block_16x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 len) { switch (len) { case 0: w0[0] = 0; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 1: w0[0] &= 0xff000000; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 2: w0[0] &= 0xffff0000; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 3: w0[0] &= 0xffffff00; w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 4: w0[1] = 0; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 5: w0[1] &= 0xff000000; w0[2] = 0; w0[3] = 0; w1[0] = 0; 
w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 6: w0[1] &= 0xffff0000; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 7: w0[1] &= 0xffffff00; w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 8: w0[2] = 0; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 9: w0[2] &= 0xff000000; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 10: w0[2] &= 0xffff0000; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 11: w0[2] &= 0xffffff00; w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 12: w0[3] = 0; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 13: w0[3] &= 0xff000000; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 14: w0[3] &= 0xffff0000; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 15: w0[3] &= 0xffffff00; w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 16: w1[0] = 0; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 
0; w3[2] = 0; w3[3] = 0; break; case 17: w1[0] &= 0xff000000; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 18: w1[0] &= 0xffff0000; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 19: w1[0] &= 0xffffff00; w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 20: w1[1] = 0; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 21: w1[1] &= 0xff000000; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 22: w1[1] &= 0xffff0000; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 23: w1[1] &= 0xffffff00; w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 24: w1[2] = 0; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 25: w1[2] &= 0xff000000; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 26: w1[2] &= 0xffff0000; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 27: w1[2] &= 0xffffff00; w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 28: w1[3] = 0; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 29: w1[3] &= 0xff000000; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 30: w1[3] &= 0xffff0000; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 31: w1[3] 
&= 0xffffff00; w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 32: w2[0] = 0; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 33: w2[0] &= 0xff000000; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 34: w2[0] &= 0xffff0000; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 35: w2[0] &= 0xffffff00; w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 36: w2[1] = 0; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 37: w2[1] &= 0xff000000; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 38: w2[1] &= 0xffff0000; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 39: w2[1] &= 0xffffff00; w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 40: w2[2] = 0; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 41: w2[2] &= 0xff000000; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 42: w2[2] &= 0xffff0000; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 43: w2[2] &= 0xffffff00; w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 44: w2[3] = 0; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 45: w2[3] &= 0xff000000; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 46: w2[3] &= 0xffff0000; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 47: w2[3] &= 0xffffff00; w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 48: w3[0] = 0; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 49: w3[0] &= 0xff000000; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 50: w3[0] &= 0xffff0000; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 51: w3[0] &= 0xffffff00; w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 52: w3[1] = 0; w3[2] = 0; w3[3] = 0; break; case 53: w3[1] &= 0xff000000; w3[2] = 0; w3[3] = 
0; break; case 54: w3[1] &= 0xffff0000; w3[2] = 0; w3[3] = 0; break; case 55: w3[1] &= 0xffffff00; w3[2] = 0; w3[3] = 0; break; case 56: w3[2] = 0; w3[3] = 0; break; case 57: w3[2] &= 0xff000000; w3[3] = 0; break; case 58: w3[2] &= 0xffff0000; w3[3] = 0; break; case 59: w3[2] &= 0xffffff00; w3[3] = 0; break; case 60: w3[3] = 0; break; case 61: w3[3] &= 0xff000000; break; case 62: w3[3] &= 0xffff0000; break; case 63: w3[3] &= 0xffffff00; break; } } DECLSPEC void set_mark_1x4_S (PRIVATE_AS u32 *v, const u32 offset) { const u32 c = (offset & 15) / 4; const u32 r = 0xff << ((offset & 3) * 8); v[0] = (c == 0) ? r : 0; v[1] = (c == 1) ? r : 0; v[2] = (c == 2) ? r : 0; v[3] = (c == 3) ? r : 0; } DECLSPEC void append_helper_1x4_S (PRIVATE_AS u32 *r, const u32 v, PRIVATE_AS const u32 *m) { r[0] |= v & m[0]; r[1] |= v & m[1]; r[2] |= v & m[2]; r[3] |= v & m[3]; } DECLSPEC void append_0x01_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x01010101 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x01010101 : 0), v); } DECLSPEC void append_0x06_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x06060606 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x06060606 : 0), v); } DECLSPEC void append_0x01_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x01010101 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x01010101 : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 0x01010101 : 0), v); append_helper_1x4_S (w3, ((offset16 == 3) ? 
0x01010101 : 0), v); } DECLSPEC void append_0x2d_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x2d2d2d2d : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x2d2d2d2d : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 0x2d2d2d2d : 0), v); append_helper_1x4_S (w3, ((offset16 == 3) ? 0x2d2d2d2d : 0), v); } DECLSPEC void append_0x3a_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x3a3a3a3a : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x3a3a3a3a : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 0x3a3a3a3a : 0), v); append_helper_1x4_S (w3, ((offset16 == 3) ? 0x3a3a3a3a : 0), v); } DECLSPEC void append_0x80_1x4_S (PRIVATE_AS u32 *w0, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); append_helper_1x4_S (w0, 0x80808080, v); } DECLSPEC void append_0x80_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x80808080 : 0), v); } DECLSPEC void append_0x80_3x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 
0x80808080 : 0), v); } DECLSPEC void append_0x80_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 0x80808080 : 0), v); append_helper_1x4_S (w3, ((offset16 == 3) ? 0x80808080 : 0), v); } DECLSPEC void append_0x80_8x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { u32 v[4]; set_mark_1x4_S (v, offset); const u32 offset16 = offset / 16; append_helper_1x4_S (w0, ((offset16 == 0) ? 0x80808080 : 0), v); append_helper_1x4_S (w1, ((offset16 == 1) ? 0x80808080 : 0), v); append_helper_1x4_S (w2, ((offset16 == 2) ? 0x80808080 : 0), v); append_helper_1x4_S (w3, ((offset16 == 3) ? 0x80808080 : 0), v); append_helper_1x4_S (w4, ((offset16 == 4) ? 0x80808080 : 0), v); append_helper_1x4_S (w5, ((offset16 == 5) ? 0x80808080 : 0), v); append_helper_1x4_S (w6, ((offset16 == 6) ? 0x80808080 : 0), v); append_helper_1x4_S (w7, ((offset16 == 7) ? 
0x80808080 : 0), v);
}

/**
 * Spread 4 input words (16 bytes) over 8 output words, one input byte per
 * 16-bit unit, with the byte in the UPPER half of the unit.
 *
 * in   : 4 source words
 * out1 : receives the expansion of in[0] (out1[0..1]) and in[1] (out1[2..3])
 * out2 : receives the expansion of in[2] (out2[0..1]) and in[3] (out2[2..3])
 *
 * Portable (#else) branch, with b0 = bits 0-7 ... b3 = bits 24-31 of an
 * input word: the even output word holds b0 in bits 8-15 and b1 in bits
 * 24-31, the odd output word holds b2 in bits 8-15 and b3 in bits 24-31;
 * all other bits are zero.
 *
 * The IS_NV and AMD/HIP (HAS_VPERM == 1) branches use hc_byte_perm_S with
 * backend-specific selectors; hc_byte_perm_S is defined elsewhere, so they
 * are presumably equivalent to the portable branch - verify against its
 * definition before changing a selector.
 *
 * NOTE(review): stores run from out2[3] down to out1[0] and every out1[k]
 * store reads in[k / 2], so the sequence would still be correct if `in` and
 * `out1` were the same array. Whether callers rely on that is not visible
 * here - confirm before reordering the statements.
 */
DECLSPEC void make_utf16be_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm_S (in[3], 0, 0x3727);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x1707);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x3727);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x1707);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x3727);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x1707);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x3727);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x1707);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x03070207);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x01070007);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x03070207);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x01070007);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x03070207);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x01070007);

  #else

  // odd output word: upper two input bytes; even output word: lower two
  out2[3] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
  out2[2] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
  out2[1] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
  out2[0] = ((in[2] << 16) & 0xFF000000) | ((in[2] << 8) & 0x0000FF00);
  out1[3] = ((in[1] >> 0) & 0xFF000000) | ((in[1] >> 8) & 0x0000FF00);
  out1[2] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
  out1[1] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);
  out1[0] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);

  #endif
}

/**
 * Variant of make_utf16be_S with the two output words of every input word
 * exchanged.
 *
 * in   : 4 source words
 * out1 : receives the expansion of in[0] (out1[0..1]) and in[1] (out1[2..3])
 * out2 : receives the expansion of in[2] (out2[0..1]) and in[3] (out2[2..3])
 *
 * Portable (#else) branch, with b0 = bits 0-7 ... b3 = bits 24-31 of an
 * input word: the even output word holds b2 in bits 8-15 and b3 in bits
 * 24-31, the odd output word holds b0 in bits 8-15 and b1 in bits 24-31.
 * In every branch the expressions/selectors are the ones of make_utf16be_S
 * with even and odd output words swapped.
 *
 * NOTE(review): same high-to-low store order as make_utf16be_S - confirm
 * whether callers pass the same array for `in` and `out1` before reordering.
 */
DECLSPEC void make_utf16beN_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm_S (in[3], 0, 0x1707);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x3727);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x1707);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x3727);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x1707);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x3727);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x1707);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x3727);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm_S (in[3], 0, 0x01070007);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x03070207);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x01070007);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x03070207);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x01070007);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x03070207);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x01070007);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x03070207);

  #else

  // odd output word: lower two input bytes; even output word: upper two
  out2[3] = ((in[3] << 16) & 0xFF000000) | ((in[3] << 8) & 0x0000FF00);
  out2[2] = ((in[3] >> 0) & 0xFF000000) | ((in[3] >> 8) & 0x0000FF00);
  out2[1] = ((in[2] << 16) & 0xFF000000) | ((in[2] << 8) & 0x0000FF00);
  out2[0] = ((in[2] >> 0) & 0xFF000000) | ((in[2] >> 8) & 0x0000FF00);
  out1[3] = ((in[1] << 16) & 0xFF000000) | ((in[1] << 8) & 0x0000FF00);
  out1[2] = ((in[1] >> 0) & 0xFF000000) | ((in[1] >> 8) & 0x0000FF00);
  out1[1] = ((in[0] << 16) & 0xFF000000) | ((in[0] << 8) & 0x0000FF00);
  out1[0] = ((in[0] >> 0) & 0xFF000000) | ((in[0] >> 8) & 0x0000FF00);

  #endif
}

/**
 * Spread 4 input words (16 bytes) over 8 output words, one input byte per
 * 16-bit unit, with the byte in the LOWER half of the unit.
 *
 * in   : 4 source words
 * out1 : receives the expansion of in[0] (out1[0..1]) and in[1] (out1[2..3])
 * out2 : receives the expansion of in[2] (out2[0..1]) and in[3] (out2[2..3])
 *
 * Portable (#else) branch, with b0 = bits 0-7 ... b3 = bits 24-31 of an
 * input word: the even output word holds b0 in bits 0-7 and b1 in bits
 * 16-23, the odd output word holds b2 in bits 0-7 and b3 in bits 16-23;
 * all other bits are zero.
 *
 * The hc_byte_perm_S branches are presumably equivalent (hc_byte_perm_S is
 * defined elsewhere) - verify before changing a selector.
 *
 * NOTE(review): same high-to-low store order as make_utf16be_S - confirm
 * whether callers pass the same array for `in` and `out1` before reordering.
 */
DECLSPEC void make_utf16le_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PRIVATE_AS u32 *out2)
{
  #if defined IS_NV

  out2[3] = hc_byte_perm_S (in[3], 0, 0x7372);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x7170);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x7372);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x7170);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x7372);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x7170);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x7372);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x7170);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702);
  out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700);
  out2[1] = hc_byte_perm_S (in[2], 0, 0x07030702);
  out2[0] = hc_byte_perm_S (in[2], 0, 0x07010700);
  out1[3] = hc_byte_perm_S (in[1], 0, 0x07030702);
  out1[2] = hc_byte_perm_S (in[1], 0, 0x07010700);
  out1[1] = hc_byte_perm_S (in[0], 0, 0x07030702);
  out1[0] = hc_byte_perm_S (in[0], 0, 0x07010700);

  #else

  // odd output word: upper two input bytes; even output word: lower two
  out2[3] = ((in[3] >> 8) & 0x00FF0000) | ((in[3] >> 16) & 0x000000FF);
  out2[2] = ((in[3] << 8) & 0x00FF0000) | ((in[3] >> 0) & 0x000000FF);
  out2[1] = ((in[2] >> 8) & 0x00FF0000) | ((in[2] >> 16) & 0x000000FF);
  out2[0] = ((in[2] << 8) & 0x00FF0000) | ((in[2] >> 0) & 0x000000FF);
  out1[3] = ((in[1] >> 8) & 0x00FF0000) | ((in[1] >> 16) & 0x000000FF);
  out1[2] = ((in[1] << 8) & 0x00FF0000) | ((in[1] >> 0) & 0x000000FF);
  out1[1] = ((in[0] >> 8) & 0x00FF0000) | ((in[0] >> 16) & 0x000000FF);
  out1[0] = ((in[0] << 8) & 0x00FF0000) | ((in[0] >> 0) & 0x000000FF);

  #endif
}

/**
 * Pack 8 input words back into 4 output words by keeping the UPPER byte of
 * every 16-bit unit.
 *
 * in1 : first 4 source words, packed into out[0..1]
 * in2 : last 4 source words, packed into out[2..3]
 * out : 4 result words
 *
 * Portable (#else) branch: each output word takes bits 8-15 and 24-31 of
 * one input word (placed in bits 0-7 and 8-15) and bits 8-15 and 24-31 of
 * the following input word (placed in bits 16-23 and 24-31). The lower byte
 * of every 16-bit unit is dropped.
 *
 * The hc_byte_perm_S branches are presumably equivalent (hc_byte_perm_S is
 * defined elsewhere) - verify before changing a selector.
 */
DECLSPEC void undo_utf16be_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *in2, PRIVATE_AS u32 *out)
{
  #if defined IS_NV

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x4602);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x4602);
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602);
  out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002);
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x04060002);
  out[3] = hc_byte_perm_S (in2[2], in2[3], 0x04060002);

  #else

  out[0] = ((in1[0] & 0x0000ff00) >> 8) | ((in1[0] & 0xff000000) >> 16)
         | ((in1[1] & 0x0000ff00) << 8) | ((in1[1] & 0xff000000) << 0);
  out[1] = ((in1[2] & 0x0000ff00) >> 8) | ((in1[2] & 0xff000000) >> 16)
         | ((in1[3] & 0x0000ff00) << 8) | ((in1[3] & 0xff000000) << 0);
  out[2] = ((in2[0] & 0x0000ff00) >> 8) | ((in2[0] & 0xff000000) >> 16)
         | ((in2[1] & 0x0000ff00) << 8) | ((in2[1] & 0xff000000) << 0);
  out[3] = ((in2[2] & 0x0000ff00) >> 8) | ((in2[2] & 0xff000000) >> 16)
         | ((in2[3] & 0x0000ff00) << 8) | ((in2[3] & 0xff000000) << 0);

  #endif
}

/**
 * Pack 8 input words back into 4 output words by keeping the LOWER byte of
 * every 16-bit unit.
 *
 * in1 : first 4 source words, packed into out[0..1]
 * in2 : last 4 source words, packed into out[2..3]
 * out : 4 result words
 *
 * Portable (#else) branch: each output word takes bits 0-7 and 16-23 of one
 * input word (placed in bits 0-7 and 8-15) and bits 0-7 and 16-23 of the
 * following input word (placed in bits 16-23 and 24-31). The upper byte of
 * every 16-bit unit is dropped.
 *
 * The hc_byte_perm_S branches are presumably equivalent (hc_byte_perm_S is
 * defined elsewhere) - verify before changing a selector.
 */
DECLSPEC void undo_utf16le_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *in2, PRIVATE_AS u32 *out)
{
  #if defined IS_NV

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x6420);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x6420);
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420);
  out[3] = hc_byte_perm_S (in2[2],
in2[3], 0x6420);

  #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1

  out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200);
  out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200);
  out[2] = hc_byte_perm_S (in2[0], in2[1], 0x06040200);
  out[3] = hc_byte_perm_S (in2[2], in2[3], 0x06040200);

  #else

  // portable fallback: gather bits 0-7 and 16-23 of two adjacent input words
  out[0] = ((in1[0] & 0x000000ff) >> 0) | ((in1[0] & 0x00ff0000) >> 8)
         | ((in1[1] & 0x000000ff) << 16) | ((in1[1] & 0x00ff0000) << 8);
  out[1] = ((in1[2] & 0x000000ff) >> 0) | ((in1[2] & 0x00ff0000) >> 8)
         | ((in1[3] & 0x000000ff) << 16) | ((in1[3] & 0x00ff0000) << 8);
  out[2] = ((in2[0] & 0x000000ff) >> 0) | ((in2[0] & 0x00ff0000) >> 8)
         | ((in2[1] & 0x000000ff) << 16) | ((in2[1] & 0x00ff0000) << 8);
  out[3] = ((in2[2] & 0x000000ff) >> 0) | ((in2[2] & 0x00ff0000) >> 8)
         | ((in2[3] & 0x000000ff) << 16) | ((in2[3] & 0x00ff0000) << 8);

  #endif
}

/**
 * Shift the 16-word buffer w0[0] .. w3[3] towards higher word indices by
 * `offset` bytes, in place.
 *
 * w0..w3 : four 4-word blocks treated as one sequence of 16 words, w0[0]
 *          being the lowest and w3[3] the highest
 * offset : shift distance in bytes
 *
 * offset / 4 selects the switch case, i.e. the number of whole words the
 * content moves up. In case s the lowest s words are set to 0 and every
 * other word is rebuilt from two neighbouring source words (a constant 0
 * stands in for the word below w0[0]). The sub-word part of the shift is
 * delegated to hc_bytealign_S (which gets the full offset) or to
 * hc_byte_perm_S (which gets a selector derived from offset & 3); both are
 * defined elsewhere, so their exact byte semantics are assumed here - verify
 * against their definitions.
 *
 * Words that would move beyond w3[3] are not stored anywhere (no carry out).
 * There is no default case: for offset / 4 outside 0..15 the buffer is left
 * untouched.
 *
 * Inside every case the stores go from w3[3] down to w0[0], and each store
 * only reads words at the same or a lower position, so no source word is
 * overwritten before it has been read. Do not reorder these statements.
 *
 * Exactly one of the two implementations below is meant to be compiled,
 * depending on IS_AMD / IS_HIP / IS_NV / IS_GENERIC and HAS_VPERM.
 */
DECLSPEC void switch_buffer_by_offset_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset)
{
  const int offset_switch = offset / 4;

  // Variant 1: hc_bytealign_S based (AMD/HIP without VPERM, or generic)
  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC
  switch (offset_switch)
  {
    case 0:
      w3[3] = hc_bytealign_S (w3[2], w3[3], offset);
      w3[2] = hc_bytealign_S (w3[1], w3[2], offset);
      w3[1] = hc_bytealign_S (w3[0], w3[1], offset);
      w3[0] = hc_bytealign_S (w2[3], w3[0], offset);
      w2[3] = hc_bytealign_S (w2[2], w2[3], offset);
      w2[2] = hc_bytealign_S (w2[1], w2[2], offset);
      w2[1] = hc_bytealign_S (w2[0], w2[1], offset);
      w2[0] = hc_bytealign_S (w1[3], w2[0], offset);
      w1[3] = hc_bytealign_S (w1[2], w1[3], offset);
      w1[2] = hc_bytealign_S (w1[1], w1[2], offset);
      w1[1] = hc_bytealign_S (w1[0], w1[1], offset);
      w1[0] = hc_bytealign_S (w0[3], w1[0], offset);
      w0[3] = hc_bytealign_S (w0[2], w0[3], offset);
      w0[2] = hc_bytealign_S (w0[1], w0[2], offset);
      w0[1] = hc_bytealign_S (w0[0], w0[1], offset);
      w0[0] = hc_bytealign_S ( 0, w0[0], offset);
      break;

    case 1:
      w3[3] = hc_bytealign_S (w3[1], w3[2], offset);
      w3[2] = hc_bytealign_S (w3[0], w3[1], offset);
      w3[1] = hc_bytealign_S (w2[3], w3[0], offset);
      w3[0] = hc_bytealign_S (w2[2], w2[3], offset);
      w2[3] = hc_bytealign_S (w2[1], w2[2], offset);
      w2[2] = hc_bytealign_S (w2[0], w2[1], offset);
      w2[1] = hc_bytealign_S (w1[3], w2[0], offset);
      w2[0] = hc_bytealign_S (w1[2], w1[3], offset);
      w1[3] = hc_bytealign_S (w1[1], w1[2], offset);
      w1[2] = hc_bytealign_S (w1[0], w1[1], offset);
      w1[1] = hc_bytealign_S (w0[3], w1[0], offset);
      w1[0] = hc_bytealign_S (w0[2], w0[3], offset);
      w0[3] = hc_bytealign_S (w0[1], w0[2], offset);
      w0[2] = hc_bytealign_S (w0[0], w0[1], offset);
      w0[1] = hc_bytealign_S ( 0, w0[0], offset);
      w0[0] = 0;
      break;

    case 2:
      w3[3] = hc_bytealign_S (w3[0], w3[1], offset);
      w3[2] = hc_bytealign_S (w2[3], w3[0], offset);
      w3[1] = hc_bytealign_S (w2[2], w2[3], offset);
      w3[0] = hc_bytealign_S (w2[1], w2[2], offset);
      w2[3] = hc_bytealign_S (w2[0], w2[1], offset);
      w2[2] = hc_bytealign_S (w1[3], w2[0], offset);
      w2[1] = hc_bytealign_S (w1[2], w1[3], offset);
      w2[0] = hc_bytealign_S (w1[1], w1[2], offset);
      w1[3] = hc_bytealign_S (w1[0], w1[1], offset);
      w1[2] = hc_bytealign_S (w0[3], w1[0], offset);
      w1[1] = hc_bytealign_S (w0[2], w0[3], offset);
      w1[0] = hc_bytealign_S (w0[1], w0[2], offset);
      w0[3] = hc_bytealign_S (w0[0], w0[1], offset);
      w0[2] = hc_bytealign_S ( 0, w0[0], offset);
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 3:
      w3[3] = hc_bytealign_S (w2[3], w3[0], offset);
      w3[2] = hc_bytealign_S (w2[2], w2[3], offset);
      w3[1] = hc_bytealign_S (w2[1], w2[2], offset);
      w3[0] = hc_bytealign_S (w2[0], w2[1], offset);
      w2[3] = hc_bytealign_S (w1[3], w2[0], offset);
      w2[2] = hc_bytealign_S (w1[2], w1[3], offset);
      w2[1] = hc_bytealign_S (w1[1], w1[2], offset);
      w2[0] = hc_bytealign_S (w1[0], w1[1], offset);
      w1[3] = hc_bytealign_S (w0[3], w1[0], offset);
      w1[2] = hc_bytealign_S (w0[2], w0[3], offset);
      w1[1] = hc_bytealign_S (w0[1], w0[2], offset);
      w1[0] = hc_bytealign_S (w0[0], w0[1], offset);
      w0[3] = hc_bytealign_S ( 0, w0[0], offset);
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 4:
      w3[3] = hc_bytealign_S (w2[2], w2[3], offset);
      w3[2] = hc_bytealign_S (w2[1], w2[2], offset);
      w3[1] = hc_bytealign_S (w2[0], w2[1], offset);
      w3[0] = hc_bytealign_S (w1[3], w2[0], offset);
      w2[3] = hc_bytealign_S (w1[2], w1[3], offset);
      w2[2] = hc_bytealign_S (w1[1], w1[2], offset);
      w2[1] = hc_bytealign_S (w1[0], w1[1], offset);
      w2[0] = hc_bytealign_S (w0[3], w1[0], offset);
      w1[3] = hc_bytealign_S (w0[2], w0[3], offset);
      w1[2] = hc_bytealign_S (w0[1], w0[2], offset);
      w1[1] = hc_bytealign_S (w0[0], w0[1], offset);
      w1[0] = hc_bytealign_S ( 0, w0[0], offset);
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 5:
      w3[3] = hc_bytealign_S (w2[1], w2[2], offset);
      w3[2] = hc_bytealign_S (w2[0], w2[1], offset);
      w3[1] = hc_bytealign_S (w1[3], w2[0], offset);
      w3[0] = hc_bytealign_S (w1[2], w1[3], offset);
      w2[3] = hc_bytealign_S (w1[1], w1[2], offset);
      w2[2] = hc_bytealign_S (w1[0], w1[1], offset);
      w2[1] = hc_bytealign_S (w0[3], w1[0], offset);
      w2[0] = hc_bytealign_S (w0[2], w0[3], offset);
      w1[3] = hc_bytealign_S (w0[1], w0[2], offset);
      w1[2] = hc_bytealign_S (w0[0], w0[1], offset);
      w1[1] = hc_bytealign_S ( 0, w0[0], offset);
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 6:
      w3[3] = hc_bytealign_S (w2[0], w2[1], offset);
      w3[2] = hc_bytealign_S (w1[3], w2[0], offset);
      w3[1] = hc_bytealign_S (w1[2], w1[3], offset);
      w3[0] = hc_bytealign_S (w1[1], w1[2], offset);
      w2[3] = hc_bytealign_S (w1[0], w1[1], offset);
      w2[2] = hc_bytealign_S (w0[3], w1[0], offset);
      w2[1] = hc_bytealign_S (w0[2], w0[3], offset);
      w2[0] = hc_bytealign_S (w0[1], w0[2], offset);
      w1[3] = hc_bytealign_S (w0[0], w0[1], offset);
      w1[2] = hc_bytealign_S ( 0, w0[0], offset);
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 7:
      w3[3] = hc_bytealign_S (w1[3], w2[0], offset);
      w3[2] = hc_bytealign_S (w1[2], w1[3], offset);
      w3[1] = hc_bytealign_S (w1[1], w1[2], offset);
      w3[0] = hc_bytealign_S (w1[0], w1[1], offset);
      w2[3] = hc_bytealign_S (w0[3], w1[0], offset);
      w2[2] = hc_bytealign_S (w0[2], w0[3], offset);
      w2[1] = hc_bytealign_S (w0[1], w0[2], offset);
      w2[0] = hc_bytealign_S (w0[0], w0[1], offset);
      w1[3] = hc_bytealign_S ( 0, w0[0], offset);
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 8:
      w3[3] = hc_bytealign_S (w1[2], w1[3], offset);
      w3[2] = hc_bytealign_S (w1[1], w1[2], offset);
      w3[1] = hc_bytealign_S (w1[0], w1[1], offset);
      w3[0] = hc_bytealign_S (w0[3], w1[0], offset);
      w2[3] = hc_bytealign_S (w0[2], w0[3], offset);
      w2[2] = hc_bytealign_S (w0[1], w0[2], offset);
      w2[1] = hc_bytealign_S (w0[0], w0[1], offset);
      w2[0] = hc_bytealign_S ( 0, w0[0], offset);
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 9:
      w3[3] = hc_bytealign_S (w1[1], w1[2], offset);
      w3[2] = hc_bytealign_S (w1[0], w1[1], offset);
      w3[1] = hc_bytealign_S (w0[3], w1[0], offset);
      w3[0] = hc_bytealign_S (w0[2], w0[3], offset);
      w2[3] = hc_bytealign_S (w0[1], w0[2], offset);
      w2[2] = hc_bytealign_S (w0[0], w0[1], offset);
      w2[1] = hc_bytealign_S ( 0, w0[0], offset);
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 10:
      w3[3] = hc_bytealign_S (w1[0], w1[1], offset);
      w3[2] = hc_bytealign_S (w0[3], w1[0], offset);
      w3[1] = hc_bytealign_S (w0[2], w0[3], offset);
      w3[0] = hc_bytealign_S (w0[1], w0[2], offset);
      w2[3] = hc_bytealign_S (w0[0], w0[1], offset);
      w2[2] = hc_bytealign_S ( 0, w0[0], offset);
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 11:
      w3[3] = hc_bytealign_S (w0[3], w1[0], offset);
      w3[2] = hc_bytealign_S (w0[2], w0[3], offset);
      w3[1] = hc_bytealign_S (w0[1], w0[2], offset);
      w3[0] = hc_bytealign_S (w0[0], w0[1], offset);
      w2[3] = hc_bytealign_S ( 0, w0[0], offset);
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 12:
      w3[3] = hc_bytealign_S (w0[2], w0[3], offset);
      w3[2] = hc_bytealign_S (w0[1], w0[2], offset);
      w3[1] = hc_bytealign_S (w0[0], w0[1], offset);
      w3[0] = hc_bytealign_S ( 0, w0[0], offset);
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 13:
      w3[3] = hc_bytealign_S (w0[1], w0[2], offset);
      w3[2] = hc_bytealign_S (w0[0], w0[1], offset);
      w3[1] = hc_bytealign_S ( 0, w0[0], offset);
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 14:
      w3[3] = hc_bytealign_S (w0[0], w0[1], offset);
      w3[2] = hc_bytealign_S ( 0, w0[0], offset);
      w3[1] = 0;
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 15:
      w3[3] = hc_bytealign_S ( 0, w0[0], offset);
      w3[2] = 0;
      w3[1] = 0;
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;
  }
  #endif

  // Variant 2: hc_byte_perm_S based (AMD/HIP with VPERM, or NV)
  #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV

  const int offset_mod_4 = offset & 3;

  const int offset_minus_4 = 4 - offset_mod_4;

  // NV: 16-bit selector (four 4-bit fields) cut out of 0x76543210,
  // starting (4 - (offset & 3)) fields above the lowest one
  #if defined IS_NV
  const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
  #endif

  // AMD/HIP: selector cut out of 0x0706050403020100, starting
  // (4 - (offset & 3)) bytes above the lowest one; l32_from_64_S is defined
  // elsewhere (presumably returns the low 32 bits - verify)
  #if (defined IS_AMD || defined IS_HIP)
  const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8));
  #endif

  // Same case layout as variant 1, with the selector replacing the offset.
  switch (offset_switch)
  {
    case 0:
      w3[3] = hc_byte_perm_S (w3[2], w3[3], selector);
      w3[2] = hc_byte_perm_S (w3[1], w3[2], selector);
      w3[1] = hc_byte_perm_S (w3[0], w3[1], selector);
      w3[0] = hc_byte_perm_S (w2[3], w3[0], selector);
      w2[3] = hc_byte_perm_S (w2[2], w2[3], selector);
      w2[2] = hc_byte_perm_S (w2[1], w2[2], selector);
      w2[1] = hc_byte_perm_S (w2[0], w2[1], selector);
      w2[0] = hc_byte_perm_S (w1[3], w2[0], selector);
      w1[3] = hc_byte_perm_S (w1[2], w1[3], selector);
      w1[2] = hc_byte_perm_S (w1[1], w1[2], selector);
      w1[1] = hc_byte_perm_S (w1[0], w1[1], selector);
      w1[0] = hc_byte_perm_S (w0[3], w1[0], selector);
      w0[3] = hc_byte_perm_S (w0[2], w0[3], selector);
      w0[2] = hc_byte_perm_S (w0[1], w0[2], selector);
      w0[1] = hc_byte_perm_S (w0[0], w0[1], selector);
      w0[0] = hc_byte_perm_S ( 0, w0[0], selector);
      break;

    case 1:
      w3[3] = hc_byte_perm_S (w3[1], w3[2], selector);
      w3[2] = hc_byte_perm_S (w3[0], w3[1], selector);
      w3[1] = hc_byte_perm_S (w2[3], w3[0], selector);
      w3[0] = hc_byte_perm_S (w2[2], w2[3], selector);
      w2[3] = hc_byte_perm_S (w2[1], w2[2], selector);
      w2[2] = hc_byte_perm_S (w2[0], w2[1], selector);
      w2[1] = hc_byte_perm_S (w1[3], w2[0], selector);
      w2[0] = hc_byte_perm_S (w1[2], w1[3], selector);
      w1[3] = hc_byte_perm_S (w1[1], w1[2], selector);
      w1[2] = hc_byte_perm_S (w1[0], w1[1], selector);
      w1[1] = hc_byte_perm_S (w0[3], w1[0], selector);
      w1[0] = hc_byte_perm_S (w0[2], w0[3], selector);
      w0[3] = hc_byte_perm_S (w0[1], w0[2], selector);
      w0[2] = hc_byte_perm_S (w0[0], w0[1], selector);
      w0[1] = hc_byte_perm_S ( 0, w0[0], selector);
      w0[0] = 0;
      break;

    case 2:
      w3[3] = hc_byte_perm_S (w3[0], w3[1], selector);
      w3[2] = hc_byte_perm_S (w2[3], w3[0], selector);
      w3[1] = hc_byte_perm_S (w2[2], w2[3], selector);
      w3[0] = hc_byte_perm_S (w2[1], w2[2], selector);
      w2[3] = hc_byte_perm_S (w2[0], w2[1], selector);
      w2[2] = hc_byte_perm_S (w1[3], w2[0], selector);
      w2[1] = hc_byte_perm_S (w1[2], w1[3], selector);
      w2[0] = hc_byte_perm_S (w1[1], w1[2], selector);
      w1[3] = hc_byte_perm_S (w1[0], w1[1], selector);
      w1[2] = hc_byte_perm_S (w0[3], w1[0], selector);
      w1[1] = hc_byte_perm_S (w0[2], w0[3], selector);
      w1[0] = hc_byte_perm_S (w0[1], w0[2], selector);
      w0[3] = hc_byte_perm_S (w0[0], w0[1], selector);
      w0[2] = hc_byte_perm_S ( 0, w0[0], selector);
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 3:
      w3[3] = hc_byte_perm_S (w2[3], w3[0], selector);
      w3[2] = hc_byte_perm_S (w2[2], w2[3], selector);
      w3[1] = hc_byte_perm_S (w2[1], w2[2], selector);
      w3[0] = hc_byte_perm_S (w2[0], w2[1], selector);
      w2[3] = hc_byte_perm_S (w1[3], w2[0], selector);
      w2[2] = hc_byte_perm_S (w1[2], w1[3], selector);
      w2[1] = hc_byte_perm_S (w1[1], w1[2], selector);
      w2[0] = hc_byte_perm_S (w1[0], w1[1], selector);
      w1[3] = hc_byte_perm_S (w0[3], w1[0], selector);
      w1[2] = hc_byte_perm_S (w0[2], w0[3], selector);
      w1[1] = hc_byte_perm_S (w0[1], w0[2], selector);
      w1[0] = hc_byte_perm_S (w0[0], w0[1], selector);
      w0[3] = hc_byte_perm_S ( 0, w0[0], selector);
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 4:
      w3[3] = hc_byte_perm_S (w2[2], w2[3], selector);
      w3[2] = hc_byte_perm_S (w2[1], w2[2], selector);
      w3[1] = hc_byte_perm_S (w2[0], w2[1], selector);
      w3[0] = hc_byte_perm_S (w1[3], w2[0], selector);
      w2[3] = hc_byte_perm_S (w1[2], w1[3], selector);
      w2[2] = hc_byte_perm_S (w1[1], w1[2], selector);
      w2[1] = hc_byte_perm_S (w1[0], w1[1], selector);
      w2[0] = hc_byte_perm_S (w0[3], w1[0], selector);
      w1[3] = hc_byte_perm_S (w0[2], w0[3], selector);
      w1[2] = hc_byte_perm_S (w0[1], w0[2], selector);
      w1[1] = hc_byte_perm_S (w0[0], w0[1], selector);
      w1[0] = hc_byte_perm_S ( 0, w0[0], selector);
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 5:
      w3[3] = hc_byte_perm_S (w2[1], w2[2], selector);
      w3[2] = hc_byte_perm_S (w2[0], w2[1], selector);
      w3[1] = hc_byte_perm_S (w1[3], w2[0], selector);
      w3[0] = hc_byte_perm_S (w1[2], w1[3], selector);
      w2[3] = hc_byte_perm_S (w1[1], w1[2], selector);
      w2[2] = hc_byte_perm_S (w1[0], w1[1], selector);
      w2[1] = hc_byte_perm_S (w0[3], w1[0], selector);
      w2[0] = hc_byte_perm_S (w0[2], w0[3], selector);
      w1[3] = hc_byte_perm_S (w0[1], w0[2], selector);
      w1[2] = hc_byte_perm_S (w0[0], w0[1], selector);
      w1[1] = hc_byte_perm_S ( 0, w0[0], selector);
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 6:
      w3[3] = hc_byte_perm_S (w2[0], w2[1], selector);
      w3[2] = hc_byte_perm_S (w1[3], w2[0], selector);
      w3[1] = hc_byte_perm_S (w1[2], w1[3], selector);
      w3[0] = hc_byte_perm_S (w1[1], w1[2], selector);
      w2[3] = hc_byte_perm_S (w1[0], w1[1], selector);
      w2[2] = hc_byte_perm_S (w0[3], w1[0], selector);
      w2[1] = hc_byte_perm_S (w0[2], w0[3], selector);
      w2[0] = hc_byte_perm_S (w0[1], w0[2], selector);
      w1[3] = hc_byte_perm_S (w0[0], w0[1], selector);
      w1[2] = hc_byte_perm_S ( 0, w0[0], selector);
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 7:
      w3[3] = hc_byte_perm_S (w1[3], w2[0], selector);
      w3[2] = hc_byte_perm_S (w1[2], w1[3], selector);
      w3[1] = hc_byte_perm_S (w1[1], w1[2], selector);
      w3[0] = hc_byte_perm_S (w1[0], w1[1], selector);
      w2[3] = hc_byte_perm_S (w0[3], w1[0], selector);
      w2[2] = hc_byte_perm_S (w0[2], w0[3], selector);
      w2[1] = hc_byte_perm_S (w0[1], w0[2], selector);
      w2[0] = hc_byte_perm_S (w0[0], w0[1], selector);
      w1[3] = hc_byte_perm_S ( 0, w0[0], selector);
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 8:
      w3[3] = hc_byte_perm_S (w1[2], w1[3], selector);
      w3[2] = hc_byte_perm_S (w1[1], w1[2], selector);
      w3[1] = hc_byte_perm_S (w1[0], w1[1], selector);
      w3[0] = hc_byte_perm_S (w0[3], w1[0], selector);
      w2[3] = hc_byte_perm_S (w0[2], w0[3], selector);
      w2[2] = hc_byte_perm_S (w0[1], w0[2], selector);
      w2[1] = hc_byte_perm_S (w0[0], w0[1], selector);
      w2[0] = hc_byte_perm_S ( 0, w0[0], selector);
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 9:
      w3[3] = hc_byte_perm_S (w1[1], w1[2], selector);
      w3[2] = hc_byte_perm_S (w1[0], w1[1], selector);
      w3[1] = hc_byte_perm_S (w0[3], w1[0], selector);
      w3[0] = hc_byte_perm_S (w0[2], w0[3], selector);
      w2[3] = hc_byte_perm_S (w0[1], w0[2], selector);
      w2[2] = hc_byte_perm_S (w0[0], w0[1], selector);
      w2[1] = hc_byte_perm_S ( 0, w0[0], selector);
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 10:
      w3[3] = hc_byte_perm_S (w1[0], w1[1], selector);
      w3[2] = hc_byte_perm_S (w0[3], w1[0], selector);
      w3[1] = hc_byte_perm_S (w0[2], w0[3], selector);
      w3[0] = hc_byte_perm_S (w0[1], w0[2], selector);
      w2[3] = hc_byte_perm_S (w0[0], w0[1], selector);
      w2[2] = hc_byte_perm_S ( 0, w0[0], selector);
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 11:
      w3[3] = hc_byte_perm_S (w0[3], w1[0], selector);
      w3[2] = hc_byte_perm_S (w0[2], w0[3], selector);
      w3[1] = hc_byte_perm_S (w0[1], w0[2], selector);
      w3[0] = hc_byte_perm_S (w0[0], w0[1], selector);
      w2[3] = hc_byte_perm_S ( 0, w0[0], selector);
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 12:
      w3[3] = hc_byte_perm_S (w0[2], w0[3], selector);
      w3[2] = hc_byte_perm_S (w0[1], w0[2], selector);
      w3[1] = hc_byte_perm_S (w0[0], w0[1], selector);
      w3[0] = hc_byte_perm_S ( 0, w0[0], selector);
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 13:
      w3[3] = hc_byte_perm_S (w0[1], w0[2], selector);
      w3[2] = hc_byte_perm_S (w0[0], w0[1], selector);
      w3[1] = hc_byte_perm_S ( 0, w0[0], selector);
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 14:
      w3[3] = hc_byte_perm_S (w0[0], w0[1], selector);
      w3[2] = hc_byte_perm_S ( 0, w0[0], selector);
      w3[1] = 0;
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;

    case 15:
      w3[3] = hc_byte_perm_S ( 0, w0[0], selector);
      w3[2] = 0;
      w3[1] = 0;
      w3[0] = 0;
      w2[3] = 0;
      w2[2] = 0;
      w2[1] = 0;
      w2[0] = 0;
      w1[3] = 0;
      w1[2] = 0;
      w1[1] = 0;
      w1[0] = 0;
      w0[3] = 0;
      w0[2] = 0;
      w0[1] = 0;
      w0[0] = 0;
      break;
  }
  #endif
}

DECLSPEC void switch_buffer_by_offset_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, const u32 offset)
{
  const int offset_switch = offset / 4;

  #if (defined IS_AMD || defined IS_HIP) || defined
IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_S (w3[3], 0, offset); w3[3] = hc_bytealign_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_S ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_S (w3[3], 0, offset); c0[0] = hc_bytealign_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_S (w3[3], 0, offset); c0[1] = hc_bytealign_S (w3[2], w3[3], offset); c0[0] = hc_bytealign_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_S (w2[1], w2[2], offset); w2[3] 
= hc_bytealign_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_S (w3[3], 0, offset); c0[2] = hc_bytealign_S (w3[2], w3[3], offset); c0[1] = hc_bytealign_S (w3[1], w3[2], offset); c0[0] = hc_bytealign_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_S (w3[3], 0, offset); c0[3] = hc_bytealign_S (w3[2], w3[3], offset); c0[2] = hc_bytealign_S (w3[1], w3[2], offset); c0[1] = hc_bytealign_S (w3[0], w3[1], offset); c0[0] = hc_bytealign_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_S (w0[2], w0[3], offset); w1[2] = 
hc_bytealign_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_S (w3[3], 0, offset); c1[0] = hc_bytealign_S (w3[2], w3[3], offset); c0[3] = hc_bytealign_S (w3[1], w3[2], offset); c0[2] = hc_bytealign_S (w3[0], w3[1], offset); c0[1] = hc_bytealign_S (w2[3], w3[0], offset); c0[0] = hc_bytealign_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_S (w3[3], 0, offset); c1[1] = hc_bytealign_S (w3[2], w3[3], offset); c1[0] = hc_bytealign_S (w3[1], w3[2], offset); c0[3] = hc_bytealign_S (w3[0], w3[1], offset); c0[2] = hc_bytealign_S (w2[3], w3[0], offset); c0[1] = hc_bytealign_S (w2[2], w2[3], offset); c0[0] = hc_bytealign_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_S (w3[3], 0, offset); c1[2] = hc_bytealign_S (w3[2], 
w3[3], offset); c1[1] = hc_bytealign_S (w3[1], w3[2], offset); c1[0] = hc_bytealign_S (w3[0], w3[1], offset); c0[3] = hc_bytealign_S (w2[3], w3[0], offset); c0[2] = hc_bytealign_S (w2[2], w2[3], offset); c0[1] = hc_bytealign_S (w2[1], w2[2], offset); c0[0] = hc_bytealign_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_S (w3[3], 0, offset); c1[3] = hc_bytealign_S (w3[2], w3[3], offset); c1[2] = hc_bytealign_S (w3[1], w3[2], offset); c1[1] = hc_bytealign_S (w3[0], w3[1], offset); c1[0] = hc_bytealign_S (w2[3], w3[0], offset); c0[3] = hc_bytealign_S (w2[2], w2[3], offset); c0[2] = hc_bytealign_S (w2[1], w2[2], offset); c0[1] = hc_bytealign_S (w2[0], w2[1], offset); c0[0] = hc_bytealign_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_S (w3[3], 0, offset); c2[0] = hc_bytealign_S (w3[2], w3[3], offset); c1[3] = hc_bytealign_S (w3[1], w3[2], offset); c1[2] = hc_bytealign_S (w3[0], w3[1], offset); c1[1] = hc_bytealign_S (w2[3], w3[0], offset); c1[0] = hc_bytealign_S (w2[2], w2[3], offset); c0[3] = 
hc_bytealign_S (w2[1], w2[2], offset); c0[2] = hc_bytealign_S (w2[0], w2[1], offset); c0[1] = hc_bytealign_S (w1[3], w2[0], offset); c0[0] = hc_bytealign_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_S (w3[3], 0, offset); c2[1] = hc_bytealign_S (w3[2], w3[3], offset); c2[0] = hc_bytealign_S (w3[1], w3[2], offset); c1[3] = hc_bytealign_S (w3[0], w3[1], offset); c1[2] = hc_bytealign_S (w2[3], w3[0], offset); c1[1] = hc_bytealign_S (w2[2], w2[3], offset); c1[0] = hc_bytealign_S (w2[1], w2[2], offset); c0[3] = hc_bytealign_S (w2[0], w2[1], offset); c0[2] = hc_bytealign_S (w1[3], w2[0], offset); c0[1] = hc_bytealign_S (w1[2], w1[3], offset); c0[0] = hc_bytealign_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_S (w3[3], 0, offset); c2[2] = hc_bytealign_S (w3[2], w3[3], offset); c2[1] = hc_bytealign_S (w3[1], w3[2], offset); c2[0] = hc_bytealign_S (w3[0], w3[1], offset); c1[3] = hc_bytealign_S (w2[3], w3[0], offset); c1[2] = hc_bytealign_S (w2[2], w2[3], offset); c1[1] = hc_bytealign_S (w2[1], w2[2], offset); c1[0] = hc_bytealign_S (w2[0], w2[1], offset); c0[3] = hc_bytealign_S (w1[3], w2[0], offset); c0[2] = hc_bytealign_S (w1[2], w1[3], 
offset); c0[1] = hc_bytealign_S (w1[1], w1[2], offset); c0[0] = hc_bytealign_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_S (w3[3], 0, offset); c2[3] = hc_bytealign_S (w3[2], w3[3], offset); c2[2] = hc_bytealign_S (w3[1], w3[2], offset); c2[1] = hc_bytealign_S (w3[0], w3[1], offset); c2[0] = hc_bytealign_S (w2[3], w3[0], offset); c1[3] = hc_bytealign_S (w2[2], w2[3], offset); c1[2] = hc_bytealign_S (w2[1], w2[2], offset); c1[1] = hc_bytealign_S (w2[0], w2[1], offset); c1[0] = hc_bytealign_S (w1[3], w2[0], offset); c0[3] = hc_bytealign_S (w1[2], w1[3], offset); c0[2] = hc_bytealign_S (w1[1], w1[2], offset); c0[1] = hc_bytealign_S (w1[0], w1[1], offset); c0[0] = hc_bytealign_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_S (w3[3], 0, offset); c3[0] = hc_bytealign_S (w3[2], w3[3], offset); c2[3] = hc_bytealign_S (w3[1], w3[2], offset); c2[2] = hc_bytealign_S (w3[0], w3[1], offset); c2[1] = hc_bytealign_S (w2[3], w3[0], offset); c2[0] = hc_bytealign_S (w2[2], w2[3], offset); c1[3] = hc_bytealign_S (w2[1], w2[2], offset); c1[2] = hc_bytealign_S (w2[0], w2[1], offset); c1[1] = hc_bytealign_S (w1[3], w2[0], offset); c1[0] = hc_bytealign_S (w1[2], w1[3], offset); c0[3] = hc_bytealign_S (w1[1], w1[2], offset); c0[2] = hc_bytealign_S (w1[0], w1[1], offset); c0[1] = hc_bytealign_S 
(w0[3], w1[0], offset); c0[0] = hc_bytealign_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_S (w3[3], 0, offset); c3[1] = hc_bytealign_S (w3[2], w3[3], offset); c3[0] = hc_bytealign_S (w3[1], w3[2], offset); c2[3] = hc_bytealign_S (w3[0], w3[1], offset); c2[2] = hc_bytealign_S (w2[3], w3[0], offset); c2[1] = hc_bytealign_S (w2[2], w2[3], offset); c2[0] = hc_bytealign_S (w2[1], w2[2], offset); c1[3] = hc_bytealign_S (w2[0], w2[1], offset); c1[2] = hc_bytealign_S (w1[3], w2[0], offset); c1[1] = hc_bytealign_S (w1[2], w1[3], offset); c1[0] = hc_bytealign_S (w1[1], w1[2], offset); c0[3] = hc_bytealign_S (w1[0], w1[1], offset); c0[2] = hc_bytealign_S (w0[3], w1[0], offset); c0[1] = hc_bytealign_S (w0[2], w0[3], offset); c0[0] = hc_bytealign_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_S (w3[3], 0, offset); c3[2] = hc_bytealign_S (w3[2], w3[3], offset); c3[1] = hc_bytealign_S (w3[1], w3[2], offset); c3[0] = hc_bytealign_S (w3[0], w3[1], offset); c2[3] = hc_bytealign_S (w2[3], w3[0], offset); c2[2] = hc_bytealign_S (w2[2], w2[3], offset); c2[1] = hc_bytealign_S (w2[1], w2[2], offset); c2[0] = hc_bytealign_S (w2[0], w2[1], offset); c1[3] = hc_bytealign_S (w1[3], w2[0], offset); c1[2] = hc_bytealign_S (w1[2], w1[3], offset); c1[1] = hc_bytealign_S (w1[1], w1[2], offset); c1[0] = hc_bytealign_S (w1[0], w1[1], offset); c0[3] = hc_bytealign_S (w0[3], w1[0], offset); c0[2] = hc_bytealign_S (w0[2], w0[3], offset); c0[1] = 
hc_bytealign_S (w0[1], w0[2], offset); c0[0] = hc_bytealign_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #ifdef IS_NV // could be improved, too switch (offset_switch) { case 0: c0[0] = hc_bytealign_S (w3[3], 0, offset); w3[3] = hc_bytealign_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_S ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_S (w3[3], 0, offset); c0[0] = hc_bytealign_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_S ( 0, w0[0], offset); w0[0] = 0; break; 
case 2: c0[2] = hc_bytealign_S (w3[3], 0, offset); c0[1] = hc_bytealign_S (w3[2], w3[3], offset); c0[0] = hc_bytealign_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_S (w3[3], 0, offset); c0[2] = hc_bytealign_S (w3[2], w3[3], offset); c0[1] = hc_bytealign_S (w3[1], w3[2], offset); c0[0] = hc_bytealign_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_S (w3[3], 0, offset); c0[3] = hc_bytealign_S (w3[2], w3[3], offset); c0[2] = hc_bytealign_S (w3[1], w3[2], offset); c0[1] = hc_bytealign_S (w3[0], w3[1], offset); c0[0] = hc_bytealign_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_S (w2[1], w2[2], 
offset); w3[1] = hc_bytealign_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_S (w3[3], 0, offset); c1[0] = hc_bytealign_S (w3[2], w3[3], offset); c0[3] = hc_bytealign_S (w3[1], w3[2], offset); c0[2] = hc_bytealign_S (w3[0], w3[1], offset); c0[1] = hc_bytealign_S (w2[3], w3[0], offset); c0[0] = hc_bytealign_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_S (w3[3], 0, offset); c1[1] = hc_bytealign_S (w3[2], w3[3], offset); c1[0] = hc_bytealign_S (w3[1], w3[2], offset); c0[3] = hc_bytealign_S (w3[0], w3[1], offset); c0[2] = hc_bytealign_S (w2[3], w3[0], offset); c0[1] = hc_bytealign_S (w2[2], w2[3], offset); c0[0] = hc_bytealign_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_S (w0[3], w1[0], 
offset); w2[1] = hc_bytealign_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_S (w3[3], 0, offset); c1[2] = hc_bytealign_S (w3[2], w3[3], offset); c1[1] = hc_bytealign_S (w3[1], w3[2], offset); c1[0] = hc_bytealign_S (w3[0], w3[1], offset); c0[3] = hc_bytealign_S (w2[3], w3[0], offset); c0[2] = hc_bytealign_S (w2[2], w2[3], offset); c0[1] = hc_bytealign_S (w2[1], w2[2], offset); c0[0] = hc_bytealign_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_S (w3[3], 0, offset); c1[3] = hc_bytealign_S (w3[2], w3[3], offset); c1[2] = hc_bytealign_S (w3[1], w3[2], offset); c1[1] = hc_bytealign_S (w3[0], w3[1], offset); c1[0] = hc_bytealign_S (w2[3], w3[0], offset); c0[3] = hc_bytealign_S (w2[2], w2[3], offset); c0[2] = hc_bytealign_S (w2[1], w2[2], offset); c0[1] = hc_bytealign_S (w2[0], w2[1], offset); c0[0] = hc_bytealign_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; 
w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_S (w3[3], 0, offset); c2[0] = hc_bytealign_S (w3[2], w3[3], offset); c1[3] = hc_bytealign_S (w3[1], w3[2], offset); c1[2] = hc_bytealign_S (w3[0], w3[1], offset); c1[1] = hc_bytealign_S (w2[3], w3[0], offset); c1[0] = hc_bytealign_S (w2[2], w2[3], offset); c0[3] = hc_bytealign_S (w2[1], w2[2], offset); c0[2] = hc_bytealign_S (w2[0], w2[1], offset); c0[1] = hc_bytealign_S (w1[3], w2[0], offset); c0[0] = hc_bytealign_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_S (w3[3], 0, offset); c2[1] = hc_bytealign_S (w3[2], w3[3], offset); c2[0] = hc_bytealign_S (w3[1], w3[2], offset); c1[3] = hc_bytealign_S (w3[0], w3[1], offset); c1[2] = hc_bytealign_S (w2[3], w3[0], offset); c1[1] = hc_bytealign_S (w2[2], w2[3], offset); c1[0] = hc_bytealign_S (w2[1], w2[2], offset); c0[3] = hc_bytealign_S (w2[0], w2[1], offset); c0[2] = hc_bytealign_S (w1[3], w2[0], offset); c0[1] = hc_bytealign_S (w1[2], w1[3], offset); c0[0] = hc_bytealign_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_S (w3[3], 0, offset); c2[2] = hc_bytealign_S (w3[2], w3[3], offset); c2[1] = hc_bytealign_S 
(w3[1], w3[2], offset); c2[0] = hc_bytealign_S (w3[0], w3[1], offset); c1[3] = hc_bytealign_S (w2[3], w3[0], offset); c1[2] = hc_bytealign_S (w2[2], w2[3], offset); c1[1] = hc_bytealign_S (w2[1], w2[2], offset); c1[0] = hc_bytealign_S (w2[0], w2[1], offset); c0[3] = hc_bytealign_S (w1[3], w2[0], offset); c0[2] = hc_bytealign_S (w1[2], w1[3], offset); c0[1] = hc_bytealign_S (w1[1], w1[2], offset); c0[0] = hc_bytealign_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_S (w3[3], 0, offset); c2[3] = hc_bytealign_S (w3[2], w3[3], offset); c2[2] = hc_bytealign_S (w3[1], w3[2], offset); c2[1] = hc_bytealign_S (w3[0], w3[1], offset); c2[0] = hc_bytealign_S (w2[3], w3[0], offset); c1[3] = hc_bytealign_S (w2[2], w2[3], offset); c1[2] = hc_bytealign_S (w2[1], w2[2], offset); c1[1] = hc_bytealign_S (w2[0], w2[1], offset); c1[0] = hc_bytealign_S (w1[3], w2[0], offset); c0[3] = hc_bytealign_S (w1[2], w1[3], offset); c0[2] = hc_bytealign_S (w1[1], w1[2], offset); c0[1] = hc_bytealign_S (w1[0], w1[1], offset); c0[0] = hc_bytealign_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_S (w3[3], 0, offset); c3[0] = hc_bytealign_S (w3[2], w3[3], offset); c2[3] = hc_bytealign_S (w3[1], w3[2], offset); c2[2] = hc_bytealign_S (w3[0], w3[1], offset); c2[1] = hc_bytealign_S (w2[3], w3[0], offset); c2[0] = 
hc_bytealign_S (w2[2], w2[3], offset); c1[3] = hc_bytealign_S (w2[1], w2[2], offset); c1[2] = hc_bytealign_S (w2[0], w2[1], offset); c1[1] = hc_bytealign_S (w1[3], w2[0], offset); c1[0] = hc_bytealign_S (w1[2], w1[3], offset); c0[3] = hc_bytealign_S (w1[1], w1[2], offset); c0[2] = hc_bytealign_S (w1[0], w1[1], offset); c0[1] = hc_bytealign_S (w0[3], w1[0], offset); c0[0] = hc_bytealign_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_S (w3[3], 0, offset); c3[1] = hc_bytealign_S (w3[2], w3[3], offset); c3[0] = hc_bytealign_S (w3[1], w3[2], offset); c2[3] = hc_bytealign_S (w3[0], w3[1], offset); c2[2] = hc_bytealign_S (w2[3], w3[0], offset); c2[1] = hc_bytealign_S (w2[2], w2[3], offset); c2[0] = hc_bytealign_S (w2[1], w2[2], offset); c1[3] = hc_bytealign_S (w2[0], w2[1], offset); c1[2] = hc_bytealign_S (w1[3], w2[0], offset); c1[1] = hc_bytealign_S (w1[2], w1[3], offset); c1[0] = hc_bytealign_S (w1[1], w1[2], offset); c0[3] = hc_bytealign_S (w1[0], w1[1], offset); c0[2] = hc_bytealign_S (w0[3], w1[0], offset); c0[1] = hc_bytealign_S (w0[2], w0[3], offset); c0[0] = hc_bytealign_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_S (w3[3], 0, offset); c3[2] = hc_bytealign_S (w3[2], w3[3], offset); c3[1] = hc_bytealign_S (w3[1], w3[2], offset); c3[0] = hc_bytealign_S (w3[0], w3[1], offset); c2[3] = hc_bytealign_S (w2[3], w3[0], offset); c2[2] = hc_bytealign_S (w2[2], w2[3], offset); c2[1] = hc_bytealign_S (w2[1], w2[2], offset); 
c2[0] = hc_bytealign_S (w2[0], w2[1], offset); c1[3] = hc_bytealign_S (w1[3], w2[0], offset); c1[2] = hc_bytealign_S (w1[2], w1[3], offset); c1[1] = hc_bytealign_S (w1[1], w1[2], offset); c1[0] = hc_bytealign_S (w1[0], w1[1], offset); c0[3] = hc_bytealign_S (w0[3], w1[0], offset); c0[2] = hc_bytealign_S (w0[2], w0[3], offset); c0[1] = hc_bytealign_S (w0[1], w0[2], offset); c0[0] = hc_bytealign_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w3[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; case 1: w3[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_be_S (w2[1], w2[2], 
offset); w2[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: w3[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_bytealign_be_S (w2[2], 
w2[3], offset); w3[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_be_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_be_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_be_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_be_S 
(w1[0], w1[1], offset); w2[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_be_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w3[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_be_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_be_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_be_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_be_S ( 0, w0[0], offset); 
w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_be_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_be_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_be_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_bytealign_be_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); w2[0] = hc_byte_perm_S (w2[0], w1[3], 
selector); w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; case 1: w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; case 2: w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); w3[1] = 
hc_byte_perm_S (w2[2], w2[1], selector); w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); w1[0] = hc_byte_perm_S (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); w1[1] = hc_byte_perm_S (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); w3[0] = hc_byte_perm_S 
(w1[2], w1[1], selector); w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); w1[2] = hc_byte_perm_S (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); w1[3] = hc_byte_perm_S (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); w2[0] = hc_byte_perm_S (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); w2[1] = hc_byte_perm_S (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); w3[1] = hc_byte_perm_S (w0[3], 
w0[2], selector); w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); w2[2] = hc_byte_perm_S (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); w2[3] = hc_byte_perm_S (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); w3[0] = hc_byte_perm_S (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); w3[1] = hc_byte_perm_S (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); w3[2] = hc_byte_perm_S (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w3[3] = hc_byte_perm_S (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS 
u32 *c3, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_be_S (w3[3], 0, offset); w3[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_be_S (w3[3], 0, offset); c0[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_be_S (w3[3], 0, offset); c0[1] = hc_bytealign_be_S (w3[2], w3[3], 
offset); c0[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_be_S (w3[3], 0, offset); c0[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_be_S (w3[3], 0, offset); c0[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[2] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[2] = 
hc_bytealign_be_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_be_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_be_S (w3[3], 0, offset); c1[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[2] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_be_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_be_S (w3[3], 0, offset); c1[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[1] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_be_S (w1[2], w1[3], 
offset); w3[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_be_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_be_S (w3[3], 0, offset); c1[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[1] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_be_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_be_S (w3[3], 0, offset); c1[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[2] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[0] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[3] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[2] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[1] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_be_S 
(w0[3], w1[0], offset); w2[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_be_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_be_S (w3[3], 0, offset); c2[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[2] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[0] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[3] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[2] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[1] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_be_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_be_S (w3[3], 0, offset); c2[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[1] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[0] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[3] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[2] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[1] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[0] = 
hc_bytealign_be_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_be_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_be_S (w3[3], 0, offset); c2[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[1] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[0] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[3] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[2] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[1] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_be_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_be_S (w3[3], 0, offset); c2[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[2] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[0] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[3] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[2] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[1] = hc_bytealign_be_S (w2[0], w2[1], offset); c1[0] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[3] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[2] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[1] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[1] = 
hc_bytealign_be_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_be_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_be_S (w3[3], 0, offset); c3[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[2] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[0] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[3] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[2] = hc_bytealign_be_S (w2[0], w2[1], offset); c1[1] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[0] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[3] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[2] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[1] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_be_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_be_S (w3[3], 0, offset); c3[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c3[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[1] = hc_bytealign_be_S (w2[2], w2[3], offset); c2[0] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[3] = hc_bytealign_be_S (w2[0], w2[1], offset); c1[2] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[1] = hc_bytealign_be_S (w1[2], w1[3], offset); c1[0] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[3] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[2] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[1] = hc_bytealign_be_S (w0[2], w0[3], offset); c0[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[3] = 
hc_bytealign_be_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_be_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_be_S (w3[3], 0, offset); c3[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c3[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c3[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c2[1] = hc_bytealign_be_S (w2[1], w2[2], offset); c2[0] = hc_bytealign_be_S (w2[0], w2[1], offset); c1[3] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[2] = hc_bytealign_be_S (w1[2], w1[3], offset); c1[1] = hc_bytealign_be_S (w1[1], w1[2], offset); c1[0] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[3] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[2] = hc_bytealign_be_S (w0[2], w0[3], offset); c0[1] = hc_bytealign_be_S (w0[1], w0[2], offset); c0[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_be_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm_S ( 0, w3[3], selector); w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); 
w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; case 1: c0[1] = hc_byte_perm_S ( 0, w3[3], selector); c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm_S ( 0, w3[3], selector); c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); w1[0] = 
hc_byte_perm_S (w0[2], w0[1], selector); w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm_S ( 0, w3[3], selector); c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm_S ( 0, w3[3], selector); c0[3] = hc_byte_perm_S (w3[3], w3[2], selector); c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); w1[0] = hc_byte_perm_S (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm_S ( 0, w3[3], selector); c1[0] = hc_byte_perm_S 
(w3[3], w3[2], selector); c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); w1[1] = hc_byte_perm_S (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm_S ( 0, w3[3], selector); c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); c0[3] = hc_byte_perm_S (w3[1], w3[0], selector); c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); w1[2] = hc_byte_perm_S (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_byte_perm_S ( 0, w3[3], selector); c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); c0[2] = hc_byte_perm_S 
(w2[3], w2[2], selector); c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); w1[3] = hc_byte_perm_S (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm_S ( 0, w3[3], selector); c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); w2[0] = hc_byte_perm_S (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm_S ( 0, w3[3], selector); c2[0] = hc_byte_perm_S (w3[3], w3[2], selector); c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); c0[1] = hc_byte_perm_S 
(w2[0], w1[3], selector); c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); w2[1] = hc_byte_perm_S (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm_S ( 0, w3[3], selector); c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); c1[3] = hc_byte_perm_S (w3[1], w3[0], selector); c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); c1[1] = hc_byte_perm_S (w2[3], w2[2], selector); c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); w2[2] = hc_byte_perm_S (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm_S ( 0, w3[3], selector); c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); c1[3] = hc_byte_perm_S (w3[0], w2[3], selector); c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); c0[1] = hc_byte_perm_S (w1[2], 
w1[1], selector); c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); w2[3] = hc_byte_perm_S (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm_S ( 0, w3[3], selector); c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); c2[0] = hc_byte_perm_S (w3[0], w2[3], selector); c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); c1[2] = hc_byte_perm_S (w2[2], w2[1], selector); c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); w3[0] = hc_byte_perm_S (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm_S ( 0, w3[3], selector); c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); c1[2] = hc_byte_perm_S (w2[1], w2[0], selector); c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); c0[2] = hc_byte_perm_S (w1[1], w1[0], 
selector); c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); w3[1] = hc_byte_perm_S (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm_S ( 0, w3[3], selector); c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); c3[0] = hc_byte_perm_S (w3[2], w3[1], selector); c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); c1[2] = hc_byte_perm_S (w2[0], w1[3], selector); c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); w3[2] = hc_byte_perm_S (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm_S ( 0, w3[3], selector); c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); c1[2] = hc_byte_perm_S (w1[3], w1[2], selector); c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); c0[3] = 
hc_byte_perm_S (w1[0], w0[3], selector); c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); w3[3] = hc_byte_perm_S (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w7[3] = hc_bytealign_S (w7[2], w7[3], offset); w7[2] = hc_bytealign_S (w7[1], w7[2], offset); w7[1] = hc_bytealign_S (w7[0], w7[1], offset); w7[0] = hc_bytealign_S (w6[3], w7[0], offset); w6[3] = hc_bytealign_S (w6[2], w6[3], offset); w6[2] = hc_bytealign_S (w6[1], w6[2], offset); w6[1] = hc_bytealign_S (w6[0], w6[1], offset); w6[0] = hc_bytealign_S (w5[3], w6[0], offset); w5[3] = hc_bytealign_S (w5[2], w5[3], offset); w5[2] = hc_bytealign_S (w5[1], w5[2], offset); w5[1] = hc_bytealign_S (w5[0], w5[1], offset); w5[0] = hc_bytealign_S (w4[3], w5[0], offset); w4[3] = hc_bytealign_S (w4[2], w4[3], offset); w4[2] = hc_bytealign_S (w4[1], w4[2], offset); w4[1] = hc_bytealign_S (w4[0], w4[1], offset); w4[0] = hc_bytealign_S (w3[3], w4[0], offset); w3[3] = hc_bytealign_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_S 
(w1[1], w1[2], offset); w1[1] = hc_bytealign_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_S ( 0, w0[0], offset); break; case 1: w7[3] = hc_bytealign_S (w7[1], w7[2], offset); w7[2] = hc_bytealign_S (w7[0], w7[1], offset); w7[1] = hc_bytealign_S (w6[3], w7[0], offset); w7[0] = hc_bytealign_S (w6[2], w6[3], offset); w6[3] = hc_bytealign_S (w6[1], w6[2], offset); w6[2] = hc_bytealign_S (w6[0], w6[1], offset); w6[1] = hc_bytealign_S (w5[3], w6[0], offset); w6[0] = hc_bytealign_S (w5[2], w5[3], offset); w5[3] = hc_bytealign_S (w5[1], w5[2], offset); w5[2] = hc_bytealign_S (w5[0], w5[1], offset); w5[1] = hc_bytealign_S (w4[3], w5[0], offset); w5[0] = hc_bytealign_S (w4[2], w4[3], offset); w4[3] = hc_bytealign_S (w4[1], w4[2], offset); w4[2] = hc_bytealign_S (w4[0], w4[1], offset); w4[1] = hc_bytealign_S (w3[3], w4[0], offset); w4[0] = hc_bytealign_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: w7[3] = hc_bytealign_S (w7[0], w7[1], offset); w7[2] = hc_bytealign_S (w6[3], w7[0], offset); w7[1] = hc_bytealign_S (w6[2], w6[3], offset); w7[0] = hc_bytealign_S (w6[1], w6[2], offset); w6[3] = 
hc_bytealign_S (w6[0], w6[1], offset); w6[2] = hc_bytealign_S (w5[3], w6[0], offset); w6[1] = hc_bytealign_S (w5[2], w5[3], offset); w6[0] = hc_bytealign_S (w5[1], w5[2], offset); w5[3] = hc_bytealign_S (w5[0], w5[1], offset); w5[2] = hc_bytealign_S (w4[3], w5[0], offset); w5[1] = hc_bytealign_S (w4[2], w4[3], offset); w5[0] = hc_bytealign_S (w4[1], w4[2], offset); w4[3] = hc_bytealign_S (w4[0], w4[1], offset); w4[2] = hc_bytealign_S (w3[3], w4[0], offset); w4[1] = hc_bytealign_S (w3[2], w3[3], offset); w4[0] = hc_bytealign_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_bytealign_S (w6[3], w7[0], offset); w7[2] = hc_bytealign_S (w6[2], w6[3], offset); w7[1] = hc_bytealign_S (w6[1], w6[2], offset); w7[0] = hc_bytealign_S (w6[0], w6[1], offset); w6[3] = hc_bytealign_S (w5[3], w6[0], offset); w6[2] = hc_bytealign_S (w5[2], w5[3], offset); w6[1] = hc_bytealign_S (w5[1], w5[2], offset); w6[0] = hc_bytealign_S (w5[0], w5[1], offset); w5[3] = hc_bytealign_S (w4[3], w5[0], offset); w5[2] = hc_bytealign_S (w4[2], w4[3], offset); w5[1] = hc_bytealign_S (w4[1], w4[2], offset); w5[0] = hc_bytealign_S (w4[0], w4[1], offset); w4[3] = hc_bytealign_S (w3[3], w4[0], offset); w4[2] = hc_bytealign_S (w3[2], w3[3], offset); w4[1] = hc_bytealign_S (w3[1], w3[2], offset); w4[0] = hc_bytealign_S (w3[0], w3[1], offset); 
w3[3] = hc_bytealign_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_bytealign_S (w6[2], w6[3], offset); w7[2] = hc_bytealign_S (w6[1], w6[2], offset); w7[1] = hc_bytealign_S (w6[0], w6[1], offset); w7[0] = hc_bytealign_S (w5[3], w6[0], offset); w6[3] = hc_bytealign_S (w5[2], w5[3], offset); w6[2] = hc_bytealign_S (w5[1], w5[2], offset); w6[1] = hc_bytealign_S (w5[0], w5[1], offset); w6[0] = hc_bytealign_S (w4[3], w5[0], offset); w5[3] = hc_bytealign_S (w4[2], w4[3], offset); w5[2] = hc_bytealign_S (w4[1], w4[2], offset); w5[1] = hc_bytealign_S (w4[0], w4[1], offset); w5[0] = hc_bytealign_S (w3[3], w4[0], offset); w4[3] = hc_bytealign_S (w3[2], w3[3], offset); w4[2] = hc_bytealign_S (w3[1], w3[2], offset); w4[1] = hc_bytealign_S (w3[0], w3[1], offset); w4[0] = hc_bytealign_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 
0; w0[0] = 0; break; case 5: w7[3] = hc_bytealign_S (w6[1], w6[2], offset); w7[2] = hc_bytealign_S (w6[0], w6[1], offset); w7[1] = hc_bytealign_S (w5[3], w6[0], offset); w7[0] = hc_bytealign_S (w5[2], w5[3], offset); w6[3] = hc_bytealign_S (w5[1], w5[2], offset); w6[2] = hc_bytealign_S (w5[0], w5[1], offset); w6[1] = hc_bytealign_S (w4[3], w5[0], offset); w6[0] = hc_bytealign_S (w4[2], w4[3], offset); w5[3] = hc_bytealign_S (w4[1], w4[2], offset); w5[2] = hc_bytealign_S (w4[0], w4[1], offset); w5[1] = hc_bytealign_S (w3[3], w4[0], offset); w5[0] = hc_bytealign_S (w3[2], w3[3], offset); w4[3] = hc_bytealign_S (w3[1], w3[2], offset); w4[2] = hc_bytealign_S (w3[0], w3[1], offset); w4[1] = hc_bytealign_S (w2[3], w3[0], offset); w4[0] = hc_bytealign_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_bytealign_S (w6[0], w6[1], offset); w7[2] = hc_bytealign_S (w5[3], w6[0], offset); w7[1] = hc_bytealign_S (w5[2], w5[3], offset); w7[0] = hc_bytealign_S (w5[1], w5[2], offset); w6[3] = hc_bytealign_S (w5[0], w5[1], offset); w6[2] = hc_bytealign_S (w4[3], w5[0], offset); w6[1] = hc_bytealign_S (w4[2], w4[3], offset); w6[0] = hc_bytealign_S (w4[1], w4[2], offset); w5[3] = hc_bytealign_S (w4[0], w4[1], offset); w5[2] = hc_bytealign_S (w3[3], w4[0], offset); w5[1] = hc_bytealign_S (w3[2], w3[3], offset); w5[0] = hc_bytealign_S (w3[1], w3[2], offset); w4[3] = hc_bytealign_S (w3[0], w3[1], offset); w4[2] = hc_bytealign_S 
(w2[3], w3[0], offset); w4[1] = hc_bytealign_S (w2[2], w2[3], offset); w4[0] = hc_bytealign_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_bytealign_S (w5[3], w6[0], offset); w7[2] = hc_bytealign_S (w5[2], w5[3], offset); w7[1] = hc_bytealign_S (w5[1], w5[2], offset); w7[0] = hc_bytealign_S (w5[0], w5[1], offset); w6[3] = hc_bytealign_S (w4[3], w5[0], offset); w6[2] = hc_bytealign_S (w4[2], w4[3], offset); w6[1] = hc_bytealign_S (w4[1], w4[2], offset); w6[0] = hc_bytealign_S (w4[0], w4[1], offset); w5[3] = hc_bytealign_S (w3[3], w4[0], offset); w5[2] = hc_bytealign_S (w3[2], w3[3], offset); w5[1] = hc_bytealign_S (w3[1], w3[2], offset); w5[0] = hc_bytealign_S (w3[0], w3[1], offset); w4[3] = hc_bytealign_S (w2[3], w3[0], offset); w4[2] = hc_bytealign_S (w2[2], w2[3], offset); w4[1] = hc_bytealign_S (w2[1], w2[2], offset); w4[0] = hc_bytealign_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_bytealign_S (w5[2], w5[3], offset); w7[2] = hc_bytealign_S 
(w5[1], w5[2], offset); w7[1] = hc_bytealign_S (w5[0], w5[1], offset); w7[0] = hc_bytealign_S (w4[3], w5[0], offset); w6[3] = hc_bytealign_S (w4[2], w4[3], offset); w6[2] = hc_bytealign_S (w4[1], w4[2], offset); w6[1] = hc_bytealign_S (w4[0], w4[1], offset); w6[0] = hc_bytealign_S (w3[3], w4[0], offset); w5[3] = hc_bytealign_S (w3[2], w3[3], offset); w5[2] = hc_bytealign_S (w3[1], w3[2], offset); w5[1] = hc_bytealign_S (w3[0], w3[1], offset); w5[0] = hc_bytealign_S (w2[3], w3[0], offset); w4[3] = hc_bytealign_S (w2[2], w2[3], offset); w4[2] = hc_bytealign_S (w2[1], w2[2], offset); w4[1] = hc_bytealign_S (w2[0], w2[1], offset); w4[0] = hc_bytealign_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_bytealign_S (w5[1], w5[2], offset); w7[2] = hc_bytealign_S (w5[0], w5[1], offset); w7[1] = hc_bytealign_S (w4[3], w5[0], offset); w7[0] = hc_bytealign_S (w4[2], w4[3], offset); w6[3] = hc_bytealign_S (w4[1], w4[2], offset); w6[2] = hc_bytealign_S (w4[0], w4[1], offset); w6[1] = hc_bytealign_S (w3[3], w4[0], offset); w6[0] = hc_bytealign_S (w3[2], w3[3], offset); w5[3] = hc_bytealign_S (w3[1], w3[2], offset); w5[2] = hc_bytealign_S (w3[0], w3[1], offset); w5[1] = hc_bytealign_S (w2[3], w3[0], offset); w5[0] = hc_bytealign_S (w2[2], w2[3], offset); w4[3] = hc_bytealign_S (w2[1], w2[2], offset); w4[2] = hc_bytealign_S (w2[0], w2[1], offset); w4[1] = hc_bytealign_S (w1[3], w2[0], offset); w4[0] = hc_bytealign_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_S (w1[0], w1[1], 
offset); w3[1] = hc_bytealign_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_bytealign_S (w5[0], w5[1], offset); w7[2] = hc_bytealign_S (w4[3], w5[0], offset); w7[1] = hc_bytealign_S (w4[2], w4[3], offset); w7[0] = hc_bytealign_S (w4[1], w4[2], offset); w6[3] = hc_bytealign_S (w4[0], w4[1], offset); w6[2] = hc_bytealign_S (w3[3], w4[0], offset); w6[1] = hc_bytealign_S (w3[2], w3[3], offset); w6[0] = hc_bytealign_S (w3[1], w3[2], offset); w5[3] = hc_bytealign_S (w3[0], w3[1], offset); w5[2] = hc_bytealign_S (w2[3], w3[0], offset); w5[1] = hc_bytealign_S (w2[2], w2[3], offset); w5[0] = hc_bytealign_S (w2[1], w2[2], offset); w4[3] = hc_bytealign_S (w2[0], w2[1], offset); w4[2] = hc_bytealign_S (w1[3], w2[0], offset); w4[1] = hc_bytealign_S (w1[2], w1[3], offset); w4[0] = hc_bytealign_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_bytealign_S (w4[3], w5[0], offset); w7[2] = hc_bytealign_S (w4[2], w4[3], offset); w7[1] = hc_bytealign_S (w4[1], w4[2], offset); w7[0] = hc_bytealign_S (w4[0], w4[1], offset); w6[3] = hc_bytealign_S (w3[3], w4[0], offset); w6[2] = hc_bytealign_S (w3[2], w3[3], offset); w6[1] = hc_bytealign_S (w3[1], w3[2], offset); w6[0] = hc_bytealign_S (w3[0], w3[1], offset); w5[3] = hc_bytealign_S (w2[3], w3[0], offset); w5[2] = hc_bytealign_S (w2[2], w2[3], offset); w5[1] = 
hc_bytealign_S (w2[1], w2[2], offset); w5[0] = hc_bytealign_S (w2[0], w2[1], offset); w4[3] = hc_bytealign_S (w1[3], w2[0], offset); w4[2] = hc_bytealign_S (w1[2], w1[3], offset); w4[1] = hc_bytealign_S (w1[1], w1[2], offset); w4[0] = hc_bytealign_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_bytealign_S (w4[2], w4[3], offset); w7[2] = hc_bytealign_S (w4[1], w4[2], offset); w7[1] = hc_bytealign_S (w4[0], w4[1], offset); w7[0] = hc_bytealign_S (w3[3], w4[0], offset); w6[3] = hc_bytealign_S (w3[2], w3[3], offset); w6[2] = hc_bytealign_S (w3[1], w3[2], offset); w6[1] = hc_bytealign_S (w3[0], w3[1], offset); w6[0] = hc_bytealign_S (w2[3], w3[0], offset); w5[3] = hc_bytealign_S (w2[2], w2[3], offset); w5[2] = hc_bytealign_S (w2[1], w2[2], offset); w5[1] = hc_bytealign_S (w2[0], w2[1], offset); w5[0] = hc_bytealign_S (w1[3], w2[0], offset); w4[3] = hc_bytealign_S (w1[2], w1[3], offset); w4[2] = hc_bytealign_S (w1[1], w1[2], offset); w4[1] = hc_bytealign_S (w1[0], w1[1], offset); w4[0] = hc_bytealign_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_bytealign_S (w4[1], w4[2], offset); w7[2] = hc_bytealign_S (w4[0], w4[1], offset); w7[1] = hc_bytealign_S (w3[3], w4[0], offset); w7[0] = hc_bytealign_S (w3[2], w3[3], offset); w6[3] = hc_bytealign_S (w3[1], w3[2], offset); w6[2] = hc_bytealign_S (w3[0], 
w3[1], offset); w6[1] = hc_bytealign_S (w2[3], w3[0], offset); w6[0] = hc_bytealign_S (w2[2], w2[3], offset); w5[3] = hc_bytealign_S (w2[1], w2[2], offset); w5[2] = hc_bytealign_S (w2[0], w2[1], offset); w5[1] = hc_bytealign_S (w1[3], w2[0], offset); w5[0] = hc_bytealign_S (w1[2], w1[3], offset); w4[3] = hc_bytealign_S (w1[1], w1[2], offset); w4[2] = hc_bytealign_S (w1[0], w1[1], offset); w4[1] = hc_bytealign_S (w0[3], w1[0], offset); w4[0] = hc_bytealign_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_bytealign_S (w4[0], w4[1], offset); w7[2] = hc_bytealign_S (w3[3], w4[0], offset); w7[1] = hc_bytealign_S (w3[2], w3[3], offset); w7[0] = hc_bytealign_S (w3[1], w3[2], offset); w6[3] = hc_bytealign_S (w3[0], w3[1], offset); w6[2] = hc_bytealign_S (w2[3], w3[0], offset); w6[1] = hc_bytealign_S (w2[2], w2[3], offset); w6[0] = hc_bytealign_S (w2[1], w2[2], offset); w5[3] = hc_bytealign_S (w2[0], w2[1], offset); w5[2] = hc_bytealign_S (w1[3], w2[0], offset); w5[1] = hc_bytealign_S (w1[2], w1[3], offset); w5[0] = hc_bytealign_S (w1[1], w1[2], offset); w4[3] = hc_bytealign_S (w1[0], w1[1], offset); w4[2] = hc_bytealign_S (w0[3], w1[0], offset); w4[1] = hc_bytealign_S (w0[2], w0[3], offset); w4[0] = hc_bytealign_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_bytealign_S (w3[3], w4[0], offset); w7[2] = hc_bytealign_S (w3[2], w3[3], offset); w7[1] = hc_bytealign_S (w3[1], w3[2], offset); w7[0] = hc_bytealign_S (w3[0], w3[1], offset); w6[3] = 
hc_bytealign_S (w2[3], w3[0], offset); w6[2] = hc_bytealign_S (w2[2], w2[3], offset); w6[1] = hc_bytealign_S (w2[1], w2[2], offset); w6[0] = hc_bytealign_S (w2[0], w2[1], offset); w5[3] = hc_bytealign_S (w1[3], w2[0], offset); w5[2] = hc_bytealign_S (w1[2], w1[3], offset); w5[1] = hc_bytealign_S (w1[1], w1[2], offset); w5[0] = hc_bytealign_S (w1[0], w1[1], offset); w4[3] = hc_bytealign_S (w0[3], w1[0], offset); w4[2] = hc_bytealign_S (w0[2], w0[3], offset); w4[1] = hc_bytealign_S (w0[1], w0[2], offset); w4[0] = hc_bytealign_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_bytealign_S (w3[2], w3[3], offset); w7[2] = hc_bytealign_S (w3[1], w3[2], offset); w7[1] = hc_bytealign_S (w3[0], w3[1], offset); w7[0] = hc_bytealign_S (w2[3], w3[0], offset); w6[3] = hc_bytealign_S (w2[2], w2[3], offset); w6[2] = hc_bytealign_S (w2[1], w2[2], offset); w6[1] = hc_bytealign_S (w2[0], w2[1], offset); w6[0] = hc_bytealign_S (w1[3], w2[0], offset); w5[3] = hc_bytealign_S (w1[2], w1[3], offset); w5[2] = hc_bytealign_S (w1[1], w1[2], offset); w5[1] = hc_bytealign_S (w1[0], w1[1], offset); w5[0] = hc_bytealign_S (w0[3], w1[0], offset); w4[3] = hc_bytealign_S (w0[2], w0[3], offset); w4[2] = hc_bytealign_S (w0[1], w0[2], offset); w4[1] = hc_bytealign_S (w0[0], w0[1], offset); w4[0] = hc_bytealign_S ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_bytealign_S (w3[1], w3[2], offset); w7[2] = hc_bytealign_S (w3[0], w3[1], offset); w7[1] = hc_bytealign_S (w2[3], w3[0], offset); w7[0] = hc_bytealign_S (w2[2], w2[3], offset); w6[3] = hc_bytealign_S (w2[1], w2[2], offset); w6[2] = hc_bytealign_S (w2[0], w2[1], 
offset); w6[1] = hc_bytealign_S (w1[3], w2[0], offset); w6[0] = hc_bytealign_S (w1[2], w1[3], offset); w5[3] = hc_bytealign_S (w1[1], w1[2], offset); w5[2] = hc_bytealign_S (w1[0], w1[1], offset); w5[1] = hc_bytealign_S (w0[3], w1[0], offset); w5[0] = hc_bytealign_S (w0[2], w0[3], offset); w4[3] = hc_bytealign_S (w0[1], w0[2], offset); w4[2] = hc_bytealign_S (w0[0], w0[1], offset); w4[1] = hc_bytealign_S ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_bytealign_S (w3[0], w3[1], offset); w7[2] = hc_bytealign_S (w2[3], w3[0], offset); w7[1] = hc_bytealign_S (w2[2], w2[3], offset); w7[0] = hc_bytealign_S (w2[1], w2[2], offset); w6[3] = hc_bytealign_S (w2[0], w2[1], offset); w6[2] = hc_bytealign_S (w1[3], w2[0], offset); w6[1] = hc_bytealign_S (w1[2], w1[3], offset); w6[0] = hc_bytealign_S (w1[1], w1[2], offset); w5[3] = hc_bytealign_S (w1[0], w1[1], offset); w5[2] = hc_bytealign_S (w0[3], w1[0], offset); w5[1] = hc_bytealign_S (w0[2], w0[3], offset); w5[0] = hc_bytealign_S (w0[1], w0[2], offset); w4[3] = hc_bytealign_S (w0[0], w0[1], offset); w4[2] = hc_bytealign_S ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_bytealign_S (w2[3], w3[0], offset); w7[2] = hc_bytealign_S (w2[2], w2[3], offset); w7[1] = hc_bytealign_S (w2[1], w2[2], offset); w7[0] = hc_bytealign_S (w2[0], w2[1], offset); w6[3] = hc_bytealign_S (w1[3], w2[0], offset); w6[2] = hc_bytealign_S (w1[2], w1[3], offset); w6[1] = hc_bytealign_S (w1[1], w1[2], offset); w6[0] = hc_bytealign_S (w1[0], w1[1], offset); w5[3] = hc_bytealign_S (w0[3], w1[0], offset); w5[2] = hc_bytealign_S (w0[2], w0[3], offset); w5[1] = hc_bytealign_S 
(w0[1], w0[2], offset); w5[0] = hc_bytealign_S (w0[0], w0[1], offset); w4[3] = hc_bytealign_S ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_bytealign_S (w2[2], w2[3], offset); w7[2] = hc_bytealign_S (w2[1], w2[2], offset); w7[1] = hc_bytealign_S (w2[0], w2[1], offset); w7[0] = hc_bytealign_S (w1[3], w2[0], offset); w6[3] = hc_bytealign_S (w1[2], w1[3], offset); w6[2] = hc_bytealign_S (w1[1], w1[2], offset); w6[1] = hc_bytealign_S (w1[0], w1[1], offset); w6[0] = hc_bytealign_S (w0[3], w1[0], offset); w5[3] = hc_bytealign_S (w0[2], w0[3], offset); w5[2] = hc_bytealign_S (w0[1], w0[2], offset); w5[1] = hc_bytealign_S (w0[0], w0[1], offset); w5[0] = hc_bytealign_S ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_bytealign_S (w2[1], w2[2], offset); w7[2] = hc_bytealign_S (w2[0], w2[1], offset); w7[1] = hc_bytealign_S (w1[3], w2[0], offset); w7[0] = hc_bytealign_S (w1[2], w1[3], offset); w6[3] = hc_bytealign_S (w1[1], w1[2], offset); w6[2] = hc_bytealign_S (w1[0], w1[1], offset); w6[1] = hc_bytealign_S (w0[3], w1[0], offset); w6[0] = hc_bytealign_S (w0[2], w0[3], offset); w5[3] = hc_bytealign_S (w0[1], w0[2], offset); w5[2] = hc_bytealign_S (w0[0], w0[1], offset); w5[1] = hc_bytealign_S ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_bytealign_S (w2[0], w2[1], offset); w7[2] = hc_bytealign_S (w1[3], w2[0], offset); w7[1] = 
hc_bytealign_S (w1[2], w1[3], offset); w7[0] = hc_bytealign_S (w1[1], w1[2], offset); w6[3] = hc_bytealign_S (w1[0], w1[1], offset); w6[2] = hc_bytealign_S (w0[3], w1[0], offset); w6[1] = hc_bytealign_S (w0[2], w0[3], offset); w6[0] = hc_bytealign_S (w0[1], w0[2], offset); w5[3] = hc_bytealign_S (w0[0], w0[1], offset); w5[2] = hc_bytealign_S ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_bytealign_S (w1[3], w2[0], offset); w7[2] = hc_bytealign_S (w1[2], w1[3], offset); w7[1] = hc_bytealign_S (w1[1], w1[2], offset); w7[0] = hc_bytealign_S (w1[0], w1[1], offset); w6[3] = hc_bytealign_S (w0[3], w1[0], offset); w6[2] = hc_bytealign_S (w0[2], w0[3], offset); w6[1] = hc_bytealign_S (w0[1], w0[2], offset); w6[0] = hc_bytealign_S (w0[0], w0[1], offset); w5[3] = hc_bytealign_S ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_bytealign_S (w1[2], w1[3], offset); w7[2] = hc_bytealign_S (w1[1], w1[2], offset); w7[1] = hc_bytealign_S (w1[0], w1[1], offset); w7[0] = hc_bytealign_S (w0[3], w1[0], offset); w6[3] = hc_bytealign_S (w0[2], w0[3], offset); w6[2] = hc_bytealign_S (w0[1], w0[2], offset); w6[1] = hc_bytealign_S (w0[0], w0[1], offset); w6[0] = hc_bytealign_S ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_bytealign_S (w1[1], 
w1[2], offset); w7[2] = hc_bytealign_S (w1[0], w1[1], offset); w7[1] = hc_bytealign_S (w0[3], w1[0], offset); w7[0] = hc_bytealign_S (w0[2], w0[3], offset); w6[3] = hc_bytealign_S (w0[1], w0[2], offset); w6[2] = hc_bytealign_S (w0[0], w0[1], offset); w6[1] = hc_bytealign_S ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_bytealign_S (w1[0], w1[1], offset); w7[2] = hc_bytealign_S (w0[3], w1[0], offset); w7[1] = hc_bytealign_S (w0[2], w0[3], offset); w7[0] = hc_bytealign_S (w0[1], w0[2], offset); w6[3] = hc_bytealign_S (w0[0], w0[1], offset); w6[2] = hc_bytealign_S ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_bytealign_S (w0[3], w1[0], offset); w7[2] = hc_bytealign_S (w0[2], w0[3], offset); w7[1] = hc_bytealign_S (w0[1], w0[2], offset); w7[0] = hc_bytealign_S (w0[0], w0[1], offset); w6[3] = hc_bytealign_S ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_bytealign_S (w0[2], w0[3], offset); w7[2] = hc_bytealign_S (w0[1], w0[2], offset); w7[1] = hc_bytealign_S (w0[0], w0[1], offset); w7[0] = hc_bytealign_S ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; 
w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: w7[3] = hc_bytealign_S (w0[1], w0[2], offset); w7[2] = hc_bytealign_S (w0[0], w0[1], offset); w7[1] = hc_bytealign_S ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_bytealign_S (w0[0], w0[1], offset); w7[2] = hc_bytealign_S ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: w7[3] = hc_bytealign_S ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); 
w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); w0[0] = hc_byte_perm_S ( 0, w0[0], selector); break; case 1: w7[3] = hc_byte_perm_S (w7[1], w7[2], selector); w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); w5[2] = hc_byte_perm_S 
(w5[0], w5[1], selector); w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); w0[1] = hc_byte_perm_S ( 0, w0[0], selector); w0[0] = 0; break; case 2: w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); w6[2] = hc_byte_perm_S (w5[3], w6[0], selector); w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); w3[1] = hc_byte_perm_S (w2[2], w2[3], 
selector); w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); w0[2] = hc_byte_perm_S ( 0, w0[0], selector); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); w7[0] = hc_byte_perm_S (w6[0], w6[1], selector); w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); w5[2] = hc_byte_perm_S (w4[2], w4[3], selector); w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); w0[3] = hc_byte_perm_S ( 0, w0[0], 
selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_byte_perm_S (w6[2], w6[3], selector); w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); w4[3] = hc_byte_perm_S (w3[2], w3[3], selector); w4[2] = hc_byte_perm_S (w3[1], w3[2], selector); w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); w1[0] = hc_byte_perm_S ( 0, w0[0], selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); w6[2] = hc_byte_perm_S (w5[0], w5[1], selector); w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); w5[1] = hc_byte_perm_S 
(w3[3], w4[0], selector); w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); w1[1] = hc_byte_perm_S ( 0, w0[0], selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); w5[0] = hc_byte_perm_S (w3[1], w3[2], selector); w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); w2[1] = hc_byte_perm_S (w0[2], w0[3], 
selector); w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); w1[2] = hc_byte_perm_S ( 0, w0[0], selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); w6[3] = hc_byte_perm_S (w4[3], w5[0], selector); w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); w1[3] = hc_byte_perm_S ( 0, w0[0], selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); w5[2] = 
hc_byte_perm_S (w3[1], w3[2], selector); w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); w2[0] = hc_byte_perm_S ( 0, w0[0], selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); w2[1] = hc_byte_perm_S ( 0, w0[0], selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; 
w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); w7[0] = hc_byte_perm_S (w4[1], w4[2], selector); w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); w2[2] = hc_byte_perm_S ( 0, w0[0], selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); w4[1] = hc_byte_perm_S (w1[1], w1[2], 
selector); w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); w2[3] = hc_byte_perm_S ( 0, w0[0], selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); w3[0] = hc_byte_perm_S ( 0, w0[0], selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); w7[1] = hc_byte_perm_S (w3[3], w4[0], selector); w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); w5[3] = hc_byte_perm_S (w2[1], w2[2], 
selector); w5[2] = hc_byte_perm_S (w2[0], w2[1], selector); w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); w3[1] = hc_byte_perm_S ( 0, w0[0], selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_byte_perm_S (w4[0], w4[1], selector); w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); w6[1] = hc_byte_perm_S (w2[2], w2[3], selector); w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); w3[2] = hc_byte_perm_S ( 0, w0[0], selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); w7[2] = hc_byte_perm_S (w3[2], w3[3], selector); w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); w6[3] = hc_byte_perm_S (w2[3], w3[0], selector); w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); 
w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); w5[2] = hc_byte_perm_S (w1[2], w1[3], selector); w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); w3[3] = hc_byte_perm_S ( 0, w0[0], selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_S (w7[3], 0, offset); w7[3] = hc_bytealign_S (w7[2], w7[3], offset); w7[2] = hc_bytealign_S (w7[1], w7[2], offset); w7[1] = hc_bytealign_S (w7[0], w7[1], offset); w7[0] = hc_bytealign_S (w6[3], w7[0], offset); w6[3] = hc_bytealign_S (w6[2], w6[3], offset); w6[2] = hc_bytealign_S (w6[1], w6[2], offset); w6[1] = hc_bytealign_S (w6[0], w6[1], offset); w6[0] = hc_bytealign_S (w5[3], w6[0], offset); w5[3] = hc_bytealign_S (w5[2], w5[3], offset); w5[2] = hc_bytealign_S (w5[1], w5[2], offset); w5[1] = hc_bytealign_S (w5[0], w5[1], offset); w5[0] = hc_bytealign_S (w4[3], w5[0], offset); w4[3] = hc_bytealign_S (w4[2], w4[3], offset); w4[2] = hc_bytealign_S (w4[1], w4[2], offset); w4[1] = hc_bytealign_S (w4[0], 
w4[1], offset); w4[0] = hc_bytealign_S (w3[3], w4[0], offset); w3[3] = hc_bytealign_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_S ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_S (w7[3], 0, offset); c0[0] = hc_bytealign_S (w7[2], w7[3], offset); w7[3] = hc_bytealign_S (w7[1], w7[2], offset); w7[2] = hc_bytealign_S (w7[0], w7[1], offset); w7[1] = hc_bytealign_S (w6[3], w7[0], offset); w7[0] = hc_bytealign_S (w6[2], w6[3], offset); w6[3] = hc_bytealign_S (w6[1], w6[2], offset); w6[2] = hc_bytealign_S (w6[0], w6[1], offset); w6[1] = hc_bytealign_S (w5[3], w6[0], offset); w6[0] = hc_bytealign_S (w5[2], w5[3], offset); w5[3] = hc_bytealign_S (w5[1], w5[2], offset); w5[2] = hc_bytealign_S (w5[0], w5[1], offset); w5[1] = hc_bytealign_S (w4[3], w5[0], offset); w5[0] = hc_bytealign_S (w4[2], w4[3], offset); w4[3] = hc_bytealign_S (w4[1], w4[2], offset); w4[2] = hc_bytealign_S (w4[0], w4[1], offset); w4[1] = hc_bytealign_S (w3[3], w4[0], offset); w4[0] = hc_bytealign_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_S (w1[3], w2[0], offset); 
w2[0] = hc_bytealign_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_S (w7[3], 0, offset); c0[1] = hc_bytealign_S (w7[2], w7[3], offset); c0[0] = hc_bytealign_S (w7[1], w7[2], offset); w7[3] = hc_bytealign_S (w7[0], w7[1], offset); w7[2] = hc_bytealign_S (w6[3], w7[0], offset); w7[1] = hc_bytealign_S (w6[2], w6[3], offset); w7[0] = hc_bytealign_S (w6[1], w6[2], offset); w6[3] = hc_bytealign_S (w6[0], w6[1], offset); w6[2] = hc_bytealign_S (w5[3], w6[0], offset); w6[1] = hc_bytealign_S (w5[2], w5[3], offset); w6[0] = hc_bytealign_S (w5[1], w5[2], offset); w5[3] = hc_bytealign_S (w5[0], w5[1], offset); w5[2] = hc_bytealign_S (w4[3], w5[0], offset); w5[1] = hc_bytealign_S (w4[2], w4[3], offset); w5[0] = hc_bytealign_S (w4[1], w4[2], offset); w4[3] = hc_bytealign_S (w4[0], w4[1], offset); w4[2] = hc_bytealign_S (w3[3], w4[0], offset); w4[1] = hc_bytealign_S (w3[2], w3[3], offset); w4[0] = hc_bytealign_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = 
hc_bytealign_S (w7[3], 0, offset); c0[2] = hc_bytealign_S (w7[2], w7[3], offset); c0[1] = hc_bytealign_S (w7[1], w7[2], offset); c0[0] = hc_bytealign_S (w7[0], w7[1], offset); w7[3] = hc_bytealign_S (w6[3], w7[0], offset); w7[2] = hc_bytealign_S (w6[2], w6[3], offset); w7[1] = hc_bytealign_S (w6[1], w6[2], offset); w7[0] = hc_bytealign_S (w6[0], w6[1], offset); w6[3] = hc_bytealign_S (w5[3], w6[0], offset); w6[2] = hc_bytealign_S (w5[2], w5[3], offset); w6[1] = hc_bytealign_S (w5[1], w5[2], offset); w6[0] = hc_bytealign_S (w5[0], w5[1], offset); w5[3] = hc_bytealign_S (w4[3], w5[0], offset); w5[2] = hc_bytealign_S (w4[2], w4[3], offset); w5[1] = hc_bytealign_S (w4[1], w4[2], offset); w5[0] = hc_bytealign_S (w4[0], w4[1], offset); w4[3] = hc_bytealign_S (w3[3], w4[0], offset); w4[2] = hc_bytealign_S (w3[2], w3[3], offset); w4[1] = hc_bytealign_S (w3[1], w3[2], offset); w4[0] = hc_bytealign_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_S (w7[3], 0, offset); c0[3] = hc_bytealign_S (w7[2], w7[3], offset); c0[2] = hc_bytealign_S (w7[1], w7[2], offset); c0[1] = hc_bytealign_S (w7[0], w7[1], offset); c0[0] = hc_bytealign_S (w6[3], w7[0], offset); w7[3] = hc_bytealign_S (w6[2], w6[3], offset); w7[2] = hc_bytealign_S (w6[1], w6[2], offset); w7[1] = hc_bytealign_S (w6[0], w6[1], offset); w7[0] = hc_bytealign_S (w5[3], w6[0], 
offset); w6[3] = hc_bytealign_S (w5[2], w5[3], offset); w6[2] = hc_bytealign_S (w5[1], w5[2], offset); w6[1] = hc_bytealign_S (w5[0], w5[1], offset); w6[0] = hc_bytealign_S (w4[3], w5[0], offset); w5[3] = hc_bytealign_S (w4[2], w4[3], offset); w5[2] = hc_bytealign_S (w4[1], w4[2], offset); w5[1] = hc_bytealign_S (w4[0], w4[1], offset); w5[0] = hc_bytealign_S (w3[3], w4[0], offset); w4[3] = hc_bytealign_S (w3[2], w3[3], offset); w4[2] = hc_bytealign_S (w3[1], w3[2], offset); w4[1] = hc_bytealign_S (w3[0], w3[1], offset); w4[0] = hc_bytealign_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_S (w7[3], 0, offset); c1[0] = hc_bytealign_S (w7[2], w7[3], offset); c0[3] = hc_bytealign_S (w7[1], w7[2], offset); c0[2] = hc_bytealign_S (w7[0], w7[1], offset); c0[1] = hc_bytealign_S (w6[3], w7[0], offset); c0[0] = hc_bytealign_S (w6[2], w6[3], offset); w7[3] = hc_bytealign_S (w6[1], w6[2], offset); w7[2] = hc_bytealign_S (w6[0], w6[1], offset); w7[1] = hc_bytealign_S (w5[3], w6[0], offset); w7[0] = hc_bytealign_S (w5[2], w5[3], offset); w6[3] = hc_bytealign_S (w5[1], w5[2], offset); w6[2] = hc_bytealign_S (w5[0], w5[1], offset); w6[1] = hc_bytealign_S (w4[3], w5[0], offset); w6[0] = hc_bytealign_S (w4[2], w4[3], offset); w5[3] = hc_bytealign_S (w4[1], w4[2], offset); w5[2] = hc_bytealign_S (w4[0], w4[1], offset); w5[1] = hc_bytealign_S (w3[3], w4[0], offset); w5[0] = 
hc_bytealign_S (w3[2], w3[3], offset); w4[3] = hc_bytealign_S (w3[1], w3[2], offset); w4[2] = hc_bytealign_S (w3[0], w3[1], offset); w4[1] = hc_bytealign_S (w2[3], w3[0], offset); w4[0] = hc_bytealign_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_S (w0[3], w1[0], offset); w2[0] = hc_bytealign_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_S (w7[3], 0, offset); c1[1] = hc_bytealign_S (w7[2], w7[3], offset); c1[0] = hc_bytealign_S (w7[1], w7[2], offset); c0[3] = hc_bytealign_S (w7[0], w7[1], offset); c0[2] = hc_bytealign_S (w6[3], w7[0], offset); c0[1] = hc_bytealign_S (w6[2], w6[3], offset); c0[0] = hc_bytealign_S (w6[1], w6[2], offset); w7[3] = hc_bytealign_S (w6[0], w6[1], offset); w7[2] = hc_bytealign_S (w5[3], w6[0], offset); w7[1] = hc_bytealign_S (w5[2], w5[3], offset); w7[0] = hc_bytealign_S (w5[1], w5[2], offset); w6[3] = hc_bytealign_S (w5[0], w5[1], offset); w6[2] = hc_bytealign_S (w4[3], w5[0], offset); w6[1] = hc_bytealign_S (w4[2], w4[3], offset); w6[0] = hc_bytealign_S (w4[1], w4[2], offset); w5[3] = hc_bytealign_S (w4[0], w4[1], offset); w5[2] = hc_bytealign_S (w3[3], w4[0], offset); w5[1] = hc_bytealign_S (w3[2], w3[3], offset); w5[0] = hc_bytealign_S (w3[1], w3[2], offset); w4[3] = hc_bytealign_S (w3[0], w3[1], offset); w4[2] = hc_bytealign_S (w2[3], w3[0], offset); w4[1] = hc_bytealign_S (w2[2], w2[3], offset); w4[0] = hc_bytealign_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_S (w1[3], w2[0], offset); w3[1] = 
hc_bytealign_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_bytealign_S (w7[3], 0, offset); c1[2] = hc_bytealign_S (w7[2], w7[3], offset); c1[1] = hc_bytealign_S (w7[1], w7[2], offset); c1[0] = hc_bytealign_S (w7[0], w7[1], offset); c0[3] = hc_bytealign_S (w6[3], w7[0], offset); c0[2] = hc_bytealign_S (w6[2], w6[3], offset); c0[1] = hc_bytealign_S (w6[1], w6[2], offset); c0[0] = hc_bytealign_S (w6[0], w6[1], offset); w7[3] = hc_bytealign_S (w5[3], w6[0], offset); w7[2] = hc_bytealign_S (w5[2], w5[3], offset); w7[1] = hc_bytealign_S (w5[1], w5[2], offset); w7[0] = hc_bytealign_S (w5[0], w5[1], offset); w6[3] = hc_bytealign_S (w4[3], w5[0], offset); w6[2] = hc_bytealign_S (w4[2], w4[3], offset); w6[1] = hc_bytealign_S (w4[1], w4[2], offset); w6[0] = hc_bytealign_S (w4[0], w4[1], offset); w5[3] = hc_bytealign_S (w3[3], w4[0], offset); w5[2] = hc_bytealign_S (w3[2], w3[3], offset); w5[1] = hc_bytealign_S (w3[1], w3[2], offset); w5[0] = hc_bytealign_S (w3[0], w3[1], offset); w4[3] = hc_bytealign_S (w2[3], w3[0], offset); w4[2] = hc_bytealign_S (w2[2], w2[3], offset); w4[1] = hc_bytealign_S (w2[1], w2[2], offset); w4[0] = hc_bytealign_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_S ( 0, w0[0], offset); w1[2] = 
0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_S (w7[3], 0, offset); c1[3] = hc_bytealign_S (w7[2], w7[3], offset); c1[2] = hc_bytealign_S (w7[1], w7[2], offset); c1[1] = hc_bytealign_S (w7[0], w7[1], offset); c1[0] = hc_bytealign_S (w6[3], w7[0], offset); c0[3] = hc_bytealign_S (w6[2], w6[3], offset); c0[2] = hc_bytealign_S (w6[1], w6[2], offset); c0[1] = hc_bytealign_S (w6[0], w6[1], offset); c0[0] = hc_bytealign_S (w5[3], w6[0], offset); w7[3] = hc_bytealign_S (w5[2], w5[3], offset); w7[2] = hc_bytealign_S (w5[1], w5[2], offset); w7[1] = hc_bytealign_S (w5[0], w5[1], offset); w7[0] = hc_bytealign_S (w4[3], w5[0], offset); w6[3] = hc_bytealign_S (w4[2], w4[3], offset); w6[2] = hc_bytealign_S (w4[1], w4[2], offset); w6[1] = hc_bytealign_S (w4[0], w4[1], offset); w6[0] = hc_bytealign_S (w3[3], w4[0], offset); w5[3] = hc_bytealign_S (w3[2], w3[3], offset); w5[2] = hc_bytealign_S (w3[1], w3[2], offset); w5[1] = hc_bytealign_S (w3[0], w3[1], offset); w5[0] = hc_bytealign_S (w2[3], w3[0], offset); w4[3] = hc_bytealign_S (w2[2], w2[3], offset); w4[2] = hc_bytealign_S (w2[1], w2[2], offset); w4[1] = hc_bytealign_S (w2[0], w2[1], offset); w4[0] = hc_bytealign_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_S (w7[3], 0, offset); c2[0] = hc_bytealign_S (w7[2], w7[3], offset); c1[3] = hc_bytealign_S (w7[1], w7[2], offset); c1[2] = hc_bytealign_S (w7[0], w7[1], offset); c1[1] = hc_bytealign_S (w6[3], w7[0], offset); c1[0] = hc_bytealign_S (w6[2], w6[3], 
offset); c0[3] = hc_bytealign_S (w6[1], w6[2], offset); c0[2] = hc_bytealign_S (w6[0], w6[1], offset); c0[1] = hc_bytealign_S (w5[3], w6[0], offset); c0[0] = hc_bytealign_S (w5[2], w5[3], offset); w7[3] = hc_bytealign_S (w5[1], w5[2], offset); w7[2] = hc_bytealign_S (w5[0], w5[1], offset); w7[1] = hc_bytealign_S (w4[3], w5[0], offset); w7[0] = hc_bytealign_S (w4[2], w4[3], offset); w6[3] = hc_bytealign_S (w4[1], w4[2], offset); w6[2] = hc_bytealign_S (w4[0], w4[1], offset); w6[1] = hc_bytealign_S (w3[3], w4[0], offset); w6[0] = hc_bytealign_S (w3[2], w3[3], offset); w5[3] = hc_bytealign_S (w3[1], w3[2], offset); w5[2] = hc_bytealign_S (w3[0], w3[1], offset); w5[1] = hc_bytealign_S (w2[3], w3[0], offset); w5[0] = hc_bytealign_S (w2[2], w2[3], offset); w4[3] = hc_bytealign_S (w2[1], w2[2], offset); w4[2] = hc_bytealign_S (w2[0], w2[1], offset); w4[1] = hc_bytealign_S (w1[3], w2[0], offset); w4[0] = hc_bytealign_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_S (w7[3], 0, offset); c2[1] = hc_bytealign_S (w7[2], w7[3], offset); c2[0] = hc_bytealign_S (w7[1], w7[2], offset); c1[3] = hc_bytealign_S (w7[0], w7[1], offset); c1[2] = hc_bytealign_S (w6[3], w7[0], offset); c1[1] = hc_bytealign_S (w6[2], w6[3], offset); c1[0] = hc_bytealign_S (w6[1], w6[2], offset); c0[3] = hc_bytealign_S (w6[0], w6[1], offset); c0[2] = hc_bytealign_S (w5[3], w6[0], offset); c0[1] = hc_bytealign_S (w5[2], w5[3], offset); c0[0] = hc_bytealign_S (w5[1], w5[2], offset); w7[3] = hc_bytealign_S (w5[0], w5[1], offset); w7[2] = hc_bytealign_S (w4[3], w5[0], offset); 
w7[1] = hc_bytealign_S (w4[2], w4[3], offset); w7[0] = hc_bytealign_S (w4[1], w4[2], offset); w6[3] = hc_bytealign_S (w4[0], w4[1], offset); w6[2] = hc_bytealign_S (w3[3], w4[0], offset); w6[1] = hc_bytealign_S (w3[2], w3[3], offset); w6[0] = hc_bytealign_S (w3[1], w3[2], offset); w5[3] = hc_bytealign_S (w3[0], w3[1], offset); w5[2] = hc_bytealign_S (w2[3], w3[0], offset); w5[1] = hc_bytealign_S (w2[2], w2[3], offset); w5[0] = hc_bytealign_S (w2[1], w2[2], offset); w4[3] = hc_bytealign_S (w2[0], w2[1], offset); w4[2] = hc_bytealign_S (w1[3], w2[0], offset); w4[1] = hc_bytealign_S (w1[2], w1[3], offset); w4[0] = hc_bytealign_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_S (w7[3], 0, offset); c2[2] = hc_bytealign_S (w7[2], w7[3], offset); c2[1] = hc_bytealign_S (w7[1], w7[2], offset); c2[0] = hc_bytealign_S (w7[0], w7[1], offset); c1[3] = hc_bytealign_S (w6[3], w7[0], offset); c1[2] = hc_bytealign_S (w6[2], w6[3], offset); c1[1] = hc_bytealign_S (w6[1], w6[2], offset); c1[0] = hc_bytealign_S (w6[0], w6[1], offset); c0[3] = hc_bytealign_S (w5[3], w6[0], offset); c0[2] = hc_bytealign_S (w5[2], w5[3], offset); c0[1] = hc_bytealign_S (w5[1], w5[2], offset); c0[0] = hc_bytealign_S (w5[0], w5[1], offset); w7[3] = hc_bytealign_S (w4[3], w5[0], offset); w7[2] = hc_bytealign_S (w4[2], w4[3], offset); w7[1] = hc_bytealign_S (w4[1], w4[2], offset); w7[0] = hc_bytealign_S (w4[0], w4[1], offset); w6[3] = hc_bytealign_S (w3[3], w4[0], offset); w6[2] = hc_bytealign_S (w3[2], w3[3], offset); w6[1] = hc_bytealign_S (w3[1], w3[2], offset); w6[0] = hc_bytealign_S (w3[0], w3[1], offset); 
w5[3] = hc_bytealign_S (w2[3], w3[0], offset); w5[2] = hc_bytealign_S (w2[2], w2[3], offset); w5[1] = hc_bytealign_S (w2[1], w2[2], offset); w5[0] = hc_bytealign_S (w2[0], w2[1], offset); w4[3] = hc_bytealign_S (w1[3], w2[0], offset); w4[2] = hc_bytealign_S (w1[2], w1[3], offset); w4[1] = hc_bytealign_S (w1[1], w1[2], offset); w4[0] = hc_bytealign_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_S (w7[3], 0, offset); c2[3] = hc_bytealign_S (w7[2], w7[3], offset); c2[2] = hc_bytealign_S (w7[1], w7[2], offset); c2[1] = hc_bytealign_S (w7[0], w7[1], offset); c2[0] = hc_bytealign_S (w6[3], w7[0], offset); c1[3] = hc_bytealign_S (w6[2], w6[3], offset); c1[2] = hc_bytealign_S (w6[1], w6[2], offset); c1[1] = hc_bytealign_S (w6[0], w6[1], offset); c1[0] = hc_bytealign_S (w5[3], w6[0], offset); c0[3] = hc_bytealign_S (w5[2], w5[3], offset); c0[2] = hc_bytealign_S (w5[1], w5[2], offset); c0[1] = hc_bytealign_S (w5[0], w5[1], offset); c0[0] = hc_bytealign_S (w4[3], w5[0], offset); w7[3] = hc_bytealign_S (w4[2], w4[3], offset); w7[2] = hc_bytealign_S (w4[1], w4[2], offset); w7[1] = hc_bytealign_S (w4[0], w4[1], offset); w7[0] = hc_bytealign_S (w3[3], w4[0], offset); w6[3] = hc_bytealign_S (w3[2], w3[3], offset); w6[2] = hc_bytealign_S (w3[1], w3[2], offset); w6[1] = hc_bytealign_S (w3[0], w3[1], offset); w6[0] = hc_bytealign_S (w2[3], w3[0], offset); w5[3] = hc_bytealign_S (w2[2], w2[3], offset); w5[2] = hc_bytealign_S (w2[1], w2[2], offset); w5[1] = hc_bytealign_S (w2[0], w2[1], offset); w5[0] = hc_bytealign_S (w1[3], w2[0], offset); w4[3] = hc_bytealign_S (w1[2], w1[3], offset); w4[2] = hc_bytealign_S (w1[1], 
w1[2], offset); w4[1] = hc_bytealign_S (w1[0], w1[1], offset); w4[0] = hc_bytealign_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_S (w7[3], 0, offset); c3[0] = hc_bytealign_S (w7[2], w7[3], offset); c2[3] = hc_bytealign_S (w7[1], w7[2], offset); c2[2] = hc_bytealign_S (w7[0], w7[1], offset); c2[1] = hc_bytealign_S (w6[3], w7[0], offset); c2[0] = hc_bytealign_S (w6[2], w6[3], offset); c1[3] = hc_bytealign_S (w6[1], w6[2], offset); c1[2] = hc_bytealign_S (w6[0], w6[1], offset); c1[1] = hc_bytealign_S (w5[3], w6[0], offset); c1[0] = hc_bytealign_S (w5[2], w5[3], offset); c0[3] = hc_bytealign_S (w5[1], w5[2], offset); c0[2] = hc_bytealign_S (w5[0], w5[1], offset); c0[1] = hc_bytealign_S (w4[3], w5[0], offset); c0[0] = hc_bytealign_S (w4[2], w4[3], offset); w7[3] = hc_bytealign_S (w4[1], w4[2], offset); w7[2] = hc_bytealign_S (w4[0], w4[1], offset); w7[1] = hc_bytealign_S (w3[3], w4[0], offset); w7[0] = hc_bytealign_S (w3[2], w3[3], offset); w6[3] = hc_bytealign_S (w3[1], w3[2], offset); w6[2] = hc_bytealign_S (w3[0], w3[1], offset); w6[1] = hc_bytealign_S (w2[3], w3[0], offset); w6[0] = hc_bytealign_S (w2[2], w2[3], offset); w5[3] = hc_bytealign_S (w2[1], w2[2], offset); w5[2] = hc_bytealign_S (w2[0], w2[1], offset); w5[1] = hc_bytealign_S (w1[3], w2[0], offset); w5[0] = hc_bytealign_S (w1[2], w1[3], offset); w4[3] = hc_bytealign_S (w1[1], w1[2], offset); w4[2] = hc_bytealign_S (w1[0], w1[1], offset); w4[1] = hc_bytealign_S (w0[3], w1[0], offset); w4[0] = hc_bytealign_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_S ( 0, w0[0], offset); w3[0] = 0; 
w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_S (w7[3], 0, offset); c3[1] = hc_bytealign_S (w7[2], w7[3], offset); c3[0] = hc_bytealign_S (w7[1], w7[2], offset); c2[3] = hc_bytealign_S (w7[0], w7[1], offset); c2[2] = hc_bytealign_S (w6[3], w7[0], offset); c2[1] = hc_bytealign_S (w6[2], w6[3], offset); c2[0] = hc_bytealign_S (w6[1], w6[2], offset); c1[3] = hc_bytealign_S (w6[0], w6[1], offset); c1[2] = hc_bytealign_S (w5[3], w6[0], offset); c1[1] = hc_bytealign_S (w5[2], w5[3], offset); c1[0] = hc_bytealign_S (w5[1], w5[2], offset); c0[3] = hc_bytealign_S (w5[0], w5[1], offset); c0[2] = hc_bytealign_S (w4[3], w5[0], offset); c0[1] = hc_bytealign_S (w4[2], w4[3], offset); c0[0] = hc_bytealign_S (w4[1], w4[2], offset); w7[3] = hc_bytealign_S (w4[0], w4[1], offset); w7[2] = hc_bytealign_S (w3[3], w4[0], offset); w7[1] = hc_bytealign_S (w3[2], w3[3], offset); w7[0] = hc_bytealign_S (w3[1], w3[2], offset); w6[3] = hc_bytealign_S (w3[0], w3[1], offset); w6[2] = hc_bytealign_S (w2[3], w3[0], offset); w6[1] = hc_bytealign_S (w2[2], w2[3], offset); w6[0] = hc_bytealign_S (w2[1], w2[2], offset); w5[3] = hc_bytealign_S (w2[0], w2[1], offset); w5[2] = hc_bytealign_S (w1[3], w2[0], offset); w5[1] = hc_bytealign_S (w1[2], w1[3], offset); w5[0] = hc_bytealign_S (w1[1], w1[2], offset); w4[3] = hc_bytealign_S (w1[0], w1[1], offset); w4[2] = hc_bytealign_S (w0[3], w1[0], offset); w4[1] = hc_bytealign_S (w0[2], w0[3], offset); w4[0] = hc_bytealign_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_S (w7[3], 0, offset); c3[2] = hc_bytealign_S (w7[2], w7[3], offset); c3[1] = hc_bytealign_S (w7[1], w7[2], offset); 
c3[0] = hc_bytealign_S (w7[0], w7[1], offset); c2[3] = hc_bytealign_S (w6[3], w7[0], offset); c2[2] = hc_bytealign_S (w6[2], w6[3], offset); c2[1] = hc_bytealign_S (w6[1], w6[2], offset); c2[0] = hc_bytealign_S (w6[0], w6[1], offset); c1[3] = hc_bytealign_S (w5[3], w6[0], offset); c1[2] = hc_bytealign_S (w5[2], w5[3], offset); c1[1] = hc_bytealign_S (w5[1], w5[2], offset); c1[0] = hc_bytealign_S (w5[0], w5[1], offset); c0[3] = hc_bytealign_S (w4[3], w5[0], offset); c0[2] = hc_bytealign_S (w4[2], w4[3], offset); c0[1] = hc_bytealign_S (w4[1], w4[2], offset); c0[0] = hc_bytealign_S (w4[0], w4[1], offset); w7[3] = hc_bytealign_S (w3[3], w4[0], offset); w7[2] = hc_bytealign_S (w3[2], w3[3], offset); w7[1] = hc_bytealign_S (w3[1], w3[2], offset); w7[0] = hc_bytealign_S (w3[0], w3[1], offset); w6[3] = hc_bytealign_S (w2[3], w3[0], offset); w6[2] = hc_bytealign_S (w2[2], w2[3], offset); w6[1] = hc_bytealign_S (w2[1], w2[2], offset); w6[0] = hc_bytealign_S (w2[0], w2[1], offset); w5[3] = hc_bytealign_S (w1[3], w2[0], offset); w5[2] = hc_bytealign_S (w1[2], w1[3], offset); w5[1] = hc_bytealign_S (w1[1], w1[2], offset); w5[0] = hc_bytealign_S (w1[0], w1[1], offset); w4[3] = hc_bytealign_S (w0[3], w1[0], offset); w4[2] = hc_bytealign_S (w0[2], w0[3], offset); w4[1] = hc_bytealign_S (w0[1], w0[2], offset); w4[0] = hc_bytealign_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_bytealign_S (w7[3], 0, offset); c3[3] = hc_bytealign_S (w7[2], w7[3], offset); c3[2] = hc_bytealign_S (w7[1], w7[2], offset); c3[1] = hc_bytealign_S (w7[0], w7[1], offset); c3[0] = hc_bytealign_S (w6[3], w7[0], offset); c2[3] = hc_bytealign_S (w6[2], w6[3], offset); c2[2] = hc_bytealign_S (w6[1], w6[2], offset); c2[1] = hc_bytealign_S (w6[0], w6[1], offset); c2[0] = hc_bytealign_S (w5[3], w6[0], 
offset); c1[3] = hc_bytealign_S (w5[2], w5[3], offset); c1[2] = hc_bytealign_S (w5[1], w5[2], offset); c1[1] = hc_bytealign_S (w5[0], w5[1], offset); c1[0] = hc_bytealign_S (w4[3], w5[0], offset); c0[3] = hc_bytealign_S (w4[2], w4[3], offset); c0[2] = hc_bytealign_S (w4[1], w4[2], offset); c0[1] = hc_bytealign_S (w4[0], w4[1], offset); c0[0] = hc_bytealign_S (w3[3], w4[0], offset); w7[3] = hc_bytealign_S (w3[2], w3[3], offset); w7[2] = hc_bytealign_S (w3[1], w3[2], offset); w7[1] = hc_bytealign_S (w3[0], w3[1], offset); w7[0] = hc_bytealign_S (w2[3], w3[0], offset); w6[3] = hc_bytealign_S (w2[2], w2[3], offset); w6[2] = hc_bytealign_S (w2[1], w2[2], offset); w6[1] = hc_bytealign_S (w2[0], w2[1], offset); w6[0] = hc_bytealign_S (w1[3], w2[0], offset); w5[3] = hc_bytealign_S (w1[2], w1[3], offset); w5[2] = hc_bytealign_S (w1[1], w1[2], offset); w5[1] = hc_bytealign_S (w1[0], w1[1], offset); w5[0] = hc_bytealign_S (w0[3], w1[0], offset); w4[3] = hc_bytealign_S (w0[2], w0[3], offset); w4[2] = hc_bytealign_S (w0[1], w0[2], offset); w4[1] = hc_bytealign_S (w0[0], w0[1], offset); w4[0] = hc_bytealign_S ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_bytealign_S (w7[3], 0, offset); c4[0] = hc_bytealign_S (w7[2], w7[3], offset); c3[3] = hc_bytealign_S (w7[1], w7[2], offset); c3[2] = hc_bytealign_S (w7[0], w7[1], offset); c3[1] = hc_bytealign_S (w6[3], w7[0], offset); c3[0] = hc_bytealign_S (w6[2], w6[3], offset); c2[3] = hc_bytealign_S (w6[1], w6[2], offset); c2[2] = hc_bytealign_S (w6[0], w6[1], offset); c2[1] = hc_bytealign_S (w5[3], w6[0], offset); c2[0] = hc_bytealign_S (w5[2], w5[3], offset); c1[3] = hc_bytealign_S (w5[1], w5[2], offset); c1[2] = hc_bytealign_S (w5[0], w5[1], offset); c1[1] = hc_bytealign_S (w4[3], w5[0], offset); c1[0] = hc_bytealign_S (w4[2], w4[3], offset); c0[3] = 
hc_bytealign_S (w4[1], w4[2], offset); c0[2] = hc_bytealign_S (w4[0], w4[1], offset); c0[1] = hc_bytealign_S (w3[3], w4[0], offset); c0[0] = hc_bytealign_S (w3[2], w3[3], offset); w7[3] = hc_bytealign_S (w3[1], w3[2], offset); w7[2] = hc_bytealign_S (w3[0], w3[1], offset); w7[1] = hc_bytealign_S (w2[3], w3[0], offset); w7[0] = hc_bytealign_S (w2[2], w2[3], offset); w6[3] = hc_bytealign_S (w2[1], w2[2], offset); w6[2] = hc_bytealign_S (w2[0], w2[1], offset); w6[1] = hc_bytealign_S (w1[3], w2[0], offset); w6[0] = hc_bytealign_S (w1[2], w1[3], offset); w5[3] = hc_bytealign_S (w1[1], w1[2], offset); w5[2] = hc_bytealign_S (w1[0], w1[1], offset); w5[1] = hc_bytealign_S (w0[3], w1[0], offset); w5[0] = hc_bytealign_S (w0[2], w0[3], offset); w4[3] = hc_bytealign_S (w0[1], w0[2], offset); w4[2] = hc_bytealign_S (w0[0], w0[1], offset); w4[1] = hc_bytealign_S ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_bytealign_S (w7[3], 0, offset); c4[1] = hc_bytealign_S (w7[2], w7[3], offset); c4[0] = hc_bytealign_S (w7[1], w7[2], offset); c3[3] = hc_bytealign_S (w7[0], w7[1], offset); c3[2] = hc_bytealign_S (w6[3], w7[0], offset); c3[1] = hc_bytealign_S (w6[2], w6[3], offset); c3[0] = hc_bytealign_S (w6[1], w6[2], offset); c2[3] = hc_bytealign_S (w6[0], w6[1], offset); c2[2] = hc_bytealign_S (w5[3], w6[0], offset); c2[1] = hc_bytealign_S (w5[2], w5[3], offset); c2[0] = hc_bytealign_S (w5[1], w5[2], offset); c1[3] = hc_bytealign_S (w5[0], w5[1], offset); c1[2] = hc_bytealign_S (w4[3], w5[0], offset); c1[1] = hc_bytealign_S (w4[2], w4[3], offset); c1[0] = hc_bytealign_S (w4[1], w4[2], offset); c0[3] = hc_bytealign_S (w4[0], w4[1], offset); c0[2] = hc_bytealign_S (w3[3], w4[0], offset); c0[1] = hc_bytealign_S (w3[2], w3[3], offset); c0[0] = hc_bytealign_S (w3[1], w3[2], offset); w7[3] = hc_bytealign_S 
(w3[0], w3[1], offset); w7[2] = hc_bytealign_S (w2[3], w3[0], offset); w7[1] = hc_bytealign_S (w2[2], w2[3], offset); w7[0] = hc_bytealign_S (w2[1], w2[2], offset); w6[3] = hc_bytealign_S (w2[0], w2[1], offset); w6[2] = hc_bytealign_S (w1[3], w2[0], offset); w6[1] = hc_bytealign_S (w1[2], w1[3], offset); w6[0] = hc_bytealign_S (w1[1], w1[2], offset); w5[3] = hc_bytealign_S (w1[0], w1[1], offset); w5[2] = hc_bytealign_S (w0[3], w1[0], offset); w5[1] = hc_bytealign_S (w0[2], w0[3], offset); w5[0] = hc_bytealign_S (w0[1], w0[2], offset); w4[3] = hc_bytealign_S (w0[0], w0[1], offset); w4[2] = hc_bytealign_S ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_bytealign_S (w7[3], 0, offset); c4[2] = hc_bytealign_S (w7[2], w7[3], offset); c4[1] = hc_bytealign_S (w7[1], w7[2], offset); c4[0] = hc_bytealign_S (w7[0], w7[1], offset); c3[3] = hc_bytealign_S (w6[3], w7[0], offset); c3[2] = hc_bytealign_S (w6[2], w6[3], offset); c3[1] = hc_bytealign_S (w6[1], w6[2], offset); c3[0] = hc_bytealign_S (w6[0], w6[1], offset); c2[3] = hc_bytealign_S (w5[3], w6[0], offset); c2[2] = hc_bytealign_S (w5[2], w5[3], offset); c2[1] = hc_bytealign_S (w5[1], w5[2], offset); c2[0] = hc_bytealign_S (w5[0], w5[1], offset); c1[3] = hc_bytealign_S (w4[3], w5[0], offset); c1[2] = hc_bytealign_S (w4[2], w4[3], offset); c1[1] = hc_bytealign_S (w4[1], w4[2], offset); c1[0] = hc_bytealign_S (w4[0], w4[1], offset); c0[3] = hc_bytealign_S (w3[3], w4[0], offset); c0[2] = hc_bytealign_S (w3[2], w3[3], offset); c0[1] = hc_bytealign_S (w3[1], w3[2], offset); c0[0] = hc_bytealign_S (w3[0], w3[1], offset); w7[3] = hc_bytealign_S (w2[3], w3[0], offset); w7[2] = hc_bytealign_S (w2[2], w2[3], offset); w7[1] = hc_bytealign_S (w2[1], w2[2], offset); w7[0] = hc_bytealign_S (w2[0], w2[1], offset); w6[3] = hc_bytealign_S 
(w1[3], w2[0], offset); w6[2] = hc_bytealign_S (w1[2], w1[3], offset); w6[1] = hc_bytealign_S (w1[1], w1[2], offset); w6[0] = hc_bytealign_S (w1[0], w1[1], offset); w5[3] = hc_bytealign_S (w0[3], w1[0], offset); w5[2] = hc_bytealign_S (w0[2], w0[3], offset); w5[1] = hc_bytealign_S (w0[1], w0[2], offset); w5[0] = hc_bytealign_S (w0[0], w0[1], offset); w4[3] = hc_bytealign_S ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_bytealign_S (w7[3], 0, offset); c4[3] = hc_bytealign_S (w7[2], w7[3], offset); c4[2] = hc_bytealign_S (w7[1], w7[2], offset); c4[1] = hc_bytealign_S (w7[0], w7[1], offset); c4[0] = hc_bytealign_S (w6[3], w7[0], offset); c3[3] = hc_bytealign_S (w6[2], w6[3], offset); c3[2] = hc_bytealign_S (w6[1], w6[2], offset); c3[1] = hc_bytealign_S (w6[0], w6[1], offset); c3[0] = hc_bytealign_S (w5[3], w6[0], offset); c2[3] = hc_bytealign_S (w5[2], w5[3], offset); c2[2] = hc_bytealign_S (w5[1], w5[2], offset); c2[1] = hc_bytealign_S (w5[0], w5[1], offset); c2[0] = hc_bytealign_S (w4[3], w5[0], offset); c1[3] = hc_bytealign_S (w4[2], w4[3], offset); c1[2] = hc_bytealign_S (w4[1], w4[2], offset); c1[1] = hc_bytealign_S (w4[0], w4[1], offset); c1[0] = hc_bytealign_S (w3[3], w4[0], offset); c0[3] = hc_bytealign_S (w3[2], w3[3], offset); c0[2] = hc_bytealign_S (w3[1], w3[2], offset); c0[1] = hc_bytealign_S (w3[0], w3[1], offset); c0[0] = hc_bytealign_S (w2[3], w3[0], offset); w7[3] = hc_bytealign_S (w2[2], w2[3], offset); w7[2] = hc_bytealign_S (w2[1], w2[2], offset); w7[1] = hc_bytealign_S (w2[0], w2[1], offset); w7[0] = hc_bytealign_S (w1[3], w2[0], offset); w6[3] = hc_bytealign_S (w1[2], w1[3], offset); w6[2] = hc_bytealign_S (w1[1], w1[2], offset); w6[1] = hc_bytealign_S (w1[0], w1[1], offset); w6[0] = hc_bytealign_S (w0[3], w1[0], offset); w5[3] = 
hc_bytealign_S (w0[2], w0[3], offset); w5[2] = hc_bytealign_S (w0[1], w0[2], offset); w5[1] = hc_bytealign_S (w0[0], w0[1], offset); w5[0] = hc_bytealign_S ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_bytealign_S (w7[3], 0, offset); c5[0] = hc_bytealign_S (w7[2], w7[3], offset); c4[3] = hc_bytealign_S (w7[1], w7[2], offset); c4[2] = hc_bytealign_S (w7[0], w7[1], offset); c4[1] = hc_bytealign_S (w6[3], w7[0], offset); c4[0] = hc_bytealign_S (w6[2], w6[3], offset); c3[3] = hc_bytealign_S (w6[1], w6[2], offset); c3[2] = hc_bytealign_S (w6[0], w6[1], offset); c3[1] = hc_bytealign_S (w5[3], w6[0], offset); c3[0] = hc_bytealign_S (w5[2], w5[3], offset); c2[3] = hc_bytealign_S (w5[1], w5[2], offset); c2[2] = hc_bytealign_S (w5[0], w5[1], offset); c2[1] = hc_bytealign_S (w4[3], w5[0], offset); c2[0] = hc_bytealign_S (w4[2], w4[3], offset); c1[3] = hc_bytealign_S (w4[1], w4[2], offset); c1[2] = hc_bytealign_S (w4[0], w4[1], offset); c1[1] = hc_bytealign_S (w3[3], w4[0], offset); c1[0] = hc_bytealign_S (w3[2], w3[3], offset); c0[3] = hc_bytealign_S (w3[1], w3[2], offset); c0[2] = hc_bytealign_S (w3[0], w3[1], offset); c0[1] = hc_bytealign_S (w2[3], w3[0], offset); c0[0] = hc_bytealign_S (w2[2], w2[3], offset); w7[3] = hc_bytealign_S (w2[1], w2[2], offset); w7[2] = hc_bytealign_S (w2[0], w2[1], offset); w7[1] = hc_bytealign_S (w1[3], w2[0], offset); w7[0] = hc_bytealign_S (w1[2], w1[3], offset); w6[3] = hc_bytealign_S (w1[1], w1[2], offset); w6[2] = hc_bytealign_S (w1[0], w1[1], offset); w6[1] = hc_bytealign_S (w0[3], w1[0], offset); w6[0] = hc_bytealign_S (w0[2], w0[3], offset); w5[3] = hc_bytealign_S (w0[1], w0[2], offset); w5[2] = hc_bytealign_S (w0[0], w0[1], offset); w5[1] = hc_bytealign_S ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 
0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_bytealign_S (w7[3], 0, offset); c5[1] = hc_bytealign_S (w7[2], w7[3], offset); c5[0] = hc_bytealign_S (w7[1], w7[2], offset); c4[3] = hc_bytealign_S (w7[0], w7[1], offset); c4[2] = hc_bytealign_S (w6[3], w7[0], offset); c4[1] = hc_bytealign_S (w6[2], w6[3], offset); c4[0] = hc_bytealign_S (w6[1], w6[2], offset); c3[3] = hc_bytealign_S (w6[0], w6[1], offset); c3[2] = hc_bytealign_S (w5[3], w6[0], offset); c3[1] = hc_bytealign_S (w5[2], w5[3], offset); c3[0] = hc_bytealign_S (w5[1], w5[2], offset); c2[3] = hc_bytealign_S (w5[0], w5[1], offset); c2[2] = hc_bytealign_S (w4[3], w5[0], offset); c2[1] = hc_bytealign_S (w4[2], w4[3], offset); c2[0] = hc_bytealign_S (w4[1], w4[2], offset); c1[3] = hc_bytealign_S (w4[0], w4[1], offset); c1[2] = hc_bytealign_S (w3[3], w4[0], offset); c1[1] = hc_bytealign_S (w3[2], w3[3], offset); c1[0] = hc_bytealign_S (w3[1], w3[2], offset); c0[3] = hc_bytealign_S (w3[0], w3[1], offset); c0[2] = hc_bytealign_S (w2[3], w3[0], offset); c0[1] = hc_bytealign_S (w2[2], w2[3], offset); c0[0] = hc_bytealign_S (w2[1], w2[2], offset); w7[3] = hc_bytealign_S (w2[0], w2[1], offset); w7[2] = hc_bytealign_S (w1[3], w2[0], offset); w7[1] = hc_bytealign_S (w1[2], w1[3], offset); w7[0] = hc_bytealign_S (w1[1], w1[2], offset); w6[3] = hc_bytealign_S (w1[0], w1[1], offset); w6[2] = hc_bytealign_S (w0[3], w1[0], offset); w6[1] = hc_bytealign_S (w0[2], w0[3], offset); w6[0] = hc_bytealign_S (w0[1], w0[2], offset); w5[3] = hc_bytealign_S (w0[0], w0[1], offset); w5[2] = hc_bytealign_S ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; 
case 23: c5[3] = hc_bytealign_S (w7[3], 0, offset); c5[2] = hc_bytealign_S (w7[2], w7[3], offset); c5[1] = hc_bytealign_S (w7[1], w7[2], offset); c5[0] = hc_bytealign_S (w7[0], w7[1], offset); c4[3] = hc_bytealign_S (w6[3], w7[0], offset); c4[2] = hc_bytealign_S (w6[2], w6[3], offset); c4[1] = hc_bytealign_S (w6[1], w6[2], offset); c4[0] = hc_bytealign_S (w6[0], w6[1], offset); c3[3] = hc_bytealign_S (w5[3], w6[0], offset); c3[2] = hc_bytealign_S (w5[2], w5[3], offset); c3[1] = hc_bytealign_S (w5[1], w5[2], offset); c3[0] = hc_bytealign_S (w5[0], w5[1], offset); c2[3] = hc_bytealign_S (w4[3], w5[0], offset); c2[2] = hc_bytealign_S (w4[2], w4[3], offset); c2[1] = hc_bytealign_S (w4[1], w4[2], offset); c2[0] = hc_bytealign_S (w4[0], w4[1], offset); c1[3] = hc_bytealign_S (w3[3], w4[0], offset); c1[2] = hc_bytealign_S (w3[2], w3[3], offset); c1[1] = hc_bytealign_S (w3[1], w3[2], offset); c1[0] = hc_bytealign_S (w3[0], w3[1], offset); c0[3] = hc_bytealign_S (w2[3], w3[0], offset); c0[2] = hc_bytealign_S (w2[2], w2[3], offset); c0[1] = hc_bytealign_S (w2[1], w2[2], offset); c0[0] = hc_bytealign_S (w2[0], w2[1], offset); w7[3] = hc_bytealign_S (w1[3], w2[0], offset); w7[2] = hc_bytealign_S (w1[2], w1[3], offset); w7[1] = hc_bytealign_S (w1[1], w1[2], offset); w7[0] = hc_bytealign_S (w1[0], w1[1], offset); w6[3] = hc_bytealign_S (w0[3], w1[0], offset); w6[2] = hc_bytealign_S (w0[2], w0[3], offset); w6[1] = hc_bytealign_S (w0[1], w0[2], offset); w6[0] = hc_bytealign_S (w0[0], w0[1], offset); w5[3] = hc_bytealign_S ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_bytealign_S (w7[3], 0, offset); c5[3] = hc_bytealign_S (w7[2], w7[3], offset); c5[2] = hc_bytealign_S (w7[1], w7[2], offset); c5[1] = hc_bytealign_S (w7[0], w7[1], 
offset); c5[0] = hc_bytealign_S (w6[3], w7[0], offset); c4[3] = hc_bytealign_S (w6[2], w6[3], offset); c4[2] = hc_bytealign_S (w6[1], w6[2], offset); c4[1] = hc_bytealign_S (w6[0], w6[1], offset); c4[0] = hc_bytealign_S (w5[3], w6[0], offset); c3[3] = hc_bytealign_S (w5[2], w5[3], offset); c3[2] = hc_bytealign_S (w5[1], w5[2], offset); c3[1] = hc_bytealign_S (w5[0], w5[1], offset); c3[0] = hc_bytealign_S (w4[3], w5[0], offset); c2[3] = hc_bytealign_S (w4[2], w4[3], offset); c2[2] = hc_bytealign_S (w4[1], w4[2], offset); c2[1] = hc_bytealign_S (w4[0], w4[1], offset); c2[0] = hc_bytealign_S (w3[3], w4[0], offset); c1[3] = hc_bytealign_S (w3[2], w3[3], offset); c1[2] = hc_bytealign_S (w3[1], w3[2], offset); c1[1] = hc_bytealign_S (w3[0], w3[1], offset); c1[0] = hc_bytealign_S (w2[3], w3[0], offset); c0[3] = hc_bytealign_S (w2[2], w2[3], offset); c0[2] = hc_bytealign_S (w2[1], w2[2], offset); c0[1] = hc_bytealign_S (w2[0], w2[1], offset); c0[0] = hc_bytealign_S (w1[3], w2[0], offset); w7[3] = hc_bytealign_S (w1[2], w1[3], offset); w7[2] = hc_bytealign_S (w1[1], w1[2], offset); w7[1] = hc_bytealign_S (w1[0], w1[1], offset); w7[0] = hc_bytealign_S (w0[3], w1[0], offset); w6[3] = hc_bytealign_S (w0[2], w0[3], offset); w6[2] = hc_bytealign_S (w0[1], w0[2], offset); w6[1] = hc_bytealign_S (w0[0], w0[1], offset); w6[0] = hc_bytealign_S ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_bytealign_S (w7[3], 0, offset); c6[0] = hc_bytealign_S (w7[2], w7[3], offset); c5[3] = hc_bytealign_S (w7[1], w7[2], offset); c5[2] = hc_bytealign_S (w7[0], w7[1], offset); c5[1] = hc_bytealign_S (w6[3], w7[0], offset); c5[0] = hc_bytealign_S (w6[2], w6[3], offset); c4[3] = hc_bytealign_S (w6[1], w6[2], offset); c4[2] = hc_bytealign_S 
(w6[0], w6[1], offset); c4[1] = hc_bytealign_S (w5[3], w6[0], offset); c4[0] = hc_bytealign_S (w5[2], w5[3], offset); c3[3] = hc_bytealign_S (w5[1], w5[2], offset); c3[2] = hc_bytealign_S (w5[0], w5[1], offset); c3[1] = hc_bytealign_S (w4[3], w5[0], offset); c3[0] = hc_bytealign_S (w4[2], w4[3], offset); c2[3] = hc_bytealign_S (w4[1], w4[2], offset); c2[2] = hc_bytealign_S (w4[0], w4[1], offset); c2[1] = hc_bytealign_S (w3[3], w4[0], offset); c2[0] = hc_bytealign_S (w3[2], w3[3], offset); c1[3] = hc_bytealign_S (w3[1], w3[2], offset); c1[2] = hc_bytealign_S (w3[0], w3[1], offset); c1[1] = hc_bytealign_S (w2[3], w3[0], offset); c1[0] = hc_bytealign_S (w2[2], w2[3], offset); c0[3] = hc_bytealign_S (w2[1], w2[2], offset); c0[2] = hc_bytealign_S (w2[0], w2[1], offset); c0[1] = hc_bytealign_S (w1[3], w2[0], offset); c0[0] = hc_bytealign_S (w1[2], w1[3], offset); w7[3] = hc_bytealign_S (w1[1], w1[2], offset); w7[2] = hc_bytealign_S (w1[0], w1[1], offset); w7[1] = hc_bytealign_S (w0[3], w1[0], offset); w7[0] = hc_bytealign_S (w0[2], w0[3], offset); w6[3] = hc_bytealign_S (w0[1], w0[2], offset); w6[2] = hc_bytealign_S (w0[0], w0[1], offset); w6[1] = hc_bytealign_S ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_bytealign_S (w7[3], 0, offset); c6[1] = hc_bytealign_S (w7[2], w7[3], offset); c6[0] = hc_bytealign_S (w7[1], w7[2], offset); c5[3] = hc_bytealign_S (w7[0], w7[1], offset); c5[2] = hc_bytealign_S (w6[3], w7[0], offset); c5[1] = hc_bytealign_S (w6[2], w6[3], offset); c5[0] = hc_bytealign_S (w6[1], w6[2], offset); c4[3] = hc_bytealign_S (w6[0], w6[1], offset); c4[2] = hc_bytealign_S (w5[3], w6[0], offset); c4[1] = hc_bytealign_S (w5[2], w5[3], offset); c4[0] = hc_bytealign_S (w5[1], w5[2], offset); 
c3[3] = hc_bytealign_S (w5[0], w5[1], offset); c3[2] = hc_bytealign_S (w4[3], w5[0], offset); c3[1] = hc_bytealign_S (w4[2], w4[3], offset); c3[0] = hc_bytealign_S (w4[1], w4[2], offset); c2[3] = hc_bytealign_S (w4[0], w4[1], offset); c2[2] = hc_bytealign_S (w3[3], w4[0], offset); c2[1] = hc_bytealign_S (w3[2], w3[3], offset); c2[0] = hc_bytealign_S (w3[1], w3[2], offset); c1[3] = hc_bytealign_S (w3[0], w3[1], offset); c1[2] = hc_bytealign_S (w2[3], w3[0], offset); c1[1] = hc_bytealign_S (w2[2], w2[3], offset); c1[0] = hc_bytealign_S (w2[1], w2[2], offset); c0[3] = hc_bytealign_S (w2[0], w2[1], offset); c0[2] = hc_bytealign_S (w1[3], w2[0], offset); c0[1] = hc_bytealign_S (w1[2], w1[3], offset); c0[0] = hc_bytealign_S (w1[1], w1[2], offset); w7[3] = hc_bytealign_S (w1[0], w1[1], offset); w7[2] = hc_bytealign_S (w0[3], w1[0], offset); w7[1] = hc_bytealign_S (w0[2], w0[3], offset); w7[0] = hc_bytealign_S (w0[1], w0[2], offset); w6[3] = hc_bytealign_S (w0[0], w0[1], offset); w6[2] = hc_bytealign_S ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_bytealign_S (w7[3], 0, offset); c6[2] = hc_bytealign_S (w7[2], w7[3], offset); c6[1] = hc_bytealign_S (w7[1], w7[2], offset); c6[0] = hc_bytealign_S (w7[0], w7[1], offset); c5[3] = hc_bytealign_S (w6[3], w7[0], offset); c5[2] = hc_bytealign_S (w6[2], w6[3], offset); c5[1] = hc_bytealign_S (w6[1], w6[2], offset); c5[0] = hc_bytealign_S (w6[0], w6[1], offset); c4[3] = hc_bytealign_S (w5[3], w6[0], offset); c4[2] = hc_bytealign_S (w5[2], w5[3], offset); c4[1] = hc_bytealign_S (w5[1], w5[2], offset); c4[0] = hc_bytealign_S (w5[0], w5[1], offset); c3[3] = hc_bytealign_S (w4[3], w5[0], offset); c3[2] = hc_bytealign_S (w4[2], w4[3], offset); c3[1] = 
hc_bytealign_S (w4[1], w4[2], offset); c3[0] = hc_bytealign_S (w4[0], w4[1], offset); c2[3] = hc_bytealign_S (w3[3], w4[0], offset); c2[2] = hc_bytealign_S (w3[2], w3[3], offset); c2[1] = hc_bytealign_S (w3[1], w3[2], offset); c2[0] = hc_bytealign_S (w3[0], w3[1], offset); c1[3] = hc_bytealign_S (w2[3], w3[0], offset); c1[2] = hc_bytealign_S (w2[2], w2[3], offset); c1[1] = hc_bytealign_S (w2[1], w2[2], offset); c1[0] = hc_bytealign_S (w2[0], w2[1], offset); c0[3] = hc_bytealign_S (w1[3], w2[0], offset); c0[2] = hc_bytealign_S (w1[2], w1[3], offset); c0[1] = hc_bytealign_S (w1[1], w1[2], offset); c0[0] = hc_bytealign_S (w1[0], w1[1], offset); w7[3] = hc_bytealign_S (w0[3], w1[0], offset); w7[2] = hc_bytealign_S (w0[2], w0[3], offset); w7[1] = hc_bytealign_S (w0[1], w0[2], offset); w7[0] = hc_bytealign_S (w0[0], w0[1], offset); w6[3] = hc_bytealign_S ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_bytealign_S (w7[3], 0, offset); c6[3] = hc_bytealign_S (w7[2], w7[3], offset); c6[2] = hc_bytealign_S (w7[1], w7[2], offset); c6[1] = hc_bytealign_S (w7[0], w7[1], offset); c6[0] = hc_bytealign_S (w6[3], w7[0], offset); c5[3] = hc_bytealign_S (w6[2], w6[3], offset); c5[2] = hc_bytealign_S (w6[1], w6[2], offset); c5[1] = hc_bytealign_S (w6[0], w6[1], offset); c5[0] = hc_bytealign_S (w5[3], w6[0], offset); c4[3] = hc_bytealign_S (w5[2], w5[3], offset); c4[2] = hc_bytealign_S (w5[1], w5[2], offset); c4[1] = hc_bytealign_S (w5[0], w5[1], offset); c4[0] = hc_bytealign_S (w4[3], w5[0], offset); c3[3] = hc_bytealign_S (w4[2], w4[3], offset); c3[2] = hc_bytealign_S (w4[1], w4[2], offset); c3[1] = hc_bytealign_S (w4[0], w4[1], offset); c3[0] = hc_bytealign_S (w3[3], w4[0], offset); c2[3] = 
hc_bytealign_S (w3[2], w3[3], offset); c2[2] = hc_bytealign_S (w3[1], w3[2], offset); c2[1] = hc_bytealign_S (w3[0], w3[1], offset); c2[0] = hc_bytealign_S (w2[3], w3[0], offset); c1[3] = hc_bytealign_S (w2[2], w2[3], offset); c1[2] = hc_bytealign_S (w2[1], w2[2], offset); c1[1] = hc_bytealign_S (w2[0], w2[1], offset); c1[0] = hc_bytealign_S (w1[3], w2[0], offset); c0[3] = hc_bytealign_S (w1[2], w1[3], offset); c0[2] = hc_bytealign_S (w1[1], w1[2], offset); c0[1] = hc_bytealign_S (w1[0], w1[1], offset); c0[0] = hc_bytealign_S (w0[3], w1[0], offset); w7[3] = hc_bytealign_S (w0[2], w0[3], offset); w7[2] = hc_bytealign_S (w0[1], w0[2], offset); w7[1] = hc_bytealign_S (w0[0], w0[1], offset); w7[0] = hc_bytealign_S ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_bytealign_S (w7[3], 0, offset); c7[0] = hc_bytealign_S (w7[2], w7[3], offset); c6[3] = hc_bytealign_S (w7[1], w7[2], offset); c6[2] = hc_bytealign_S (w7[0], w7[1], offset); c6[1] = hc_bytealign_S (w6[3], w7[0], offset); c6[0] = hc_bytealign_S (w6[2], w6[3], offset); c5[3] = hc_bytealign_S (w6[1], w6[2], offset); c5[2] = hc_bytealign_S (w6[0], w6[1], offset); c5[1] = hc_bytealign_S (w5[3], w6[0], offset); c5[0] = hc_bytealign_S (w5[2], w5[3], offset); c4[3] = hc_bytealign_S (w5[1], w5[2], offset); c4[2] = hc_bytealign_S (w5[0], w5[1], offset); c4[1] = hc_bytealign_S (w4[3], w5[0], offset); c4[0] = hc_bytealign_S (w4[2], w4[3], offset); c3[3] = hc_bytealign_S (w4[1], w4[2], offset); c3[2] = hc_bytealign_S (w4[0], w4[1], offset); c3[1] = hc_bytealign_S (w3[3], w4[0], offset); c3[0] = hc_bytealign_S (w3[2], w3[3], offset); c2[3] = hc_bytealign_S (w3[1], w3[2], offset); c2[2] = hc_bytealign_S (w3[0], w3[1], 
offset); c2[1] = hc_bytealign_S (w2[3], w3[0], offset); c2[0] = hc_bytealign_S (w2[2], w2[3], offset); c1[3] = hc_bytealign_S (w2[1], w2[2], offset); c1[2] = hc_bytealign_S (w2[0], w2[1], offset); c1[1] = hc_bytealign_S (w1[3], w2[0], offset); c1[0] = hc_bytealign_S (w1[2], w1[3], offset); c0[3] = hc_bytealign_S (w1[1], w1[2], offset); c0[2] = hc_bytealign_S (w1[0], w1[1], offset); c0[1] = hc_bytealign_S (w0[3], w1[0], offset); c0[0] = hc_bytealign_S (w0[2], w0[3], offset); w7[3] = hc_bytealign_S (w0[1], w0[2], offset); w7[2] = hc_bytealign_S (w0[0], w0[1], offset); w7[1] = hc_bytealign_S ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_bytealign_S (w7[3], 0, offset); c7[1] = hc_bytealign_S (w7[2], w7[3], offset); c7[0] = hc_bytealign_S (w7[1], w7[2], offset); c6[3] = hc_bytealign_S (w7[0], w7[1], offset); c6[2] = hc_bytealign_S (w6[3], w7[0], offset); c6[1] = hc_bytealign_S (w6[2], w6[3], offset); c6[0] = hc_bytealign_S (w6[1], w6[2], offset); c5[3] = hc_bytealign_S (w6[0], w6[1], offset); c5[2] = hc_bytealign_S (w5[3], w6[0], offset); c5[1] = hc_bytealign_S (w5[2], w5[3], offset); c5[0] = hc_bytealign_S (w5[1], w5[2], offset); c4[3] = hc_bytealign_S (w5[0], w5[1], offset); c4[2] = hc_bytealign_S (w4[3], w5[0], offset); c4[1] = hc_bytealign_S (w4[2], w4[3], offset); c4[0] = hc_bytealign_S (w4[1], w4[2], offset); c3[3] = hc_bytealign_S (w4[0], w4[1], offset); c3[2] = hc_bytealign_S (w3[3], w4[0], offset); c3[1] = hc_bytealign_S (w3[2], w3[3], offset); c3[0] = hc_bytealign_S (w3[1], w3[2], offset); c2[3] = hc_bytealign_S (w3[0], w3[1], offset); c2[2] = hc_bytealign_S (w2[3], w3[0], offset); c2[1] = hc_bytealign_S (w2[2], w2[3], offset); c2[0] = 
hc_bytealign_S (w2[1], w2[2], offset); c1[3] = hc_bytealign_S (w2[0], w2[1], offset); c1[2] = hc_bytealign_S (w1[3], w2[0], offset); c1[1] = hc_bytealign_S (w1[2], w1[3], offset); c1[0] = hc_bytealign_S (w1[1], w1[2], offset); c0[3] = hc_bytealign_S (w1[0], w1[1], offset); c0[2] = hc_bytealign_S (w0[3], w1[0], offset); c0[1] = hc_bytealign_S (w0[2], w0[3], offset); c0[0] = hc_bytealign_S (w0[1], w0[2], offset); w7[3] = hc_bytealign_S (w0[0], w0[1], offset); w7[2] = hc_bytealign_S ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_bytealign_S (w7[3], 0, offset); c7[2] = hc_bytealign_S (w7[2], w7[3], offset); c7[1] = hc_bytealign_S (w7[1], w7[2], offset); c7[0] = hc_bytealign_S (w7[0], w7[1], offset); c6[3] = hc_bytealign_S (w6[3], w7[0], offset); c6[2] = hc_bytealign_S (w6[2], w6[3], offset); c6[1] = hc_bytealign_S (w6[1], w6[2], offset); c6[0] = hc_bytealign_S (w6[0], w6[1], offset); c5[3] = hc_bytealign_S (w5[3], w6[0], offset); c5[2] = hc_bytealign_S (w5[2], w5[3], offset); c5[1] = hc_bytealign_S (w5[1], w5[2], offset); c5[0] = hc_bytealign_S (w5[0], w5[1], offset); c4[3] = hc_bytealign_S (w4[3], w5[0], offset); c4[2] = hc_bytealign_S (w4[2], w4[3], offset); c4[1] = hc_bytealign_S (w4[1], w4[2], offset); c4[0] = hc_bytealign_S (w4[0], w4[1], offset); c3[3] = hc_bytealign_S (w3[3], w4[0], offset); c3[2] = hc_bytealign_S (w3[2], w3[3], offset); c3[1] = hc_bytealign_S (w3[1], w3[2], offset); c3[0] = hc_bytealign_S (w3[0], w3[1], offset); c2[3] = hc_bytealign_S (w2[3], w3[0], offset); c2[2] = hc_bytealign_S (w2[2], w2[3], offset); c2[1] = hc_bytealign_S (w2[1], w2[2], offset); c2[0] = hc_bytealign_S (w2[0], w2[1], offset); c1[3] = hc_bytealign_S 
(w1[3], w2[0], offset); c1[2] = hc_bytealign_S (w1[2], w1[3], offset); c1[1] = hc_bytealign_S (w1[1], w1[2], offset); c1[0] = hc_bytealign_S (w1[0], w1[1], offset); c0[3] = hc_bytealign_S (w0[3], w1[0], offset); c0[2] = hc_bytealign_S (w0[2], w0[3], offset); c0[1] = hc_bytealign_S (w0[1], w0[2], offset); c0[0] = hc_bytealign_S (w0[0], w0[1], offset); w7[3] = hc_bytealign_S ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm_S (w7[3], 0, selector); w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); 
w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); w0[0] = hc_byte_perm_S ( 0, w0[0], selector); break; case 1: c0[1] = hc_byte_perm_S (w7[3], 0, selector); c0[0] = hc_byte_perm_S (w7[2], w7[3], selector); w7[3] = hc_byte_perm_S (w7[1], w7[2], selector); w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); w5[2] = hc_byte_perm_S (w5[0], w5[1], selector); w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); w2[1] = hc_byte_perm_S (w1[3], 
w2[0], selector); w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); w0[1] = hc_byte_perm_S ( 0, w0[0], selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm_S (w7[3], 0, selector); c0[1] = hc_byte_perm_S (w7[2], w7[3], selector); c0[0] = hc_byte_perm_S (w7[1], w7[2], selector); w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); w6[2] = hc_byte_perm_S (w5[3], w6[0], selector); w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); 
w0[2] = hc_byte_perm_S ( 0, w0[0], selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm_S (w7[3], 0, selector); c0[2] = hc_byte_perm_S (w7[2], w7[3], selector); c0[1] = hc_byte_perm_S (w7[1], w7[2], selector); c0[0] = hc_byte_perm_S (w7[0], w7[1], selector); w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); w7[0] = hc_byte_perm_S (w6[0], w6[1], selector); w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); w5[2] = hc_byte_perm_S (w4[2], w4[3], selector); w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); w0[3] = hc_byte_perm_S ( 0, w0[0], selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm_S (w7[3], 0, selector); c0[3] = hc_byte_perm_S (w7[2], w7[3], selector); c0[2] = hc_byte_perm_S (w7[1], w7[2], selector); c0[1] = hc_byte_perm_S (w7[0], w7[1], selector); c0[0] = hc_byte_perm_S (w6[3], w7[0], selector); w7[3] = 
hc_byte_perm_S (w6[2], w6[3], selector); w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); w4[3] = hc_byte_perm_S (w3[2], w3[3], selector); w4[2] = hc_byte_perm_S (w3[1], w3[2], selector); w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); w1[0] = hc_byte_perm_S ( 0, w0[0], selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm_S (w7[3], 0, selector); c1[0] = hc_byte_perm_S (w7[2], w7[3], selector); c0[3] = hc_byte_perm_S (w7[1], w7[2], selector); c0[2] = hc_byte_perm_S (w7[0], w7[1], selector); c0[1] = hc_byte_perm_S (w6[3], w7[0], selector); c0[0] = hc_byte_perm_S (w6[2], w6[3], selector); w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); w6[2] = hc_byte_perm_S (w5[0], w5[1], 
selector); w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); w5[1] = hc_byte_perm_S (w3[3], w4[0], selector); w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); w1[1] = hc_byte_perm_S ( 0, w0[0], selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm_S (w7[3], 0, selector); c1[1] = hc_byte_perm_S (w7[2], w7[3], selector); c1[0] = hc_byte_perm_S (w7[1], w7[2], selector); c0[3] = hc_byte_perm_S (w7[0], w7[1], selector); c0[2] = hc_byte_perm_S (w6[3], w7[0], selector); c0[1] = hc_byte_perm_S (w6[2], w6[3], selector); c0[0] = hc_byte_perm_S (w6[1], w6[2], selector); w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); w5[0] = 
hc_byte_perm_S (w3[1], w3[2], selector); w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); w1[2] = hc_byte_perm_S ( 0, w0[0], selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_byte_perm_S (w7[3], 0, selector); c1[2] = hc_byte_perm_S (w7[2], w7[3], selector); c1[1] = hc_byte_perm_S (w7[1], w7[2], selector); c1[0] = hc_byte_perm_S (w7[0], w7[1], selector); c0[3] = hc_byte_perm_S (w6[3], w7[0], selector); c0[2] = hc_byte_perm_S (w6[2], w6[3], selector); c0[1] = hc_byte_perm_S (w6[1], w6[2], selector); c0[0] = hc_byte_perm_S (w6[0], w6[1], selector); w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); w6[3] = hc_byte_perm_S (w4[3], w5[0], selector); w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); w3[3] = hc_byte_perm_S 
(w1[3], w2[0], selector); w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); w1[3] = hc_byte_perm_S ( 0, w0[0], selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm_S (w7[3], 0, selector); c1[3] = hc_byte_perm_S (w7[2], w7[3], selector); c1[2] = hc_byte_perm_S (w7[1], w7[2], selector); c1[1] = hc_byte_perm_S (w7[0], w7[1], selector); c1[0] = hc_byte_perm_S (w6[3], w7[0], selector); c0[3] = hc_byte_perm_S (w6[2], w6[3], selector); c0[2] = hc_byte_perm_S (w6[1], w6[2], selector); c0[1] = hc_byte_perm_S (w6[0], w6[1], selector); c0[0] = hc_byte_perm_S (w5[3], w6[0], selector); w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); w5[2] = hc_byte_perm_S (w3[1], w3[2], selector); w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); w2[2] = hc_byte_perm_S 
(w0[1], w0[2], selector); w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); w2[0] = hc_byte_perm_S ( 0, w0[0], selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm_S (w7[3], 0, selector); c2[0] = hc_byte_perm_S (w7[2], w7[3], selector); c1[3] = hc_byte_perm_S (w7[1], w7[2], selector); c1[2] = hc_byte_perm_S (w7[0], w7[1], selector); c1[1] = hc_byte_perm_S (w6[3], w7[0], selector); c1[0] = hc_byte_perm_S (w6[2], w6[3], selector); c0[3] = hc_byte_perm_S (w6[1], w6[2], selector); c0[2] = hc_byte_perm_S (w6[0], w6[1], selector); c0[1] = hc_byte_perm_S (w5[3], w6[0], selector); c0[0] = hc_byte_perm_S (w5[2], w5[3], selector); w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); w2[1] = hc_byte_perm_S ( 0, w0[0], selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm_S (w7[3], 0, selector); c2[1] 
= hc_byte_perm_S (w7[2], w7[3], selector); c2[0] = hc_byte_perm_S (w7[1], w7[2], selector); c1[3] = hc_byte_perm_S (w7[0], w7[1], selector); c1[2] = hc_byte_perm_S (w6[3], w7[0], selector); c1[1] = hc_byte_perm_S (w6[2], w6[3], selector); c1[0] = hc_byte_perm_S (w6[1], w6[2], selector); c0[3] = hc_byte_perm_S (w6[0], w6[1], selector); c0[2] = hc_byte_perm_S (w5[3], w6[0], selector); c0[1] = hc_byte_perm_S (w5[2], w5[3], selector); c0[0] = hc_byte_perm_S (w5[1], w5[2], selector); w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); w7[0] = hc_byte_perm_S (w4[1], w4[2], selector); w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); w2[2] = hc_byte_perm_S ( 0, w0[0], selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm_S (w7[3], 0, selector); c2[2] = hc_byte_perm_S (w7[2], w7[3], selector); c2[1] = hc_byte_perm_S (w7[1], w7[2], selector); c2[0] = hc_byte_perm_S (w7[0], w7[1], selector); c1[3] = hc_byte_perm_S (w6[3], w7[0], selector); c1[2] = hc_byte_perm_S (w6[2], w6[3], selector); c1[1] = hc_byte_perm_S 
(w6[1], w6[2], selector); c1[0] = hc_byte_perm_S (w6[0], w6[1], selector); c0[3] = hc_byte_perm_S (w5[3], w6[0], selector); c0[2] = hc_byte_perm_S (w5[2], w5[3], selector); c0[1] = hc_byte_perm_S (w5[1], w5[2], selector); c0[0] = hc_byte_perm_S (w5[0], w5[1], selector); w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); w4[1] = hc_byte_perm_S (w1[1], w1[2], selector); w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); w2[3] = hc_byte_perm_S ( 0, w0[0], selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm_S (w7[3], 0, selector); c2[3] = hc_byte_perm_S (w7[2], w7[3], selector); c2[2] = hc_byte_perm_S (w7[1], w7[2], selector); c2[1] = hc_byte_perm_S (w7[0], w7[1], selector); c2[0] = hc_byte_perm_S (w6[3], w7[0], selector); c1[3] = hc_byte_perm_S (w6[2], w6[3], selector); c1[2] = hc_byte_perm_S (w6[1], w6[2], selector); c1[1] = hc_byte_perm_S (w6[0], w6[1], selector); c1[0] = hc_byte_perm_S (w5[3], w6[0], selector); c0[3] = hc_byte_perm_S (w5[2], w5[3], selector); c0[2] = hc_byte_perm_S (w5[1], w5[2], selector); c0[1] = hc_byte_perm_S (w5[0], 
w5[1], selector); c0[0] = hc_byte_perm_S (w4[3], w5[0], selector); w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); w3[0] = hc_byte_perm_S ( 0, w0[0], selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm_S (w7[3], 0, selector); c3[0] = hc_byte_perm_S (w7[2], w7[3], selector); c2[3] = hc_byte_perm_S (w7[1], w7[2], selector); c2[2] = hc_byte_perm_S (w7[0], w7[1], selector); c2[1] = hc_byte_perm_S (w6[3], w7[0], selector); c2[0] = hc_byte_perm_S (w6[2], w6[3], selector); c1[3] = hc_byte_perm_S (w6[1], w6[2], selector); c1[2] = hc_byte_perm_S (w6[0], w6[1], selector); c1[1] = hc_byte_perm_S (w5[3], w6[0], selector); c1[0] = hc_byte_perm_S (w5[2], w5[3], selector); c0[3] = hc_byte_perm_S (w5[1], w5[2], selector); c0[2] = hc_byte_perm_S (w5[0], w5[1], selector); c0[1] = hc_byte_perm_S (w4[3], w5[0], selector); c0[0] = hc_byte_perm_S (w4[2], w4[3], selector); w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); w7[1] = hc_byte_perm_S 
(w3[3], w4[0], selector); w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); w5[3] = hc_byte_perm_S (w2[1], w2[2], selector); w5[2] = hc_byte_perm_S (w2[0], w2[1], selector); w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); w3[1] = hc_byte_perm_S ( 0, w0[0], selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm_S (w7[3], 0, selector); c3[1] = hc_byte_perm_S (w7[2], w7[3], selector); c3[0] = hc_byte_perm_S (w7[1], w7[2], selector); c2[3] = hc_byte_perm_S (w7[0], w7[1], selector); c2[2] = hc_byte_perm_S (w6[3], w7[0], selector); c2[1] = hc_byte_perm_S (w6[2], w6[3], selector); c2[0] = hc_byte_perm_S (w6[1], w6[2], selector); c1[3] = hc_byte_perm_S (w6[0], w6[1], selector); c1[2] = hc_byte_perm_S (w5[3], w6[0], selector); c1[1] = hc_byte_perm_S (w5[2], w5[3], selector); c1[0] = hc_byte_perm_S (w5[1], w5[2], selector); c0[3] = hc_byte_perm_S (w5[0], w5[1], selector); c0[2] = hc_byte_perm_S (w4[3], w5[0], selector); c0[1] = hc_byte_perm_S (w4[2], w4[3], selector); c0[0] = hc_byte_perm_S (w4[1], w4[2], selector); w7[3] = hc_byte_perm_S (w4[0], w4[1], selector); w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); w6[1] = 
hc_byte_perm_S (w2[2], w2[3], selector); w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); w3[2] = hc_byte_perm_S ( 0, w0[0], selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm_S (w7[3], 0, selector); c3[2] = hc_byte_perm_S (w7[2], w7[3], selector); c3[1] = hc_byte_perm_S (w7[1], w7[2], selector); c3[0] = hc_byte_perm_S (w7[0], w7[1], selector); c2[3] = hc_byte_perm_S (w6[3], w7[0], selector); c2[2] = hc_byte_perm_S (w6[2], w6[3], selector); c2[1] = hc_byte_perm_S (w6[1], w6[2], selector); c2[0] = hc_byte_perm_S (w6[0], w6[1], selector); c1[3] = hc_byte_perm_S (w5[3], w6[0], selector); c1[2] = hc_byte_perm_S (w5[2], w5[3], selector); c1[1] = hc_byte_perm_S (w5[1], w5[2], selector); c1[0] = hc_byte_perm_S (w5[0], w5[1], selector); c0[3] = hc_byte_perm_S (w4[3], w5[0], selector); c0[2] = hc_byte_perm_S (w4[2], w4[3], selector); c0[1] = hc_byte_perm_S (w4[1], w4[2], selector); c0[0] = hc_byte_perm_S (w4[0], w4[1], selector); w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); w7[2] = hc_byte_perm_S (w3[2], w3[3], selector); w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); w6[3] = hc_byte_perm_S (w2[3], w3[0], selector); w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); w5[2] = hc_byte_perm_S (w1[2], 
w1[3], selector); w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); w3[3] = hc_byte_perm_S ( 0, w0[0], selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_byte_perm_S (w7[3], 0, selector); c3[3] = hc_byte_perm_S (w7[2], w7[3], selector); c3[2] = hc_byte_perm_S (w7[1], w7[2], selector); c3[1] = hc_byte_perm_S (w7[0], w7[1], selector); c3[0] = hc_byte_perm_S (w6[3], w7[0], selector); c2[3] = hc_byte_perm_S (w6[2], w6[3], selector); c2[2] = hc_byte_perm_S (w6[1], w6[2], selector); c2[1] = hc_byte_perm_S (w6[0], w6[1], selector); c2[0] = hc_byte_perm_S (w5[3], w6[0], selector); c1[3] = hc_byte_perm_S (w5[2], w5[3], selector); c1[2] = hc_byte_perm_S (w5[1], w5[2], selector); c1[1] = hc_byte_perm_S (w5[0], w5[1], selector); c1[0] = hc_byte_perm_S (w4[3], w5[0], selector); c0[3] = hc_byte_perm_S (w4[2], w4[3], selector); c0[2] = hc_byte_perm_S (w4[1], w4[2], selector); c0[1] = hc_byte_perm_S (w4[0], w4[1], selector); c0[0] = hc_byte_perm_S (w3[3], w4[0], selector); w7[3] = hc_byte_perm_S (w3[2], w3[3], selector); w7[2] = hc_byte_perm_S (w3[1], w3[2], selector); w7[1] = hc_byte_perm_S (w3[0], w3[1], selector); w7[0] = hc_byte_perm_S (w2[3], w3[0], selector); w6[3] = hc_byte_perm_S (w2[2], w2[3], selector); w6[2] = hc_byte_perm_S (w2[1], w2[2], selector); w6[1] = hc_byte_perm_S (w2[0], w2[1], selector); w6[0] = hc_byte_perm_S (w1[3], w2[0], selector); w5[3] = hc_byte_perm_S (w1[2], w1[3], selector); w5[2] = hc_byte_perm_S (w1[1], w1[2], selector); w5[1] = hc_byte_perm_S (w1[0], w1[1], selector); w5[0] = hc_byte_perm_S (w0[3], w1[0], selector); w4[3] = hc_byte_perm_S (w0[2], w0[3], 
selector); w4[2] = hc_byte_perm_S (w0[1], w0[2], selector); w4[1] = hc_byte_perm_S (w0[0], w0[1], selector); w4[0] = hc_byte_perm_S ( 0, w0[0], selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_byte_perm_S (w7[3], 0, selector); c4[0] = hc_byte_perm_S (w7[2], w7[3], selector); c3[3] = hc_byte_perm_S (w7[1], w7[2], selector); c3[2] = hc_byte_perm_S (w7[0], w7[1], selector); c3[1] = hc_byte_perm_S (w6[3], w7[0], selector); c3[0] = hc_byte_perm_S (w6[2], w6[3], selector); c2[3] = hc_byte_perm_S (w6[1], w6[2], selector); c2[2] = hc_byte_perm_S (w6[0], w6[1], selector); c2[1] = hc_byte_perm_S (w5[3], w6[0], selector); c2[0] = hc_byte_perm_S (w5[2], w5[3], selector); c1[3] = hc_byte_perm_S (w5[1], w5[2], selector); c1[2] = hc_byte_perm_S (w5[0], w5[1], selector); c1[1] = hc_byte_perm_S (w4[3], w5[0], selector); c1[0] = hc_byte_perm_S (w4[2], w4[3], selector); c0[3] = hc_byte_perm_S (w4[1], w4[2], selector); c0[2] = hc_byte_perm_S (w4[0], w4[1], selector); c0[1] = hc_byte_perm_S (w3[3], w4[0], selector); c0[0] = hc_byte_perm_S (w3[2], w3[3], selector); w7[3] = hc_byte_perm_S (w3[1], w3[2], selector); w7[2] = hc_byte_perm_S (w3[0], w3[1], selector); w7[1] = hc_byte_perm_S (w2[3], w3[0], selector); w7[0] = hc_byte_perm_S (w2[2], w2[3], selector); w6[3] = hc_byte_perm_S (w2[1], w2[2], selector); w6[2] = hc_byte_perm_S (w2[0], w2[1], selector); w6[1] = hc_byte_perm_S (w1[3], w2[0], selector); w6[0] = hc_byte_perm_S (w1[2], w1[3], selector); w5[3] = hc_byte_perm_S (w1[1], w1[2], selector); w5[2] = hc_byte_perm_S (w1[0], w1[1], selector); w5[1] = hc_byte_perm_S (w0[3], w1[0], selector); w5[0] = hc_byte_perm_S (w0[2], w0[3], selector); w4[3] = hc_byte_perm_S (w0[1], w0[2], selector); w4[2] = hc_byte_perm_S (w0[0], w0[1], selector); w4[1] = hc_byte_perm_S ( 0, w0[0], selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 
0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_byte_perm_S (w7[3], 0, selector); c4[1] = hc_byte_perm_S (w7[2], w7[3], selector); c4[0] = hc_byte_perm_S (w7[1], w7[2], selector); c3[3] = hc_byte_perm_S (w7[0], w7[1], selector); c3[2] = hc_byte_perm_S (w6[3], w7[0], selector); c3[1] = hc_byte_perm_S (w6[2], w6[3], selector); c3[0] = hc_byte_perm_S (w6[1], w6[2], selector); c2[3] = hc_byte_perm_S (w6[0], w6[1], selector); c2[2] = hc_byte_perm_S (w5[3], w6[0], selector); c2[1] = hc_byte_perm_S (w5[2], w5[3], selector); c2[0] = hc_byte_perm_S (w5[1], w5[2], selector); c1[3] = hc_byte_perm_S (w5[0], w5[1], selector); c1[2] = hc_byte_perm_S (w4[3], w5[0], selector); c1[1] = hc_byte_perm_S (w4[2], w4[3], selector); c1[0] = hc_byte_perm_S (w4[1], w4[2], selector); c0[3] = hc_byte_perm_S (w4[0], w4[1], selector); c0[2] = hc_byte_perm_S (w3[3], w4[0], selector); c0[1] = hc_byte_perm_S (w3[2], w3[3], selector); c0[0] = hc_byte_perm_S (w3[1], w3[2], selector); w7[3] = hc_byte_perm_S (w3[0], w3[1], selector); w7[2] = hc_byte_perm_S (w2[3], w3[0], selector); w7[1] = hc_byte_perm_S (w2[2], w2[3], selector); w7[0] = hc_byte_perm_S (w2[1], w2[2], selector); w6[3] = hc_byte_perm_S (w2[0], w2[1], selector); w6[2] = hc_byte_perm_S (w1[3], w2[0], selector); w6[1] = hc_byte_perm_S (w1[2], w1[3], selector); w6[0] = hc_byte_perm_S (w1[1], w1[2], selector); w5[3] = hc_byte_perm_S (w1[0], w1[1], selector); w5[2] = hc_byte_perm_S (w0[3], w1[0], selector); w5[1] = hc_byte_perm_S (w0[2], w0[3], selector); w5[0] = hc_byte_perm_S (w0[1], w0[2], selector); w4[3] = hc_byte_perm_S (w0[0], w0[1], selector); w4[2] = hc_byte_perm_S ( 0, w0[0], selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = 
hc_byte_perm_S (w7[3], 0, selector); c4[2] = hc_byte_perm_S (w7[2], w7[3], selector); c4[1] = hc_byte_perm_S (w7[1], w7[2], selector); c4[0] = hc_byte_perm_S (w7[0], w7[1], selector); c3[3] = hc_byte_perm_S (w6[3], w7[0], selector); c3[2] = hc_byte_perm_S (w6[2], w6[3], selector); c3[1] = hc_byte_perm_S (w6[1], w6[2], selector); c3[0] = hc_byte_perm_S (w6[0], w6[1], selector); c2[3] = hc_byte_perm_S (w5[3], w6[0], selector); c2[2] = hc_byte_perm_S (w5[2], w5[3], selector); c2[1] = hc_byte_perm_S (w5[1], w5[2], selector); c2[0] = hc_byte_perm_S (w5[0], w5[1], selector); c1[3] = hc_byte_perm_S (w4[3], w5[0], selector); c1[2] = hc_byte_perm_S (w4[2], w4[3], selector); c1[1] = hc_byte_perm_S (w4[1], w4[2], selector); c1[0] = hc_byte_perm_S (w4[0], w4[1], selector); c0[3] = hc_byte_perm_S (w3[3], w4[0], selector); c0[2] = hc_byte_perm_S (w3[2], w3[3], selector); c0[1] = hc_byte_perm_S (w3[1], w3[2], selector); c0[0] = hc_byte_perm_S (w3[0], w3[1], selector); w7[3] = hc_byte_perm_S (w2[3], w3[0], selector); w7[2] = hc_byte_perm_S (w2[2], w2[3], selector); w7[1] = hc_byte_perm_S (w2[1], w2[2], selector); w7[0] = hc_byte_perm_S (w2[0], w2[1], selector); w6[3] = hc_byte_perm_S (w1[3], w2[0], selector); w6[2] = hc_byte_perm_S (w1[2], w1[3], selector); w6[1] = hc_byte_perm_S (w1[1], w1[2], selector); w6[0] = hc_byte_perm_S (w1[0], w1[1], selector); w5[3] = hc_byte_perm_S (w0[3], w1[0], selector); w5[2] = hc_byte_perm_S (w0[2], w0[3], selector); w5[1] = hc_byte_perm_S (w0[1], w0[2], selector); w5[0] = hc_byte_perm_S (w0[0], w0[1], selector); w4[3] = hc_byte_perm_S ( 0, w0[0], selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_byte_perm_S (w7[3], 0, selector); c4[3] = hc_byte_perm_S (w7[2], w7[3], selector); c4[2] = hc_byte_perm_S (w7[1], w7[2], selector); c4[1] = hc_byte_perm_S 
(w7[0], w7[1], selector); c4[0] = hc_byte_perm_S (w6[3], w7[0], selector); c3[3] = hc_byte_perm_S (w6[2], w6[3], selector); c3[2] = hc_byte_perm_S (w6[1], w6[2], selector); c3[1] = hc_byte_perm_S (w6[0], w6[1], selector); c3[0] = hc_byte_perm_S (w5[3], w6[0], selector); c2[3] = hc_byte_perm_S (w5[2], w5[3], selector); c2[2] = hc_byte_perm_S (w5[1], w5[2], selector); c2[1] = hc_byte_perm_S (w5[0], w5[1], selector); c2[0] = hc_byte_perm_S (w4[3], w5[0], selector); c1[3] = hc_byte_perm_S (w4[2], w4[3], selector); c1[2] = hc_byte_perm_S (w4[1], w4[2], selector); c1[1] = hc_byte_perm_S (w4[0], w4[1], selector); c1[0] = hc_byte_perm_S (w3[3], w4[0], selector); c0[3] = hc_byte_perm_S (w3[2], w3[3], selector); c0[2] = hc_byte_perm_S (w3[1], w3[2], selector); c0[1] = hc_byte_perm_S (w3[0], w3[1], selector); c0[0] = hc_byte_perm_S (w2[3], w3[0], selector); w7[3] = hc_byte_perm_S (w2[2], w2[3], selector); w7[2] = hc_byte_perm_S (w2[1], w2[2], selector); w7[1] = hc_byte_perm_S (w2[0], w2[1], selector); w7[0] = hc_byte_perm_S (w1[3], w2[0], selector); w6[3] = hc_byte_perm_S (w1[2], w1[3], selector); w6[2] = hc_byte_perm_S (w1[1], w1[2], selector); w6[1] = hc_byte_perm_S (w1[0], w1[1], selector); w6[0] = hc_byte_perm_S (w0[3], w1[0], selector); w5[3] = hc_byte_perm_S (w0[2], w0[3], selector); w5[2] = hc_byte_perm_S (w0[1], w0[2], selector); w5[1] = hc_byte_perm_S (w0[0], w0[1], selector); w5[0] = hc_byte_perm_S ( 0, w0[0], selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_byte_perm_S (w7[3], 0, selector); c5[0] = hc_byte_perm_S (w7[2], w7[3], selector); c4[3] = hc_byte_perm_S (w7[1], w7[2], selector); c4[2] = hc_byte_perm_S (w7[0], w7[1], selector); c4[1] = hc_byte_perm_S (w6[3], w7[0], selector); c4[0] = hc_byte_perm_S (w6[2], w6[3], selector); c3[3] = hc_byte_perm_S 
(w6[1], w6[2], selector); c3[2] = hc_byte_perm_S (w6[0], w6[1], selector); c3[1] = hc_byte_perm_S (w5[3], w6[0], selector); c3[0] = hc_byte_perm_S (w5[2], w5[3], selector); c2[3] = hc_byte_perm_S (w5[1], w5[2], selector); c2[2] = hc_byte_perm_S (w5[0], w5[1], selector); c2[1] = hc_byte_perm_S (w4[3], w5[0], selector); c2[0] = hc_byte_perm_S (w4[2], w4[3], selector); c1[3] = hc_byte_perm_S (w4[1], w4[2], selector); c1[2] = hc_byte_perm_S (w4[0], w4[1], selector); c1[1] = hc_byte_perm_S (w3[3], w4[0], selector); c1[0] = hc_byte_perm_S (w3[2], w3[3], selector); c0[3] = hc_byte_perm_S (w3[1], w3[2], selector); c0[2] = hc_byte_perm_S (w3[0], w3[1], selector); c0[1] = hc_byte_perm_S (w2[3], w3[0], selector); c0[0] = hc_byte_perm_S (w2[2], w2[3], selector); w7[3] = hc_byte_perm_S (w2[1], w2[2], selector); w7[2] = hc_byte_perm_S (w2[0], w2[1], selector); w7[1] = hc_byte_perm_S (w1[3], w2[0], selector); w7[0] = hc_byte_perm_S (w1[2], w1[3], selector); w6[3] = hc_byte_perm_S (w1[1], w1[2], selector); w6[2] = hc_byte_perm_S (w1[0], w1[1], selector); w6[1] = hc_byte_perm_S (w0[3], w1[0], selector); w6[0] = hc_byte_perm_S (w0[2], w0[3], selector); w5[3] = hc_byte_perm_S (w0[1], w0[2], selector); w5[2] = hc_byte_perm_S (w0[0], w0[1], selector); w5[1] = hc_byte_perm_S ( 0, w0[0], selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_byte_perm_S (w7[3], 0, selector); c5[1] = hc_byte_perm_S (w7[2], w7[3], selector); c5[0] = hc_byte_perm_S (w7[1], w7[2], selector); c4[3] = hc_byte_perm_S (w7[0], w7[1], selector); c4[2] = hc_byte_perm_S (w6[3], w7[0], selector); c4[1] = hc_byte_perm_S (w6[2], w6[3], selector); c4[0] = hc_byte_perm_S (w6[1], w6[2], selector); c3[3] = hc_byte_perm_S (w6[0], w6[1], selector); c3[2] = hc_byte_perm_S (w5[3], w6[0], selector); c3[1] = 
hc_byte_perm_S (w5[2], w5[3], selector); c3[0] = hc_byte_perm_S (w5[1], w5[2], selector); c2[3] = hc_byte_perm_S (w5[0], w5[1], selector); c2[2] = hc_byte_perm_S (w4[3], w5[0], selector); c2[1] = hc_byte_perm_S (w4[2], w4[3], selector); c2[0] = hc_byte_perm_S (w4[1], w4[2], selector); c1[3] = hc_byte_perm_S (w4[0], w4[1], selector); c1[2] = hc_byte_perm_S (w3[3], w4[0], selector); c1[1] = hc_byte_perm_S (w3[2], w3[3], selector); c1[0] = hc_byte_perm_S (w3[1], w3[2], selector); c0[3] = hc_byte_perm_S (w3[0], w3[1], selector); c0[2] = hc_byte_perm_S (w2[3], w3[0], selector); c0[1] = hc_byte_perm_S (w2[2], w2[3], selector); c0[0] = hc_byte_perm_S (w2[1], w2[2], selector); w7[3] = hc_byte_perm_S (w2[0], w2[1], selector); w7[2] = hc_byte_perm_S (w1[3], w2[0], selector); w7[1] = hc_byte_perm_S (w1[2], w1[3], selector); w7[0] = hc_byte_perm_S (w1[1], w1[2], selector); w6[3] = hc_byte_perm_S (w1[0], w1[1], selector); w6[2] = hc_byte_perm_S (w0[3], w1[0], selector); w6[1] = hc_byte_perm_S (w0[2], w0[3], selector); w6[0] = hc_byte_perm_S (w0[1], w0[2], selector); w5[3] = hc_byte_perm_S (w0[0], w0[1], selector); w5[2] = hc_byte_perm_S ( 0, w0[0], selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_byte_perm_S (w7[3], 0, selector); c5[2] = hc_byte_perm_S (w7[2], w7[3], selector); c5[1] = hc_byte_perm_S (w7[1], w7[2], selector); c5[0] = hc_byte_perm_S (w7[0], w7[1], selector); c4[3] = hc_byte_perm_S (w6[3], w7[0], selector); c4[2] = hc_byte_perm_S (w6[2], w6[3], selector); c4[1] = hc_byte_perm_S (w6[1], w6[2], selector); c4[0] = hc_byte_perm_S (w6[0], w6[1], selector); c3[3] = hc_byte_perm_S (w5[3], w6[0], selector); c3[2] = hc_byte_perm_S (w5[2], w5[3], selector); c3[1] = hc_byte_perm_S (w5[1], w5[2], selector); c3[0] = hc_byte_perm_S (w5[0], w5[1], 
selector); c2[3] = hc_byte_perm_S (w4[3], w5[0], selector); c2[2] = hc_byte_perm_S (w4[2], w4[3], selector); c2[1] = hc_byte_perm_S (w4[1], w4[2], selector); c2[0] = hc_byte_perm_S (w4[0], w4[1], selector); c1[3] = hc_byte_perm_S (w3[3], w4[0], selector); c1[2] = hc_byte_perm_S (w3[2], w3[3], selector); c1[1] = hc_byte_perm_S (w3[1], w3[2], selector); c1[0] = hc_byte_perm_S (w3[0], w3[1], selector); c0[3] = hc_byte_perm_S (w2[3], w3[0], selector); c0[2] = hc_byte_perm_S (w2[2], w2[3], selector); c0[1] = hc_byte_perm_S (w2[1], w2[2], selector); c0[0] = hc_byte_perm_S (w2[0], w2[1], selector); w7[3] = hc_byte_perm_S (w1[3], w2[0], selector); w7[2] = hc_byte_perm_S (w1[2], w1[3], selector); w7[1] = hc_byte_perm_S (w1[1], w1[2], selector); w7[0] = hc_byte_perm_S (w1[0], w1[1], selector); w6[3] = hc_byte_perm_S (w0[3], w1[0], selector); w6[2] = hc_byte_perm_S (w0[2], w0[3], selector); w6[1] = hc_byte_perm_S (w0[1], w0[2], selector); w6[0] = hc_byte_perm_S (w0[0], w0[1], selector); w5[3] = hc_byte_perm_S ( 0, w0[0], selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_byte_perm_S (w7[3], 0, selector); c5[3] = hc_byte_perm_S (w7[2], w7[3], selector); c5[2] = hc_byte_perm_S (w7[1], w7[2], selector); c5[1] = hc_byte_perm_S (w7[0], w7[1], selector); c5[0] = hc_byte_perm_S (w6[3], w7[0], selector); c4[3] = hc_byte_perm_S (w6[2], w6[3], selector); c4[2] = hc_byte_perm_S (w6[1], w6[2], selector); c4[1] = hc_byte_perm_S (w6[0], w6[1], selector); c4[0] = hc_byte_perm_S (w5[3], w6[0], selector); c3[3] = hc_byte_perm_S (w5[2], w5[3], selector); c3[2] = hc_byte_perm_S (w5[1], w5[2], selector); c3[1] = hc_byte_perm_S (w5[0], w5[1], selector); c3[0] = hc_byte_perm_S (w4[3], w5[0], selector); c2[3] = hc_byte_perm_S (w4[2], w4[3], selector); c2[2] = 
hc_byte_perm_S (w4[1], w4[2], selector); c2[1] = hc_byte_perm_S (w4[0], w4[1], selector); c2[0] = hc_byte_perm_S (w3[3], w4[0], selector); c1[3] = hc_byte_perm_S (w3[2], w3[3], selector); c1[2] = hc_byte_perm_S (w3[1], w3[2], selector); c1[1] = hc_byte_perm_S (w3[0], w3[1], selector); c1[0] = hc_byte_perm_S (w2[3], w3[0], selector); c0[3] = hc_byte_perm_S (w2[2], w2[3], selector); c0[2] = hc_byte_perm_S (w2[1], w2[2], selector); c0[1] = hc_byte_perm_S (w2[0], w2[1], selector); c0[0] = hc_byte_perm_S (w1[3], w2[0], selector); w7[3] = hc_byte_perm_S (w1[2], w1[3], selector); w7[2] = hc_byte_perm_S (w1[1], w1[2], selector); w7[1] = hc_byte_perm_S (w1[0], w1[1], selector); w7[0] = hc_byte_perm_S (w0[3], w1[0], selector); w6[3] = hc_byte_perm_S (w0[2], w0[3], selector); w6[2] = hc_byte_perm_S (w0[1], w0[2], selector); w6[1] = hc_byte_perm_S (w0[0], w0[1], selector); w6[0] = hc_byte_perm_S ( 0, w0[0], selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_byte_perm_S (w7[3], 0, selector); c6[0] = hc_byte_perm_S (w7[2], w7[3], selector); c5[3] = hc_byte_perm_S (w7[1], w7[2], selector); c5[2] = hc_byte_perm_S (w7[0], w7[1], selector); c5[1] = hc_byte_perm_S (w6[3], w7[0], selector); c5[0] = hc_byte_perm_S (w6[2], w6[3], selector); c4[3] = hc_byte_perm_S (w6[1], w6[2], selector); c4[2] = hc_byte_perm_S (w6[0], w6[1], selector); c4[1] = hc_byte_perm_S (w5[3], w6[0], selector); c4[0] = hc_byte_perm_S (w5[2], w5[3], selector); c3[3] = hc_byte_perm_S (w5[1], w5[2], selector); c3[2] = hc_byte_perm_S (w5[0], w5[1], selector); c3[1] = hc_byte_perm_S (w4[3], w5[0], selector); c3[0] = hc_byte_perm_S (w4[2], w4[3], selector); c2[3] = hc_byte_perm_S (w4[1], w4[2], selector); c2[2] = hc_byte_perm_S (w4[0], w4[1], selector); c2[1] = 
hc_byte_perm_S (w3[3], w4[0], selector); c2[0] = hc_byte_perm_S (w3[2], w3[3], selector); c1[3] = hc_byte_perm_S (w3[1], w3[2], selector); c1[2] = hc_byte_perm_S (w3[0], w3[1], selector); c1[1] = hc_byte_perm_S (w2[3], w3[0], selector); c1[0] = hc_byte_perm_S (w2[2], w2[3], selector); c0[3] = hc_byte_perm_S (w2[1], w2[2], selector); c0[2] = hc_byte_perm_S (w2[0], w2[1], selector); c0[1] = hc_byte_perm_S (w1[3], w2[0], selector); c0[0] = hc_byte_perm_S (w1[2], w1[3], selector); w7[3] = hc_byte_perm_S (w1[1], w1[2], selector); w7[2] = hc_byte_perm_S (w1[0], w1[1], selector); w7[1] = hc_byte_perm_S (w0[3], w1[0], selector); w7[0] = hc_byte_perm_S (w0[2], w0[3], selector); w6[3] = hc_byte_perm_S (w0[1], w0[2], selector); w6[2] = hc_byte_perm_S (w0[0], w0[1], selector); w6[1] = hc_byte_perm_S ( 0, w0[0], selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_byte_perm_S (w7[3], 0, selector); c6[1] = hc_byte_perm_S (w7[2], w7[3], selector); c6[0] = hc_byte_perm_S (w7[1], w7[2], selector); c5[3] = hc_byte_perm_S (w7[0], w7[1], selector); c5[2] = hc_byte_perm_S (w6[3], w7[0], selector); c5[1] = hc_byte_perm_S (w6[2], w6[3], selector); c5[0] = hc_byte_perm_S (w6[1], w6[2], selector); c4[3] = hc_byte_perm_S (w6[0], w6[1], selector); c4[2] = hc_byte_perm_S (w5[3], w6[0], selector); c4[1] = hc_byte_perm_S (w5[2], w5[3], selector); c4[0] = hc_byte_perm_S (w5[1], w5[2], selector); c3[3] = hc_byte_perm_S (w5[0], w5[1], selector); c3[2] = hc_byte_perm_S (w4[3], w5[0], selector); c3[1] = hc_byte_perm_S (w4[2], w4[3], selector); c3[0] = hc_byte_perm_S (w4[1], w4[2], selector); c2[3] = hc_byte_perm_S (w4[0], w4[1], selector); c2[2] = hc_byte_perm_S (w3[3], w4[0], selector); c2[1] = hc_byte_perm_S (w3[2], w3[3], selector); c2[0] = 
hc_byte_perm_S (w3[1], w3[2], selector); c1[3] = hc_byte_perm_S (w3[0], w3[1], selector); c1[2] = hc_byte_perm_S (w2[3], w3[0], selector); c1[1] = hc_byte_perm_S (w2[2], w2[3], selector); c1[0] = hc_byte_perm_S (w2[1], w2[2], selector); c0[3] = hc_byte_perm_S (w2[0], w2[1], selector); c0[2] = hc_byte_perm_S (w1[3], w2[0], selector); c0[1] = hc_byte_perm_S (w1[2], w1[3], selector); c0[0] = hc_byte_perm_S (w1[1], w1[2], selector); w7[3] = hc_byte_perm_S (w1[0], w1[1], selector); w7[2] = hc_byte_perm_S (w0[3], w1[0], selector); w7[1] = hc_byte_perm_S (w0[2], w0[3], selector); w7[0] = hc_byte_perm_S (w0[1], w0[2], selector); w6[3] = hc_byte_perm_S (w0[0], w0[1], selector); w6[2] = hc_byte_perm_S ( 0, w0[0], selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_byte_perm_S (w7[3], 0, selector); c6[2] = hc_byte_perm_S (w7[2], w7[3], selector); c6[1] = hc_byte_perm_S (w7[1], w7[2], selector); c6[0] = hc_byte_perm_S (w7[0], w7[1], selector); c5[3] = hc_byte_perm_S (w6[3], w7[0], selector); c5[2] = hc_byte_perm_S (w6[2], w6[3], selector); c5[1] = hc_byte_perm_S (w6[1], w6[2], selector); c5[0] = hc_byte_perm_S (w6[0], w6[1], selector); c4[3] = hc_byte_perm_S (w5[3], w6[0], selector); c4[2] = hc_byte_perm_S (w5[2], w5[3], selector); c4[1] = hc_byte_perm_S (w5[1], w5[2], selector); c4[0] = hc_byte_perm_S (w5[0], w5[1], selector); c3[3] = hc_byte_perm_S (w4[3], w5[0], selector); c3[2] = hc_byte_perm_S (w4[2], w4[3], selector); c3[1] = hc_byte_perm_S (w4[1], w4[2], selector); c3[0] = hc_byte_perm_S (w4[0], w4[1], selector); c2[3] = hc_byte_perm_S (w3[3], w4[0], selector); c2[2] = hc_byte_perm_S (w3[2], w3[3], selector); c2[1] = hc_byte_perm_S (w3[1], w3[2], selector); c2[0] = hc_byte_perm_S (w3[0], w3[1], 
selector); c1[3] = hc_byte_perm_S (w2[3], w3[0], selector); c1[2] = hc_byte_perm_S (w2[2], w2[3], selector); c1[1] = hc_byte_perm_S (w2[1], w2[2], selector); c1[0] = hc_byte_perm_S (w2[0], w2[1], selector); c0[3] = hc_byte_perm_S (w1[3], w2[0], selector); c0[2] = hc_byte_perm_S (w1[2], w1[3], selector); c0[1] = hc_byte_perm_S (w1[1], w1[2], selector); c0[0] = hc_byte_perm_S (w1[0], w1[1], selector); w7[3] = hc_byte_perm_S (w0[3], w1[0], selector); w7[2] = hc_byte_perm_S (w0[2], w0[3], selector); w7[1] = hc_byte_perm_S (w0[1], w0[2], selector); w7[0] = hc_byte_perm_S (w0[0], w0[1], selector); w6[3] = hc_byte_perm_S ( 0, w0[0], selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_byte_perm_S (w7[3], 0, selector); c6[3] = hc_byte_perm_S (w7[2], w7[3], selector); c6[2] = hc_byte_perm_S (w7[1], w7[2], selector); c6[1] = hc_byte_perm_S (w7[0], w7[1], selector); c6[0] = hc_byte_perm_S (w6[3], w7[0], selector); c5[3] = hc_byte_perm_S (w6[2], w6[3], selector); c5[2] = hc_byte_perm_S (w6[1], w6[2], selector); c5[1] = hc_byte_perm_S (w6[0], w6[1], selector); c5[0] = hc_byte_perm_S (w5[3], w6[0], selector); c4[3] = hc_byte_perm_S (w5[2], w5[3], selector); c4[2] = hc_byte_perm_S (w5[1], w5[2], selector); c4[1] = hc_byte_perm_S (w5[0], w5[1], selector); c4[0] = hc_byte_perm_S (w4[3], w5[0], selector); c3[3] = hc_byte_perm_S (w4[2], w4[3], selector); c3[2] = hc_byte_perm_S (w4[1], w4[2], selector); c3[1] = hc_byte_perm_S (w4[0], w4[1], selector); c3[0] = hc_byte_perm_S (w3[3], w4[0], selector); c2[3] = hc_byte_perm_S (w3[2], w3[3], selector); c2[2] = hc_byte_perm_S (w3[1], w3[2], selector); c2[1] = hc_byte_perm_S (w3[0], w3[1], selector); c2[0] = hc_byte_perm_S (w2[3], w3[0], selector); c1[3] = 
hc_byte_perm_S (w2[2], w2[3], selector); c1[2] = hc_byte_perm_S (w2[1], w2[2], selector); c1[1] = hc_byte_perm_S (w2[0], w2[1], selector); c1[0] = hc_byte_perm_S (w1[3], w2[0], selector); c0[3] = hc_byte_perm_S (w1[2], w1[3], selector); c0[2] = hc_byte_perm_S (w1[1], w1[2], selector); c0[1] = hc_byte_perm_S (w1[0], w1[1], selector); c0[0] = hc_byte_perm_S (w0[3], w1[0], selector); w7[3] = hc_byte_perm_S (w0[2], w0[3], selector); w7[2] = hc_byte_perm_S (w0[1], w0[2], selector); w7[1] = hc_byte_perm_S (w0[0], w0[1], selector); w7[0] = hc_byte_perm_S ( 0, w0[0], selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_byte_perm_S (w7[3], 0, selector); c7[0] = hc_byte_perm_S (w7[2], w7[3], selector); c6[3] = hc_byte_perm_S (w7[1], w7[2], selector); c6[2] = hc_byte_perm_S (w7[0], w7[1], selector); c6[1] = hc_byte_perm_S (w6[3], w7[0], selector); c6[0] = hc_byte_perm_S (w6[2], w6[3], selector); c5[3] = hc_byte_perm_S (w6[1], w6[2], selector); c5[2] = hc_byte_perm_S (w6[0], w6[1], selector); c5[1] = hc_byte_perm_S (w5[3], w6[0], selector); c5[0] = hc_byte_perm_S (w5[2], w5[3], selector); c4[3] = hc_byte_perm_S (w5[1], w5[2], selector); c4[2] = hc_byte_perm_S (w5[0], w5[1], selector); c4[1] = hc_byte_perm_S (w4[3], w5[0], selector); c4[0] = hc_byte_perm_S (w4[2], w4[3], selector); c3[3] = hc_byte_perm_S (w4[1], w4[2], selector); c3[2] = hc_byte_perm_S (w4[0], w4[1], selector); c3[1] = hc_byte_perm_S (w3[3], w4[0], selector); c3[0] = hc_byte_perm_S (w3[2], w3[3], selector); c2[3] = hc_byte_perm_S (w3[1], w3[2], selector); c2[2] = hc_byte_perm_S (w3[0], w3[1], selector); c2[1] = hc_byte_perm_S (w2[3], w3[0], selector); c2[0] = hc_byte_perm_S (w2[2], w2[3], selector); c1[3] = hc_byte_perm_S 
(w2[1], w2[2], selector); c1[2] = hc_byte_perm_S (w2[0], w2[1], selector); c1[1] = hc_byte_perm_S (w1[3], w2[0], selector); c1[0] = hc_byte_perm_S (w1[2], w1[3], selector); c0[3] = hc_byte_perm_S (w1[1], w1[2], selector); c0[2] = hc_byte_perm_S (w1[0], w1[1], selector); c0[1] = hc_byte_perm_S (w0[3], w1[0], selector); c0[0] = hc_byte_perm_S (w0[2], w0[3], selector); w7[3] = hc_byte_perm_S (w0[1], w0[2], selector); w7[2] = hc_byte_perm_S (w0[0], w0[1], selector); w7[1] = hc_byte_perm_S ( 0, w0[0], selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_byte_perm_S (w7[3], 0, selector); c7[1] = hc_byte_perm_S (w7[2], w7[3], selector); c7[0] = hc_byte_perm_S (w7[1], w7[2], selector); c6[3] = hc_byte_perm_S (w7[0], w7[1], selector); c6[2] = hc_byte_perm_S (w6[3], w7[0], selector); c6[1] = hc_byte_perm_S (w6[2], w6[3], selector); c6[0] = hc_byte_perm_S (w6[1], w6[2], selector); c5[3] = hc_byte_perm_S (w6[0], w6[1], selector); c5[2] = hc_byte_perm_S (w5[3], w6[0], selector); c5[1] = hc_byte_perm_S (w5[2], w5[3], selector); c5[0] = hc_byte_perm_S (w5[1], w5[2], selector); c4[3] = hc_byte_perm_S (w5[0], w5[1], selector); c4[2] = hc_byte_perm_S (w4[3], w5[0], selector); c4[1] = hc_byte_perm_S (w4[2], w4[3], selector); c4[0] = hc_byte_perm_S (w4[1], w4[2], selector); c3[3] = hc_byte_perm_S (w4[0], w4[1], selector); c3[2] = hc_byte_perm_S (w3[3], w4[0], selector); c3[1] = hc_byte_perm_S (w3[2], w3[3], selector); c3[0] = hc_byte_perm_S (w3[1], w3[2], selector); c2[3] = hc_byte_perm_S (w3[0], w3[1], selector); c2[2] = hc_byte_perm_S (w2[3], w3[0], selector); c2[1] = hc_byte_perm_S (w2[2], w2[3], selector); c2[0] = hc_byte_perm_S (w2[1], w2[2], selector); c1[3] = hc_byte_perm_S 
(w2[0], w2[1], selector); c1[2] = hc_byte_perm_S (w1[3], w2[0], selector); c1[1] = hc_byte_perm_S (w1[2], w1[3], selector); c1[0] = hc_byte_perm_S (w1[1], w1[2], selector); c0[3] = hc_byte_perm_S (w1[0], w1[1], selector); c0[2] = hc_byte_perm_S (w0[3], w1[0], selector); c0[1] = hc_byte_perm_S (w0[2], w0[3], selector); c0[0] = hc_byte_perm_S (w0[1], w0[2], selector); w7[3] = hc_byte_perm_S (w0[0], w0[1], selector); w7[2] = hc_byte_perm_S ( 0, w0[0], selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_byte_perm_S (w7[3], 0, selector); c7[2] = hc_byte_perm_S (w7[2], w7[3], selector); c7[1] = hc_byte_perm_S (w7[1], w7[2], selector); c7[0] = hc_byte_perm_S (w7[0], w7[1], selector); c6[3] = hc_byte_perm_S (w6[3], w7[0], selector); c6[2] = hc_byte_perm_S (w6[2], w6[3], selector); c6[1] = hc_byte_perm_S (w6[1], w6[2], selector); c6[0] = hc_byte_perm_S (w6[0], w6[1], selector); c5[3] = hc_byte_perm_S (w5[3], w6[0], selector); c5[2] = hc_byte_perm_S (w5[2], w5[3], selector); c5[1] = hc_byte_perm_S (w5[1], w5[2], selector); c5[0] = hc_byte_perm_S (w5[0], w5[1], selector); c4[3] = hc_byte_perm_S (w4[3], w5[0], selector); c4[2] = hc_byte_perm_S (w4[2], w4[3], selector); c4[1] = hc_byte_perm_S (w4[1], w4[2], selector); c4[0] = hc_byte_perm_S (w4[0], w4[1], selector); c3[3] = hc_byte_perm_S (w3[3], w4[0], selector); c3[2] = hc_byte_perm_S (w3[2], w3[3], selector); c3[1] = hc_byte_perm_S (w3[1], w3[2], selector); c3[0] = hc_byte_perm_S (w3[0], w3[1], selector); c2[3] = hc_byte_perm_S (w2[3], w3[0], selector); c2[2] = hc_byte_perm_S (w2[2], w2[3], selector); c2[1] = hc_byte_perm_S (w2[1], w2[2], selector); c2[0] = hc_byte_perm_S (w2[0], w2[1], selector); c1[3] = 
hc_byte_perm_S (w1[3], w2[0], selector); c1[2] = hc_byte_perm_S (w1[2], w1[3], selector); c1[1] = hc_byte_perm_S (w1[1], w1[2], selector); c1[0] = hc_byte_perm_S (w1[0], w1[1], selector); c0[3] = hc_byte_perm_S (w0[3], w1[0], selector); c0[2] = hc_byte_perm_S (w0[2], w0[3], selector); c0[1] = hc_byte_perm_S (w0[1], w0[2], selector); c0[0] = hc_byte_perm_S (w0[0], w0[1], selector); w7[3] = hc_byte_perm_S ( 0, w0[0], selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w7[3] = hc_bytealign_be_S (w7[2], w7[3], offset); w7[2] = hc_bytealign_be_S (w7[1], w7[2], offset); w7[1] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[0] = hc_bytealign_be_S (w6[3], w7[0], offset); w6[3] = hc_bytealign_be_S (w6[2], w6[3], offset); w6[2] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[1] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[0] = hc_bytealign_be_S (w5[3], w6[0], offset); w5[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w5[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w5[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w4[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w4[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w3[3] = hc_bytealign_be_S 
(w3[2], w3[3], offset); w3[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; case 1: w7[3] = hc_bytealign_be_S (w7[1], w7[2], offset); w7[2] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[1] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[0] = hc_bytealign_be_S (w6[2], w6[3], offset); w6[3] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[2] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[1] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[0] = hc_bytealign_be_S (w5[2], w5[3], offset); w5[3] = hc_bytealign_be_S (w5[1], w5[2], offset); w5[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w4[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[3] = 
hc_bytealign_be_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: w7[3] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[2] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[1] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[0] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[3] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[2] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[1] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[0] = hc_bytealign_be_S (w5[1], w5[2], offset); w5[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[2] = hc_bytealign_be_S (w6[2], w6[3], offset); 
w7[1] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[0] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[3] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[2] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[1] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[0] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[1] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[2] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[1] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[0] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[0] = hc_bytealign_be_S (w3[3], w4[0], offset); 
w4[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_be_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[2] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[1] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[0] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[3] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_be_S (w0[3], w1[0], 
offset); w2[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_be_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[2] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[1] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[0] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_be_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[2] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[1] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[0] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[1] = hc_bytealign_be_S (w4[1], w4[2], 
offset); w6[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_be_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_be_S (w0[3], w1[0], offset); 
w2[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_be_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_be_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[3] = hc_bytealign_be_S (w3[0], 
w3[1], offset); w5[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_be_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[1] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_be_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_bytealign_be_S 
(w4[2], w4[3], offset); w7[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_be_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_be_S 
(w0[0], w0[1], offset); w3[1] = hc_bytealign_be_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[2] = hc_bytealign_be_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[2] = hc_bytealign_be_S (w0[2], w0[3], offset); 
w4[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_be_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[0] = hc_bytealign_be_S ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[2] = hc_bytealign_be_S 
(w0[0], w0[1], offset); w4[1] = hc_bytealign_be_S ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[2] = hc_bytealign_be_S ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[3] = hc_bytealign_be_S ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; 
w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[0] = hc_bytealign_be_S ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[1] = hc_bytealign_be_S ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[1] = 
hc_bytealign_be_S (w0[2], w0[3], offset); w6[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[2] = hc_bytealign_be_S ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[3] = hc_bytealign_be_S ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[0] = hc_bytealign_be_S ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[0] = 
hc_bytealign_be_S (w0[2], w0[3], offset); w6[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[1] = hc_bytealign_be_S ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[2] = hc_bytealign_be_S ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[3] = hc_bytealign_be_S ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[0] = hc_bytealign_be_S ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 
0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: w7[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[1] = hc_bytealign_be_S ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[2] = hc_bytealign_be_S ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: w7[3] = hc_bytealign_be_S ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); w7[0] = hc_byte_perm_S (w7[0], 
w6[3], selector); w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); w4[0] = hc_byte_perm_S (w4[0], w3[3], selector); w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; case 1: w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); w6[2] = hc_byte_perm_S (w6[1], w6[0], selector); w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); w4[3] = 
hc_byte_perm_S (w4[2], w4[1], selector); w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; case 2: w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); w4[2] = hc_byte_perm_S (w4[0], w3[3], selector); w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); w2[2] = hc_byte_perm_S 
(w2[0], w1[3], selector); w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S (w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: w7[3] = hc_byte_perm_S (w6[3], w6[2], selector); w7[2] = hc_byte_perm_S (w6[2], 
w6[1], selector); w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); w1[0] = hc_byte_perm_S (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); w7[2] = hc_byte_perm_S (w6[1], w6[0], selector); w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); w6[1] = hc_byte_perm_S (w5[0], w4[3], selector); w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); w4[2] = 
hc_byte_perm_S (w3[1], w3[0], selector); w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); w1[1] = hc_byte_perm_S (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); w7[2] = hc_byte_perm_S (w6[0], w5[3], selector); w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); w5[0] = hc_byte_perm_S (w3[2], w3[1], selector); w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); w1[2] = hc_byte_perm_S (w0[0], 
0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); w6[2] = hc_byte_perm_S (w4[3], w4[2], selector); w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); w1[3] = hc_byte_perm_S (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); w5[3] = hc_byte_perm_S (w3[3], w3[2], selector); w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); 
w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); w2[0] = hc_byte_perm_S (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); w5[3] = hc_byte_perm_S (w3[2], w3[1], selector); w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); w2[1] = hc_byte_perm_S (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); w7[2] = hc_byte_perm_S 
(w5[0], w4[3], selector); w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); w4[2] = hc_byte_perm_S (w2[0], w1[3], selector); w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); w2[2] = hc_byte_perm_S (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); w4[0] = hc_byte_perm_S (w1[1], w1[0], selector); w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); w3[2] = hc_byte_perm_S (w0[3], w0[2], 
selector); w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); w2[3] = hc_byte_perm_S (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); w5[0] = hc_byte_perm_S (w2[0], w1[3], selector); w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); w3[0] = hc_byte_perm_S (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); w5[0] = hc_byte_perm_S (w1[3], w1[2], 
selector); w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); w3[1] = hc_byte_perm_S (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); w6[3] = hc_byte_perm_S (w3[1], w3[0], selector); w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); w6[1] = hc_byte_perm_S (w2[3], w2[2], selector); w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); w3[2] = hc_byte_perm_S (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); 
w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); w5[1] = hc_byte_perm_S (w1[2], w1[1], selector); w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); w3[3] = hc_byte_perm_S (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: w7[3] = hc_byte_perm_S (w3[3], w3[2], selector); w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); w4[2] = hc_byte_perm_S (w0[2], w0[1], selector); w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); w4[0] = hc_byte_perm_S (w0[0], 0, selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); w5[2] = 
hc_byte_perm_S (w1[1], w1[0], selector); w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); w4[1] = hc_byte_perm_S (w0[0], 0, selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); w4[2] = hc_byte_perm_S (w0[0], 0, selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); w4[3] = hc_byte_perm_S (w0[0], 
0, selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); w7[2] = hc_byte_perm_S (w2[2], w2[1], selector); w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); w5[0] = hc_byte_perm_S (w0[0], 0, selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); w5[1] = hc_byte_perm_S (w0[0], 0, selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); w7[0] = 
hc_byte_perm_S (w1[2], w1[1], selector); w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); w6[1] = hc_byte_perm_S (w0[3], w0[2], selector); w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); w5[2] = hc_byte_perm_S (w0[0], 0, selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); w5[3] = hc_byte_perm_S (w0[0], 0, selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); w6[0] = hc_byte_perm_S (w0[0], 0, selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: w7[3] = hc_byte_perm_S (w1[2], 
w1[1], selector); w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); w6[1] = hc_byte_perm_S (w0[0], 0, selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); w6[2] = hc_byte_perm_S (w0[0], 0, selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); w6[3] = hc_byte_perm_S (w0[0], 0, selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: w7[3] = hc_byte_perm_S (w0[3], w0[2], selector); w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); w7[0] = hc_byte_perm_S (w0[0], 0, selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 
0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); w7[1] = hc_byte_perm_S (w0[0], 0, selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); w7[2] = hc_byte_perm_S (w0[0], 0, selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: w7[3] = hc_byte_perm_S (w0[0], 0, selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int 
offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: c0[0] = hc_bytealign_be_S (w7[3], 0, offset); w7[3] = hc_bytealign_be_S (w7[2], w7[3], offset); w7[2] = hc_bytealign_be_S (w7[1], w7[2], offset); w7[1] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[0] = hc_bytealign_be_S (w6[3], w7[0], offset); w6[3] = hc_bytealign_be_S (w6[2], w6[3], offset); w6[2] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[1] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[0] = hc_bytealign_be_S (w5[3], w6[0], offset); w5[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w5[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w5[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w4[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w4[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w3[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w2[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w1[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w0[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[0] = hc_bytealign_be_S ( 0, w0[0], offset); break; case 1: c0[1] = hc_bytealign_be_S (w7[3], 0, offset); c0[0] = hc_bytealign_be_S (w7[2], w7[3], offset); w7[3] = hc_bytealign_be_S (w7[1], w7[2], offset); w7[2] = hc_bytealign_be_S (w7[0], w7[1], offset); 
w7[1] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[0] = hc_bytealign_be_S (w6[2], w6[3], offset); w6[3] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[2] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[1] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[0] = hc_bytealign_be_S (w5[2], w5[3], offset); w5[3] = hc_bytealign_be_S (w5[1], w5[2], offset); w5[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w4[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w3[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w2[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w1[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w0[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[1] = hc_bytealign_be_S ( 0, w0[0], offset); w0[0] = 0; break; case 2: c0[2] = hc_bytealign_be_S (w7[3], 0, offset); c0[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c0[0] = hc_bytealign_be_S (w7[1], w7[2], offset); w7[3] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[2] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[1] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[0] = hc_bytealign_be_S (w6[1], w6[2], offset); w6[3] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[2] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[1] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[0] = hc_bytealign_be_S 
(w5[1], w5[2], offset); w5[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w4[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w3[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w2[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w1[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w0[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[2] = hc_bytealign_be_S ( 0, w0[0], offset); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_bytealign_be_S (w7[3], 0, offset); c0[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c0[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c0[0] = hc_bytealign_be_S (w7[0], w7[1], offset); w7[3] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[2] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[1] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[0] = hc_bytealign_be_S (w6[0], w6[1], offset); w6[3] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[2] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[1] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[0] = hc_bytealign_be_S (w5[0], w5[1], offset); w5[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[1] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w4[3] = hc_bytealign_be_S (w3[3], w4[0], 
offset); w4[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w3[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w2[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w1[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w0[3] = hc_bytealign_be_S ( 0, w0[0], offset); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_bytealign_be_S (w7[3], 0, offset); c0[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c0[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c0[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c0[0] = hc_bytealign_be_S (w6[3], w7[0], offset); w7[3] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[2] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[1] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[0] = hc_bytealign_be_S (w5[3], w6[0], offset); w6[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w5[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w4[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w3[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[2] = hc_bytealign_be_S (w2[1], w2[2], offset); 
w3[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w2[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w1[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[0] = hc_bytealign_be_S ( 0, w0[0], offset); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_bytealign_be_S (w7[3], 0, offset); c1[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c0[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c0[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c0[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c0[0] = hc_bytealign_be_S (w6[2], w6[3], offset); w7[3] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[2] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[1] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[0] = hc_bytealign_be_S (w5[2], w5[3], offset); w6[3] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w5[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w4[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w3[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w2[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[1] = hc_bytealign_be_S (w0[3], w1[0], 
offset); w2[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w1[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[1] = hc_bytealign_be_S ( 0, w0[0], offset); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_bytealign_be_S (w7[3], 0, offset); c1[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c1[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c0[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c0[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c0[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c0[0] = hc_bytealign_be_S (w6[1], w6[2], offset); w7[3] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[2] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[1] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[0] = hc_bytealign_be_S (w5[1], w5[2], offset); w6[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w5[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w4[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w3[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w2[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w1[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[2] = hc_bytealign_be_S ( 0, w0[0], offset); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: 
c1[3] = hc_bytealign_be_S (w7[3], 0, offset); c1[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c1[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c1[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c0[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c0[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c0[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c0[0] = hc_bytealign_be_S (w6[0], w6[1], offset); w7[3] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[2] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[1] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[0] = hc_bytealign_be_S (w5[0], w5[1], offset); w6[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[1] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w5[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w4[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w3[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w2[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w1[3] = hc_bytealign_be_S ( 0, w0[0], offset); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_bytealign_be_S (w7[3], 0, offset); c1[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c1[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c1[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c1[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c0[3] = 
hc_bytealign_be_S (w6[2], w6[3], offset); c0[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c0[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c0[0] = hc_bytealign_be_S (w5[3], w6[0], offset); w7[3] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[2] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[1] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w6[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w5[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w4[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w3[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w2[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[0] = hc_bytealign_be_S ( 0, w0[0], offset); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_bytealign_be_S (w7[3], 0, offset); c2[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c1[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c1[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c1[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c1[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c0[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c0[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c0[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c0[0] = hc_bytealign_be_S (w5[2], w5[3], offset); w7[3] = 
hc_bytealign_be_S (w5[1], w5[2], offset); w7[2] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[1] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w6[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w6[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w5[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w4[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w3[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w2[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[1] = hc_bytealign_be_S ( 0, w0[0], offset); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_bytealign_be_S (w7[3], 0, offset); c2[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c2[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c1[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c1[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c1[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c1[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c0[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c0[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c0[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c0[0] = hc_bytealign_be_S (w5[1], w5[2], offset); w7[3] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[2] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[1] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[0] = hc_bytealign_be_S (w4[1], w4[2], offset); 
w6[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w5[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w5[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w4[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w3[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w2[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[2] = hc_bytealign_be_S ( 0, w0[0], offset); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_bytealign_be_S (w7[3], 0, offset); c2[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c2[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c2[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c1[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c1[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c1[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c1[0] = hc_bytealign_be_S (w6[0], w6[1], offset); c0[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c0[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c0[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c0[0] = hc_bytealign_be_S (w5[0], w5[1], offset); w7[3] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[2] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[1] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w6[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[0] = hc_bytealign_be_S 
(w3[0], w3[1], offset); w5[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w4[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w3[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w2[3] = hc_bytealign_be_S ( 0, w0[0], offset); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_bytealign_be_S (w7[3], 0, offset); c2[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c2[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c2[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c2[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c1[3] = hc_bytealign_be_S (w6[2], w6[3], offset); c1[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c1[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c1[0] = hc_bytealign_be_S (w5[3], w6[0], offset); c0[3] = hc_bytealign_be_S (w5[2], w5[3], offset); c0[2] = hc_bytealign_be_S (w5[1], w5[2], offset); c0[1] = hc_bytealign_be_S (w5[0], w5[1], offset); c0[0] = hc_bytealign_be_S (w4[3], w5[0], offset); w7[3] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[2] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[1] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w6[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w5[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[1] = hc_bytealign_be_S (w2[0], w2[1], 
offset); w5[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w4[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w3[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[0] = hc_bytealign_be_S ( 0, w0[0], offset); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_bytealign_be_S (w7[3], 0, offset); c3[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c2[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c2[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c2[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c2[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c1[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c1[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c1[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c1[0] = hc_bytealign_be_S (w5[2], w5[3], offset); c0[3] = hc_bytealign_be_S (w5[1], w5[2], offset); c0[2] = hc_bytealign_be_S (w5[0], w5[1], offset); c0[1] = hc_bytealign_be_S (w4[3], w5[0], offset); c0[0] = hc_bytealign_be_S (w4[2], w4[3], offset); w7[3] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[2] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[1] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w6[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w5[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w4[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[2] = hc_bytealign_be_S (w1[0], w1[1], offset); 
w4[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w3[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[1] = hc_bytealign_be_S ( 0, w0[0], offset); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_bytealign_be_S (w7[3], 0, offset); c3[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c3[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c2[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c2[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c2[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c2[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c1[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c1[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c1[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c1[0] = hc_bytealign_be_S (w5[1], w5[2], offset); c0[3] = hc_bytealign_be_S (w5[0], w5[1], offset); c0[2] = hc_bytealign_be_S (w4[3], w5[0], offset); c0[1] = hc_bytealign_be_S (w4[2], w4[3], offset); c0[0] = hc_bytealign_be_S (w4[1], w4[2], offset); w7[3] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[2] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[1] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w6[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w5[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w4[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w3[3] = hc_bytealign_be_S (w0[0], w0[1], 
offset); w3[2] = hc_bytealign_be_S ( 0, w0[0], offset); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_bytealign_be_S (w7[3], 0, offset); c3[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c3[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c3[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c2[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c2[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c2[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c2[0] = hc_bytealign_be_S (w6[0], w6[1], offset); c1[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c1[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c1[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c1[0] = hc_bytealign_be_S (w5[0], w5[1], offset); c0[3] = hc_bytealign_be_S (w4[3], w5[0], offset); c0[2] = hc_bytealign_be_S (w4[2], w4[3], offset); c0[1] = hc_bytealign_be_S (w4[1], w4[2], offset); c0[0] = hc_bytealign_be_S (w4[0], w4[1], offset); w7[3] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[2] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[1] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w6[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w5[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w4[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w3[3] = hc_bytealign_be_S ( 0, w0[0], offset); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 
0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_bytealign_be_S (w7[3], 0, offset); c3[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c3[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c3[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c3[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c2[3] = hc_bytealign_be_S (w6[2], w6[3], offset); c2[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c2[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c2[0] = hc_bytealign_be_S (w5[3], w6[0], offset); c1[3] = hc_bytealign_be_S (w5[2], w5[3], offset); c1[2] = hc_bytealign_be_S (w5[1], w5[2], offset); c1[1] = hc_bytealign_be_S (w5[0], w5[1], offset); c1[0] = hc_bytealign_be_S (w4[3], w5[0], offset); c0[3] = hc_bytealign_be_S (w4[2], w4[3], offset); c0[2] = hc_bytealign_be_S (w4[1], w4[2], offset); c0[1] = hc_bytealign_be_S (w4[0], w4[1], offset); c0[0] = hc_bytealign_be_S (w3[3], w4[0], offset); w7[3] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[2] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[1] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w6[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w5[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w4[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[0] = hc_bytealign_be_S ( 0, w0[0], offset); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_bytealign_be_S (w7[3], 0, offset); c4[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c3[3] = 
hc_bytealign_be_S (w7[1], w7[2], offset); c3[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c3[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c3[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c2[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c2[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c2[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c2[0] = hc_bytealign_be_S (w5[2], w5[3], offset); c1[3] = hc_bytealign_be_S (w5[1], w5[2], offset); c1[2] = hc_bytealign_be_S (w5[0], w5[1], offset); c1[1] = hc_bytealign_be_S (w4[3], w5[0], offset); c1[0] = hc_bytealign_be_S (w4[2], w4[3], offset); c0[3] = hc_bytealign_be_S (w4[1], w4[2], offset); c0[2] = hc_bytealign_be_S (w4[0], w4[1], offset); c0[1] = hc_bytealign_be_S (w3[3], w4[0], offset); c0[0] = hc_bytealign_be_S (w3[2], w3[3], offset); w7[3] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[2] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[1] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w6[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w5[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w4[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[1] = hc_bytealign_be_S ( 0, w0[0], offset); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_bytealign_be_S (w7[3], 0, offset); c4[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c4[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c3[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c3[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c3[1] = 
hc_bytealign_be_S (w6[2], w6[3], offset); c3[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c2[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c2[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c2[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c2[0] = hc_bytealign_be_S (w5[1], w5[2], offset); c1[3] = hc_bytealign_be_S (w5[0], w5[1], offset); c1[2] = hc_bytealign_be_S (w4[3], w5[0], offset); c1[1] = hc_bytealign_be_S (w4[2], w4[3], offset); c1[0] = hc_bytealign_be_S (w4[1], w4[2], offset); c0[3] = hc_bytealign_be_S (w4[0], w4[1], offset); c0[2] = hc_bytealign_be_S (w3[3], w4[0], offset); c0[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[0] = hc_bytealign_be_S (w3[1], w3[2], offset); w7[3] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[2] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[1] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w6[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w5[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w4[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[2] = hc_bytealign_be_S ( 0, w0[0], offset); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_bytealign_be_S (w7[3], 0, offset); c4[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c4[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c4[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c3[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c3[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c3[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c3[0] = hc_bytealign_be_S (w6[0], w6[1], offset); 
c2[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c2[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c2[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c2[0] = hc_bytealign_be_S (w5[0], w5[1], offset); c1[3] = hc_bytealign_be_S (w4[3], w5[0], offset); c1[2] = hc_bytealign_be_S (w4[2], w4[3], offset); c1[1] = hc_bytealign_be_S (w4[1], w4[2], offset); c1[0] = hc_bytealign_be_S (w4[0], w4[1], offset); c0[3] = hc_bytealign_be_S (w3[3], w4[0], offset); c0[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[0] = hc_bytealign_be_S (w3[0], w3[1], offset); w7[3] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[2] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[1] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w6[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w5[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w4[3] = hc_bytealign_be_S ( 0, w0[0], offset); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_bytealign_be_S (w7[3], 0, offset); c4[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c4[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c4[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c4[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c3[3] = hc_bytealign_be_S (w6[2], w6[3], offset); c3[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c3[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c3[0] = hc_bytealign_be_S (w5[3], w6[0], offset); c2[3] = hc_bytealign_be_S (w5[2], w5[3], offset); c2[2] = hc_bytealign_be_S 
(w5[1], w5[2], offset); c2[1] = hc_bytealign_be_S (w5[0], w5[1], offset); c2[0] = hc_bytealign_be_S (w4[3], w5[0], offset); c1[3] = hc_bytealign_be_S (w4[2], w4[3], offset); c1[2] = hc_bytealign_be_S (w4[1], w4[2], offset); c1[1] = hc_bytealign_be_S (w4[0], w4[1], offset); c1[0] = hc_bytealign_be_S (w3[3], w4[0], offset); c0[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[2] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[0] = hc_bytealign_be_S (w2[3], w3[0], offset); w7[3] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[2] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[1] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w6[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w5[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[0] = hc_bytealign_be_S ( 0, w0[0], offset); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_bytealign_be_S (w7[3], 0, offset); c5[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c4[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c4[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c4[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c4[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c3[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c3[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c3[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c3[0] = hc_bytealign_be_S (w5[2], w5[3], offset); c2[3] = hc_bytealign_be_S (w5[1], w5[2], offset); c2[2] = hc_bytealign_be_S (w5[0], w5[1], offset); c2[1] = hc_bytealign_be_S (w4[3], w5[0], 
offset); c2[0] = hc_bytealign_be_S (w4[2], w4[3], offset); c1[3] = hc_bytealign_be_S (w4[1], w4[2], offset); c1[2] = hc_bytealign_be_S (w4[0], w4[1], offset); c1[1] = hc_bytealign_be_S (w3[3], w4[0], offset); c1[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c0[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[2] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[0] = hc_bytealign_be_S (w2[2], w2[3], offset); w7[3] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[2] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[1] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w6[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w5[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[1] = hc_bytealign_be_S ( 0, w0[0], offset); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_bytealign_be_S (w7[3], 0, offset); c5[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c5[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c4[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c4[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c4[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c4[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c3[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c3[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c3[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c3[0] = hc_bytealign_be_S (w5[1], w5[2], offset); c2[3] = hc_bytealign_be_S (w5[0], w5[1], offset); c2[2] = hc_bytealign_be_S (w4[3], w5[0], offset); c2[1] = hc_bytealign_be_S (w4[2], w4[3], offset); c2[0] = hc_bytealign_be_S (w4[1], w4[2], offset); 
c1[3] = hc_bytealign_be_S (w4[0], w4[1], offset); c1[2] = hc_bytealign_be_S (w3[3], w4[0], offset); c1[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c0[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[1] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[0] = hc_bytealign_be_S (w2[1], w2[2], offset); w7[3] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[2] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[1] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w6[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w5[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[2] = hc_bytealign_be_S ( 0, w0[0], offset); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_bytealign_be_S (w7[3], 0, offset); c5[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c5[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c5[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c4[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c4[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c4[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c4[0] = hc_bytealign_be_S (w6[0], w6[1], offset); c3[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c3[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c3[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c3[0] = hc_bytealign_be_S (w5[0], w5[1], offset); c2[3] = hc_bytealign_be_S (w4[3], w5[0], offset); c2[2] = hc_bytealign_be_S (w4[2], w4[3], offset); c2[1] = hc_bytealign_be_S (w4[1], w4[2], offset); c2[0] = hc_bytealign_be_S (w4[0], w4[1], offset); c1[3] = hc_bytealign_be_S (w3[3], w4[0], 
offset); c1[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c0[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[1] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[0] = hc_bytealign_be_S (w2[0], w2[1], offset); w7[3] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[2] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[1] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w6[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w5[3] = hc_bytealign_be_S ( 0, w0[0], offset); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_bytealign_be_S (w7[3], 0, offset); c5[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c5[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c5[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c5[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c4[3] = hc_bytealign_be_S (w6[2], w6[3], offset); c4[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c4[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c4[0] = hc_bytealign_be_S (w5[3], w6[0], offset); c3[3] = hc_bytealign_be_S (w5[2], w5[3], offset); c3[2] = hc_bytealign_be_S (w5[1], w5[2], offset); c3[1] = hc_bytealign_be_S (w5[0], w5[1], offset); c3[0] = hc_bytealign_be_S (w4[3], w5[0], offset); c2[3] = hc_bytealign_be_S (w4[2], w4[3], offset); c2[2] = hc_bytealign_be_S (w4[1], w4[2], offset); c2[1] = hc_bytealign_be_S (w4[0], w4[1], offset); c2[0] = hc_bytealign_be_S (w3[3], w4[0], offset); c1[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[2] = hc_bytealign_be_S 
(w3[1], w3[2], offset); c1[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[0] = hc_bytealign_be_S (w2[3], w3[0], offset); c0[3] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[2] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[1] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[0] = hc_bytealign_be_S (w1[3], w2[0], offset); w7[3] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[2] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[1] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w6[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[0] = hc_bytealign_be_S ( 0, w0[0], offset); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_bytealign_be_S (w7[3], 0, offset); c6[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c5[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c5[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c5[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c5[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c4[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c4[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c4[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c4[0] = hc_bytealign_be_S (w5[2], w5[3], offset); c3[3] = hc_bytealign_be_S (w5[1], w5[2], offset); c3[2] = hc_bytealign_be_S (w5[0], w5[1], offset); c3[1] = hc_bytealign_be_S (w4[3], w5[0], offset); c3[0] = hc_bytealign_be_S (w4[2], w4[3], offset); c2[3] = hc_bytealign_be_S (w4[1], w4[2], offset); c2[2] = hc_bytealign_be_S (w4[0], w4[1], offset); c2[1] = hc_bytealign_be_S (w3[3], w4[0], offset); c2[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c1[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[2] = hc_bytealign_be_S (w3[0], w3[1], offset); 
c1[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[0] = hc_bytealign_be_S (w2[2], w2[3], offset); c0[3] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[2] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[1] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[0] = hc_bytealign_be_S (w1[2], w1[3], offset); w7[3] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[2] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[1] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w6[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[1] = hc_bytealign_be_S ( 0, w0[0], offset); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_bytealign_be_S (w7[3], 0, offset); c6[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c6[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c5[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c5[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c5[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c5[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c4[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c4[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c4[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c4[0] = hc_bytealign_be_S (w5[1], w5[2], offset); c3[3] = hc_bytealign_be_S (w5[0], w5[1], offset); c3[2] = hc_bytealign_be_S (w4[3], w5[0], offset); c3[1] = hc_bytealign_be_S (w4[2], w4[3], offset); c3[0] = hc_bytealign_be_S (w4[1], w4[2], offset); c2[3] = hc_bytealign_be_S (w4[0], w4[1], offset); c2[2] = hc_bytealign_be_S (w3[3], w4[0], offset); c2[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c1[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[1] = 
hc_bytealign_be_S (w2[2], w2[3], offset); c1[0] = hc_bytealign_be_S (w2[1], w2[2], offset); c0[3] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[2] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[1] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[0] = hc_bytealign_be_S (w1[1], w1[2], offset); w7[3] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[2] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[1] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w6[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[2] = hc_bytealign_be_S ( 0, w0[0], offset); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_bytealign_be_S (w7[3], 0, offset); c6[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c6[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c6[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c5[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c5[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c5[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c5[0] = hc_bytealign_be_S (w6[0], w6[1], offset); c4[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c4[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c4[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c4[0] = hc_bytealign_be_S (w5[0], w5[1], offset); c3[3] = hc_bytealign_be_S (w4[3], w5[0], offset); c3[2] = hc_bytealign_be_S (w4[2], w4[3], offset); c3[1] = hc_bytealign_be_S (w4[1], w4[2], offset); c3[0] = hc_bytealign_be_S (w4[0], w4[1], offset); c2[3] = hc_bytealign_be_S (w3[3], w4[0], offset); c2[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c1[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[1] = 
hc_bytealign_be_S (w2[1], w2[2], offset); c1[0] = hc_bytealign_be_S (w2[0], w2[1], offset); c0[3] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[2] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[1] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[0] = hc_bytealign_be_S (w1[0], w1[1], offset); w7[3] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[2] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[1] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w6[3] = hc_bytealign_be_S ( 0, w0[0], offset); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_bytealign_be_S (w7[3], 0, offset); c6[3] = hc_bytealign_be_S (w7[2], w7[3], offset); c6[2] = hc_bytealign_be_S (w7[1], w7[2], offset); c6[1] = hc_bytealign_be_S (w7[0], w7[1], offset); c6[0] = hc_bytealign_be_S (w6[3], w7[0], offset); c5[3] = hc_bytealign_be_S (w6[2], w6[3], offset); c5[2] = hc_bytealign_be_S (w6[1], w6[2], offset); c5[1] = hc_bytealign_be_S (w6[0], w6[1], offset); c5[0] = hc_bytealign_be_S (w5[3], w6[0], offset); c4[3] = hc_bytealign_be_S (w5[2], w5[3], offset); c4[2] = hc_bytealign_be_S (w5[1], w5[2], offset); c4[1] = hc_bytealign_be_S (w5[0], w5[1], offset); c4[0] = hc_bytealign_be_S (w4[3], w5[0], offset); c3[3] = hc_bytealign_be_S (w4[2], w4[3], offset); c3[2] = hc_bytealign_be_S (w4[1], w4[2], offset); c3[1] = hc_bytealign_be_S (w4[0], w4[1], offset); c3[0] = hc_bytealign_be_S (w3[3], w4[0], offset); c2[3] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[2] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[1] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[0] = hc_bytealign_be_S (w2[3], w3[0], offset); c1[3] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[2] = hc_bytealign_be_S (w2[1], w2[2], offset); 
c1[1] = hc_bytealign_be_S (w2[0], w2[1], offset); c1[0] = hc_bytealign_be_S (w1[3], w2[0], offset); c0[3] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[2] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[1] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[0] = hc_bytealign_be_S (w0[3], w1[0], offset); w7[3] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[2] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[1] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[0] = hc_bytealign_be_S ( 0, w0[0], offset); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_bytealign_be_S (w7[3], 0, offset); c7[0] = hc_bytealign_be_S (w7[2], w7[3], offset); c6[3] = hc_bytealign_be_S (w7[1], w7[2], offset); c6[2] = hc_bytealign_be_S (w7[0], w7[1], offset); c6[1] = hc_bytealign_be_S (w6[3], w7[0], offset); c6[0] = hc_bytealign_be_S (w6[2], w6[3], offset); c5[3] = hc_bytealign_be_S (w6[1], w6[2], offset); c5[2] = hc_bytealign_be_S (w6[0], w6[1], offset); c5[1] = hc_bytealign_be_S (w5[3], w6[0], offset); c5[0] = hc_bytealign_be_S (w5[2], w5[3], offset); c4[3] = hc_bytealign_be_S (w5[1], w5[2], offset); c4[2] = hc_bytealign_be_S (w5[0], w5[1], offset); c4[1] = hc_bytealign_be_S (w4[3], w5[0], offset); c4[0] = hc_bytealign_be_S (w4[2], w4[3], offset); c3[3] = hc_bytealign_be_S (w4[1], w4[2], offset); c3[2] = hc_bytealign_be_S (w4[0], w4[1], offset); c3[1] = hc_bytealign_be_S (w3[3], w4[0], offset); c3[0] = hc_bytealign_be_S (w3[2], w3[3], offset); c2[3] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[2] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[1] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[0] = hc_bytealign_be_S (w2[2], w2[3], offset); c1[3] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[2] = hc_bytealign_be_S 
(w2[0], w2[1], offset); c1[1] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[0] = hc_bytealign_be_S (w1[2], w1[3], offset); c0[3] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[2] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[1] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[0] = hc_bytealign_be_S (w0[2], w0[3], offset); w7[3] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[2] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[1] = hc_bytealign_be_S ( 0, w0[0], offset); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_bytealign_be_S (w7[3], 0, offset); c7[1] = hc_bytealign_be_S (w7[2], w7[3], offset); c7[0] = hc_bytealign_be_S (w7[1], w7[2], offset); c6[3] = hc_bytealign_be_S (w7[0], w7[1], offset); c6[2] = hc_bytealign_be_S (w6[3], w7[0], offset); c6[1] = hc_bytealign_be_S (w6[2], w6[3], offset); c6[0] = hc_bytealign_be_S (w6[1], w6[2], offset); c5[3] = hc_bytealign_be_S (w6[0], w6[1], offset); c5[2] = hc_bytealign_be_S (w5[3], w6[0], offset); c5[1] = hc_bytealign_be_S (w5[2], w5[3], offset); c5[0] = hc_bytealign_be_S (w5[1], w5[2], offset); c4[3] = hc_bytealign_be_S (w5[0], w5[1], offset); c4[2] = hc_bytealign_be_S (w4[3], w5[0], offset); c4[1] = hc_bytealign_be_S (w4[2], w4[3], offset); c4[0] = hc_bytealign_be_S (w4[1], w4[2], offset); c3[3] = hc_bytealign_be_S (w4[0], w4[1], offset); c3[2] = hc_bytealign_be_S (w3[3], w4[0], offset); c3[1] = hc_bytealign_be_S (w3[2], w3[3], offset); c3[0] = hc_bytealign_be_S (w3[1], w3[2], offset); c2[3] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[2] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[1] = hc_bytealign_be_S (w2[2], w2[3], offset); c2[0] = hc_bytealign_be_S (w2[1], w2[2], offset); c1[3] = hc_bytealign_be_S (w2[0], w2[1], 
offset); c1[2] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[1] = hc_bytealign_be_S (w1[2], w1[3], offset); c1[0] = hc_bytealign_be_S (w1[1], w1[2], offset); c0[3] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[2] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[1] = hc_bytealign_be_S (w0[2], w0[3], offset); c0[0] = hc_bytealign_be_S (w0[1], w0[2], offset); w7[3] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[2] = hc_bytealign_be_S ( 0, w0[0], offset); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_bytealign_be_S (w7[3], 0, offset); c7[2] = hc_bytealign_be_S (w7[2], w7[3], offset); c7[1] = hc_bytealign_be_S (w7[1], w7[2], offset); c7[0] = hc_bytealign_be_S (w7[0], w7[1], offset); c6[3] = hc_bytealign_be_S (w6[3], w7[0], offset); c6[2] = hc_bytealign_be_S (w6[2], w6[3], offset); c6[1] = hc_bytealign_be_S (w6[1], w6[2], offset); c6[0] = hc_bytealign_be_S (w6[0], w6[1], offset); c5[3] = hc_bytealign_be_S (w5[3], w6[0], offset); c5[2] = hc_bytealign_be_S (w5[2], w5[3], offset); c5[1] = hc_bytealign_be_S (w5[1], w5[2], offset); c5[0] = hc_bytealign_be_S (w5[0], w5[1], offset); c4[3] = hc_bytealign_be_S (w4[3], w5[0], offset); c4[2] = hc_bytealign_be_S (w4[2], w4[3], offset); c4[1] = hc_bytealign_be_S (w4[1], w4[2], offset); c4[0] = hc_bytealign_be_S (w4[0], w4[1], offset); c3[3] = hc_bytealign_be_S (w3[3], w4[0], offset); c3[2] = hc_bytealign_be_S (w3[2], w3[3], offset); c3[1] = hc_bytealign_be_S (w3[1], w3[2], offset); c3[0] = hc_bytealign_be_S (w3[0], w3[1], offset); c2[3] = hc_bytealign_be_S (w2[3], w3[0], offset); c2[2] = hc_bytealign_be_S (w2[2], w2[3], offset); c2[1] = hc_bytealign_be_S (w2[1], w2[2], offset); c2[0] = hc_bytealign_be_S (w2[0], w2[1], offset); 
c1[3] = hc_bytealign_be_S (w1[3], w2[0], offset); c1[2] = hc_bytealign_be_S (w1[2], w1[3], offset); c1[1] = hc_bytealign_be_S (w1[1], w1[2], offset); c1[0] = hc_bytealign_be_S (w1[0], w1[1], offset); c0[3] = hc_bytealign_be_S (w0[3], w1[0], offset); c0[2] = hc_bytealign_be_S (w0[2], w0[3], offset); c0[1] = hc_bytealign_be_S (w0[1], w0[2], offset); c0[0] = hc_bytealign_be_S (w0[0], w0[1], offset); w7[3] = hc_bytealign_be_S ( 0, w0[0], offset); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: c0[0] = hc_byte_perm_S ( 0, w7[3], selector); w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); w7[0] = hc_byte_perm_S (w7[0], w6[3], selector); w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); w4[0] = hc_byte_perm_S (w4[0], w3[3], selector); w3[3] = hc_byte_perm_S (w3[3], w3[2], 
selector); w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); w0[0] = hc_byte_perm_S (w0[0], 0, selector); break; case 1: c0[1] = hc_byte_perm_S ( 0, w7[3], selector); c0[0] = hc_byte_perm_S (w7[3], w7[2], selector); w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); w6[2] = hc_byte_perm_S (w6[1], w6[0], selector); w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); w4[3] = hc_byte_perm_S (w4[2], w4[1], selector); w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); w2[0] = 
hc_byte_perm_S (w1[3], w1[2], selector); w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); w0[1] = hc_byte_perm_S (w0[0], 0, selector); w0[0] = 0; break; case 2: c0[2] = hc_byte_perm_S ( 0, w7[3], selector); c0[1] = hc_byte_perm_S (w7[3], w7[2], selector); c0[0] = hc_byte_perm_S (w7[2], w7[1], selector); w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); w4[2] = hc_byte_perm_S (w4[0], w3[3], selector); w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); w0[2] = hc_byte_perm_S 
(w0[0], 0, selector); w0[1] = 0; w0[0] = 0; break; case 3: c0[3] = hc_byte_perm_S ( 0, w7[3], selector); c0[2] = hc_byte_perm_S (w7[3], w7[2], selector); c0[1] = hc_byte_perm_S (w7[2], w7[1], selector); c0[0] = hc_byte_perm_S (w7[1], w7[0], selector); w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); w0[3] = hc_byte_perm_S (w0[0], 0, selector); w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 4: c1[0] = hc_byte_perm_S ( 0, w7[3], selector); c0[3] = hc_byte_perm_S (w7[3], w7[2], selector); c0[2] = hc_byte_perm_S (w7[2], w7[1], selector); c0[1] = hc_byte_perm_S (w7[1], w7[0], selector); c0[0] = hc_byte_perm_S (w7[0], w6[3], selector); w7[3] = hc_byte_perm_S (w6[3], w6[2], 
selector); w7[2] = hc_byte_perm_S (w6[2], w6[1], selector); w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); w1[0] = hc_byte_perm_S (w0[0], 0, selector); w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 5: c1[1] = hc_byte_perm_S ( 0, w7[3], selector); c1[0] = hc_byte_perm_S (w7[3], w7[2], selector); c0[3] = hc_byte_perm_S (w7[2], w7[1], selector); c0[2] = hc_byte_perm_S (w7[1], w7[0], selector); c0[1] = hc_byte_perm_S (w7[0], w6[3], selector); c0[0] = hc_byte_perm_S (w6[3], w6[2], selector); w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); w7[2] = hc_byte_perm_S (w6[1], w6[0], selector); w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); w6[1] = hc_byte_perm_S 
(w5[0], w4[3], selector); w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); w4[2] = hc_byte_perm_S (w3[1], w3[0], selector); w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); w1[1] = hc_byte_perm_S (w0[0], 0, selector); w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 6: c1[2] = hc_byte_perm_S ( 0, w7[3], selector); c1[1] = hc_byte_perm_S (w7[3], w7[2], selector); c1[0] = hc_byte_perm_S (w7[2], w7[1], selector); c0[3] = hc_byte_perm_S (w7[1], w7[0], selector); c0[2] = hc_byte_perm_S (w7[0], w6[3], selector); c0[1] = hc_byte_perm_S (w6[3], w6[2], selector); c0[0] = hc_byte_perm_S (w6[2], w6[1], selector); w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); w7[2] = hc_byte_perm_S (w6[0], w5[3], selector); w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); w5[0] = hc_byte_perm_S (w3[2], w3[1], selector); 
w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); w1[2] = hc_byte_perm_S (w0[0], 0, selector); w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 7: c1[3] = hc_byte_perm_S ( 0, w7[3], selector); c1[2] = hc_byte_perm_S (w7[3], w7[2], selector); c1[1] = hc_byte_perm_S (w7[2], w7[1], selector); c1[0] = hc_byte_perm_S (w7[1], w7[0], selector); c0[3] = hc_byte_perm_S (w7[0], w6[3], selector); c0[2] = hc_byte_perm_S (w6[3], w6[2], selector); c0[1] = hc_byte_perm_S (w6[2], w6[1], selector); c0[0] = hc_byte_perm_S (w6[1], w6[0], selector); w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); w6[2] = hc_byte_perm_S (w4[3], w4[2], selector); w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); w3[2] = 
hc_byte_perm_S (w1[3], w1[2], selector); w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); w1[3] = hc_byte_perm_S (w0[0], 0, selector); w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 8: c2[0] = hc_byte_perm_S ( 0, w7[3], selector); c1[3] = hc_byte_perm_S (w7[3], w7[2], selector); c1[2] = hc_byte_perm_S (w7[2], w7[1], selector); c1[1] = hc_byte_perm_S (w7[1], w7[0], selector); c1[0] = hc_byte_perm_S (w7[0], w6[3], selector); c0[3] = hc_byte_perm_S (w6[3], w6[2], selector); c0[2] = hc_byte_perm_S (w6[2], w6[1], selector); c0[1] = hc_byte_perm_S (w6[1], w6[0], selector); c0[0] = hc_byte_perm_S (w6[0], w5[3], selector); w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); w5[3] = hc_byte_perm_S (w3[3], w3[2], selector); w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); w2[1] = 
hc_byte_perm_S (w0[1], w0[0], selector); w2[0] = hc_byte_perm_S (w0[0], 0, selector); w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 9: c2[1] = hc_byte_perm_S ( 0, w7[3], selector); c2[0] = hc_byte_perm_S (w7[3], w7[2], selector); c1[3] = hc_byte_perm_S (w7[2], w7[1], selector); c1[2] = hc_byte_perm_S (w7[1], w7[0], selector); c1[1] = hc_byte_perm_S (w7[0], w6[3], selector); c1[0] = hc_byte_perm_S (w6[3], w6[2], selector); c0[3] = hc_byte_perm_S (w6[2], w6[1], selector); c0[2] = hc_byte_perm_S (w6[1], w6[0], selector); c0[1] = hc_byte_perm_S (w6[0], w5[3], selector); c0[0] = hc_byte_perm_S (w5[3], w5[2], selector); w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); w5[3] = hc_byte_perm_S (w3[2], w3[1], selector); w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); w2[1] = hc_byte_perm_S (w0[0], 0, selector); w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 10: c2[2] = hc_byte_perm_S ( 0, w7[3], selector); c2[1] = hc_byte_perm_S (w7[3], w7[2], 
selector); c2[0] = hc_byte_perm_S (w7[2], w7[1], selector); c1[3] = hc_byte_perm_S (w7[1], w7[0], selector); c1[2] = hc_byte_perm_S (w7[0], w6[3], selector); c1[1] = hc_byte_perm_S (w6[3], w6[2], selector); c1[0] = hc_byte_perm_S (w6[2], w6[1], selector); c0[3] = hc_byte_perm_S (w6[1], w6[0], selector); c0[2] = hc_byte_perm_S (w6[0], w5[3], selector); c0[1] = hc_byte_perm_S (w5[3], w5[2], selector); c0[0] = hc_byte_perm_S (w5[2], w5[1], selector); w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); w7[2] = hc_byte_perm_S (w5[0], w4[3], selector); w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); w4[2] = hc_byte_perm_S (w2[0], w1[3], selector); w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); w2[2] = hc_byte_perm_S (w0[0], 0, selector); w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 11: c2[3] = hc_byte_perm_S ( 0, w7[3], selector); c2[2] = hc_byte_perm_S (w7[3], w7[2], selector); c2[1] = hc_byte_perm_S (w7[2], w7[1], selector); c2[0] = hc_byte_perm_S (w7[1], w7[0], selector); c1[3] = hc_byte_perm_S (w7[0], w6[3], selector); c1[2] = hc_byte_perm_S (w6[3], w6[2], selector); c1[1] = hc_byte_perm_S (w6[2], w6[1], selector); c1[0] = 
hc_byte_perm_S (w6[1], w6[0], selector); c0[3] = hc_byte_perm_S (w6[0], w5[3], selector); c0[2] = hc_byte_perm_S (w5[3], w5[2], selector); c0[1] = hc_byte_perm_S (w5[2], w5[1], selector); c0[0] = hc_byte_perm_S (w5[1], w5[0], selector); w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); w4[0] = hc_byte_perm_S (w1[1], w1[0], selector); w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); w2[3] = hc_byte_perm_S (w0[0], 0, selector); w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 12: c3[0] = hc_byte_perm_S ( 0, w7[3], selector); c2[3] = hc_byte_perm_S (w7[3], w7[2], selector); c2[2] = hc_byte_perm_S (w7[2], w7[1], selector); c2[1] = hc_byte_perm_S (w7[1], w7[0], selector); c2[0] = hc_byte_perm_S (w7[0], w6[3], selector); c1[3] = hc_byte_perm_S (w6[3], w6[2], selector); c1[2] = hc_byte_perm_S (w6[2], w6[1], selector); c1[1] = hc_byte_perm_S (w6[1], w6[0], selector); c1[0] = hc_byte_perm_S (w6[0], w5[3], selector); c0[3] = hc_byte_perm_S (w5[3], w5[2], selector); c0[2] = hc_byte_perm_S (w5[2], w5[1], selector); c0[1] = hc_byte_perm_S (w5[1], w5[0], selector); c0[0] = 
hc_byte_perm_S (w5[0], w4[3], selector); w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); w5[0] = hc_byte_perm_S (w2[0], w1[3], selector); w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); w3[0] = hc_byte_perm_S (w0[0], 0, selector); w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 13: c3[1] = hc_byte_perm_S ( 0, w7[3], selector); c3[0] = hc_byte_perm_S (w7[3], w7[2], selector); c2[3] = hc_byte_perm_S (w7[2], w7[1], selector); c2[2] = hc_byte_perm_S (w7[1], w7[0], selector); c2[1] = hc_byte_perm_S (w7[0], w6[3], selector); c2[0] = hc_byte_perm_S (w6[3], w6[2], selector); c1[3] = hc_byte_perm_S (w6[2], w6[1], selector); c1[2] = hc_byte_perm_S (w6[1], w6[0], selector); c1[1] = hc_byte_perm_S (w6[0], w5[3], selector); c1[0] = hc_byte_perm_S (w5[3], w5[2], selector); c0[3] = hc_byte_perm_S (w5[2], w5[1], selector); c0[2] = hc_byte_perm_S (w5[1], w5[0], selector); c0[1] = hc_byte_perm_S (w5[0], w4[3], selector); c0[0] = hc_byte_perm_S (w4[3], w4[2], selector); w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); 
w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); w5[0] = hc_byte_perm_S (w1[3], w1[2], selector); w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); w3[1] = hc_byte_perm_S (w0[0], 0, selector); w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 14: c3[2] = hc_byte_perm_S ( 0, w7[3], selector); c3[1] = hc_byte_perm_S (w7[3], w7[2], selector); c3[0] = hc_byte_perm_S (w7[2], w7[1], selector); c2[3] = hc_byte_perm_S (w7[1], w7[0], selector); c2[2] = hc_byte_perm_S (w7[0], w6[3], selector); c2[1] = hc_byte_perm_S (w6[3], w6[2], selector); c2[0] = hc_byte_perm_S (w6[2], w6[1], selector); c1[3] = hc_byte_perm_S (w6[1], w6[0], selector); c1[2] = hc_byte_perm_S (w6[0], w5[3], selector); c1[1] = hc_byte_perm_S (w5[3], w5[2], selector); c1[0] = hc_byte_perm_S (w5[2], w5[1], selector); c0[3] = hc_byte_perm_S (w5[1], w5[0], selector); c0[2] = hc_byte_perm_S (w5[0], w4[3], selector); c0[1] = hc_byte_perm_S (w4[3], w4[2], selector); c0[0] = hc_byte_perm_S (w4[2], w4[1], selector); w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); w6[3] = hc_byte_perm_S (w3[1], w3[0], selector); w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); w6[1] = hc_byte_perm_S (w2[3], 
w2[2], selector); w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); w3[2] = hc_byte_perm_S (w0[0], 0, selector); w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 15: c3[3] = hc_byte_perm_S ( 0, w7[3], selector); c3[2] = hc_byte_perm_S (w7[3], w7[2], selector); c3[1] = hc_byte_perm_S (w7[2], w7[1], selector); c3[0] = hc_byte_perm_S (w7[1], w7[0], selector); c2[3] = hc_byte_perm_S (w7[0], w6[3], selector); c2[2] = hc_byte_perm_S (w6[3], w6[2], selector); c2[1] = hc_byte_perm_S (w6[2], w6[1], selector); c2[0] = hc_byte_perm_S (w6[1], w6[0], selector); c1[3] = hc_byte_perm_S (w6[0], w5[3], selector); c1[2] = hc_byte_perm_S (w5[3], w5[2], selector); c1[1] = hc_byte_perm_S (w5[2], w5[1], selector); c1[0] = hc_byte_perm_S (w5[1], w5[0], selector); c0[3] = hc_byte_perm_S (w5[0], w4[3], selector); c0[2] = hc_byte_perm_S (w4[3], w4[2], selector); c0[1] = hc_byte_perm_S (w4[2], w4[1], selector); c0[0] = hc_byte_perm_S (w4[1], w4[0], selector); w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); w5[1] 
= hc_byte_perm_S (w1[2], w1[1], selector); w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); w3[3] = hc_byte_perm_S (w0[0], 0, selector); w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 16: c4[0] = hc_byte_perm_S ( 0, w7[3], selector); c3[3] = hc_byte_perm_S (w7[3], w7[2], selector); c3[2] = hc_byte_perm_S (w7[2], w7[1], selector); c3[1] = hc_byte_perm_S (w7[1], w7[0], selector); c3[0] = hc_byte_perm_S (w7[0], w6[3], selector); c2[3] = hc_byte_perm_S (w6[3], w6[2], selector); c2[2] = hc_byte_perm_S (w6[2], w6[1], selector); c2[1] = hc_byte_perm_S (w6[1], w6[0], selector); c2[0] = hc_byte_perm_S (w6[0], w5[3], selector); c1[3] = hc_byte_perm_S (w5[3], w5[2], selector); c1[2] = hc_byte_perm_S (w5[2], w5[1], selector); c1[1] = hc_byte_perm_S (w5[1], w5[0], selector); c1[0] = hc_byte_perm_S (w5[0], w4[3], selector); c0[3] = hc_byte_perm_S (w4[3], w4[2], selector); c0[2] = hc_byte_perm_S (w4[2], w4[1], selector); c0[1] = hc_byte_perm_S (w4[1], w4[0], selector); c0[0] = hc_byte_perm_S (w4[0], w3[3], selector); w7[3] = hc_byte_perm_S (w3[3], w3[2], selector); w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); w4[2] = 
hc_byte_perm_S (w0[2], w0[1], selector); w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); w4[0] = hc_byte_perm_S (w0[0], 0, selector); w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 17: c4[1] = hc_byte_perm_S ( 0, w7[3], selector); c4[0] = hc_byte_perm_S (w7[3], w7[2], selector); c3[3] = hc_byte_perm_S (w7[2], w7[1], selector); c3[2] = hc_byte_perm_S (w7[1], w7[0], selector); c3[1] = hc_byte_perm_S (w7[0], w6[3], selector); c3[0] = hc_byte_perm_S (w6[3], w6[2], selector); c2[3] = hc_byte_perm_S (w6[2], w6[1], selector); c2[2] = hc_byte_perm_S (w6[1], w6[0], selector); c2[1] = hc_byte_perm_S (w6[0], w5[3], selector); c2[0] = hc_byte_perm_S (w5[3], w5[2], selector); c1[3] = hc_byte_perm_S (w5[2], w5[1], selector); c1[2] = hc_byte_perm_S (w5[1], w5[0], selector); c1[1] = hc_byte_perm_S (w5[0], w4[3], selector); c1[0] = hc_byte_perm_S (w4[3], w4[2], selector); c0[3] = hc_byte_perm_S (w4[2], w4[1], selector); c0[2] = hc_byte_perm_S (w4[1], w4[0], selector); c0[1] = hc_byte_perm_S (w4[0], w3[3], selector); c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); w5[2] = hc_byte_perm_S (w1[1], w1[0], selector); w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); w4[1] = hc_byte_perm_S (w0[0], 0, selector); w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 
0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 18: c4[2] = hc_byte_perm_S ( 0, w7[3], selector); c4[1] = hc_byte_perm_S (w7[3], w7[2], selector); c4[0] = hc_byte_perm_S (w7[2], w7[1], selector); c3[3] = hc_byte_perm_S (w7[1], w7[0], selector); c3[2] = hc_byte_perm_S (w7[0], w6[3], selector); c3[1] = hc_byte_perm_S (w6[3], w6[2], selector); c3[0] = hc_byte_perm_S (w6[2], w6[1], selector); c2[3] = hc_byte_perm_S (w6[1], w6[0], selector); c2[2] = hc_byte_perm_S (w6[0], w5[3], selector); c2[1] = hc_byte_perm_S (w5[3], w5[2], selector); c2[0] = hc_byte_perm_S (w5[2], w5[1], selector); c1[3] = hc_byte_perm_S (w5[1], w5[0], selector); c1[2] = hc_byte_perm_S (w5[0], w4[3], selector); c1[1] = hc_byte_perm_S (w4[3], w4[2], selector); c1[0] = hc_byte_perm_S (w4[2], w4[1], selector); c0[3] = hc_byte_perm_S (w4[1], w4[0], selector); c0[2] = hc_byte_perm_S (w4[0], w3[3], selector); c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); w4[2] = hc_byte_perm_S (w0[0], 0, selector); w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 19: c4[3] = hc_byte_perm_S ( 0, w7[3], 
selector); c4[2] = hc_byte_perm_S (w7[3], w7[2], selector); c4[1] = hc_byte_perm_S (w7[2], w7[1], selector); c4[0] = hc_byte_perm_S (w7[1], w7[0], selector); c3[3] = hc_byte_perm_S (w7[0], w6[3], selector); c3[2] = hc_byte_perm_S (w6[3], w6[2], selector); c3[1] = hc_byte_perm_S (w6[2], w6[1], selector); c3[0] = hc_byte_perm_S (w6[1], w6[0], selector); c2[3] = hc_byte_perm_S (w6[0], w5[3], selector); c2[2] = hc_byte_perm_S (w5[3], w5[2], selector); c2[1] = hc_byte_perm_S (w5[2], w5[1], selector); c2[0] = hc_byte_perm_S (w5[1], w5[0], selector); c1[3] = hc_byte_perm_S (w5[0], w4[3], selector); c1[2] = hc_byte_perm_S (w4[3], w4[2], selector); c1[1] = hc_byte_perm_S (w4[2], w4[1], selector); c1[0] = hc_byte_perm_S (w4[1], w4[0], selector); c0[3] = hc_byte_perm_S (w4[0], w3[3], selector); c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); w4[3] = hc_byte_perm_S (w0[0], 0, selector); w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 20: c5[0] = hc_byte_perm_S ( 0, w7[3], selector); c4[3] = hc_byte_perm_S (w7[3], w7[2], selector); c4[2] = hc_byte_perm_S (w7[2], w7[1], selector); c4[1] = hc_byte_perm_S (w7[1], w7[0], selector); c4[0] 
= hc_byte_perm_S (w7[0], w6[3], selector); c3[3] = hc_byte_perm_S (w6[3], w6[2], selector); c3[2] = hc_byte_perm_S (w6[2], w6[1], selector); c3[1] = hc_byte_perm_S (w6[1], w6[0], selector); c3[0] = hc_byte_perm_S (w6[0], w5[3], selector); c2[3] = hc_byte_perm_S (w5[3], w5[2], selector); c2[2] = hc_byte_perm_S (w5[2], w5[1], selector); c2[1] = hc_byte_perm_S (w5[1], w5[0], selector); c2[0] = hc_byte_perm_S (w5[0], w4[3], selector); c1[3] = hc_byte_perm_S (w4[3], w4[2], selector); c1[2] = hc_byte_perm_S (w4[2], w4[1], selector); c1[1] = hc_byte_perm_S (w4[1], w4[0], selector); c1[0] = hc_byte_perm_S (w4[0], w3[3], selector); c0[3] = hc_byte_perm_S (w3[3], w3[2], selector); c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); w7[2] = hc_byte_perm_S (w2[2], w2[1], selector); w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); w5[0] = hc_byte_perm_S (w0[0], 0, selector); w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 21: c5[1] = hc_byte_perm_S ( 0, w7[3], selector); c5[0] = hc_byte_perm_S (w7[3], w7[2], selector); c4[3] = hc_byte_perm_S (w7[2], w7[1], selector); c4[2] = hc_byte_perm_S (w7[1], w7[0], selector); c4[1] = hc_byte_perm_S (w7[0], w6[3], selector); c4[0] = hc_byte_perm_S (w6[3], w6[2], selector); c3[3] = hc_byte_perm_S (w6[2], w6[1], selector); c3[2] = 
hc_byte_perm_S (w6[1], w6[0], selector); c3[1] = hc_byte_perm_S (w6[0], w5[3], selector); c3[0] = hc_byte_perm_S (w5[3], w5[2], selector); c2[3] = hc_byte_perm_S (w5[2], w5[1], selector); c2[2] = hc_byte_perm_S (w5[1], w5[0], selector); c2[1] = hc_byte_perm_S (w5[0], w4[3], selector); c2[0] = hc_byte_perm_S (w4[3], w4[2], selector); c1[3] = hc_byte_perm_S (w4[2], w4[1], selector); c1[2] = hc_byte_perm_S (w4[1], w4[0], selector); c1[1] = hc_byte_perm_S (w4[0], w3[3], selector); c1[0] = hc_byte_perm_S (w3[3], w3[2], selector); c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); w5[1] = hc_byte_perm_S (w0[0], 0, selector); w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 22: c5[2] = hc_byte_perm_S ( 0, w7[3], selector); c5[1] = hc_byte_perm_S (w7[3], w7[2], selector); c5[0] = hc_byte_perm_S (w7[2], w7[1], selector); c4[3] = hc_byte_perm_S (w7[1], w7[0], selector); c4[2] = hc_byte_perm_S (w7[0], w6[3], selector); c4[1] = hc_byte_perm_S (w6[3], w6[2], selector); c4[0] = hc_byte_perm_S (w6[2], w6[1], selector); c3[3] = hc_byte_perm_S (w6[1], w6[0], selector); c3[2] = hc_byte_perm_S (w6[0], w5[3], selector); c3[1] = hc_byte_perm_S (w5[3], w5[2], selector); 
c3[0] = hc_byte_perm_S (w5[2], w5[1], selector); c2[3] = hc_byte_perm_S (w5[1], w5[0], selector); c2[2] = hc_byte_perm_S (w5[0], w4[3], selector); c2[1] = hc_byte_perm_S (w4[3], w4[2], selector); c2[0] = hc_byte_perm_S (w4[2], w4[1], selector); c1[3] = hc_byte_perm_S (w4[1], w4[0], selector); c1[2] = hc_byte_perm_S (w4[0], w3[3], selector); c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); c0[3] = hc_byte_perm_S (w3[1], w3[0], selector); c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); w7[0] = hc_byte_perm_S (w1[2], w1[1], selector); w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); w6[1] = hc_byte_perm_S (w0[3], w0[2], selector); w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); w5[2] = hc_byte_perm_S (w0[0], 0, selector); w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 23: c5[3] = hc_byte_perm_S ( 0, w7[3], selector); c5[2] = hc_byte_perm_S (w7[3], w7[2], selector); c5[1] = hc_byte_perm_S (w7[2], w7[1], selector); c5[0] = hc_byte_perm_S (w7[1], w7[0], selector); c4[3] = hc_byte_perm_S (w7[0], w6[3], selector); c4[2] = hc_byte_perm_S (w6[3], w6[2], selector); c4[1] = hc_byte_perm_S (w6[2], w6[1], selector); c4[0] = hc_byte_perm_S (w6[1], w6[0], selector); c3[3] = hc_byte_perm_S (w6[0], w5[3], selector); c3[2] = hc_byte_perm_S (w5[3], w5[2], selector); c3[1] = hc_byte_perm_S (w5[2], w5[1], selector); c3[0] = hc_byte_perm_S (w5[1], w5[0], selector); c2[3] = hc_byte_perm_S (w5[0], 
w4[3], selector); c2[2] = hc_byte_perm_S (w4[3], w4[2], selector); c2[1] = hc_byte_perm_S (w4[2], w4[1], selector); c2[0] = hc_byte_perm_S (w4[1], w4[0], selector); c1[3] = hc_byte_perm_S (w4[0], w3[3], selector); c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); c0[2] = hc_byte_perm_S (w2[3], w2[2], selector); c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); w5[3] = hc_byte_perm_S (w0[0], 0, selector); w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 24: c6[0] = hc_byte_perm_S ( 0, w7[3], selector); c5[3] = hc_byte_perm_S (w7[3], w7[2], selector); c5[2] = hc_byte_perm_S (w7[2], w7[1], selector); c5[1] = hc_byte_perm_S (w7[1], w7[0], selector); c5[0] = hc_byte_perm_S (w7[0], w6[3], selector); c4[3] = hc_byte_perm_S (w6[3], w6[2], selector); c4[2] = hc_byte_perm_S (w6[2], w6[1], selector); c4[1] = hc_byte_perm_S (w6[1], w6[0], selector); c4[0] = hc_byte_perm_S (w6[0], w5[3], selector); c3[3] = hc_byte_perm_S (w5[3], w5[2], selector); c3[2] = hc_byte_perm_S (w5[2], w5[1], selector); c3[1] = hc_byte_perm_S (w5[1], w5[0], selector); c3[0] = hc_byte_perm_S (w5[0], w4[3], selector); c2[3] = hc_byte_perm_S (w4[3], w4[2], selector); c2[2] = hc_byte_perm_S (w4[2], w4[1], selector); 
c2[1] = hc_byte_perm_S (w4[1], w4[0], selector); c2[0] = hc_byte_perm_S (w4[0], w3[3], selector); c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); w6[0] = hc_byte_perm_S (w0[0], 0, selector); w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 25: c6[1] = hc_byte_perm_S ( 0, w7[3], selector); c6[0] = hc_byte_perm_S (w7[3], w7[2], selector); c5[3] = hc_byte_perm_S (w7[2], w7[1], selector); c5[2] = hc_byte_perm_S (w7[1], w7[0], selector); c5[1] = hc_byte_perm_S (w7[0], w6[3], selector); c5[0] = hc_byte_perm_S (w6[3], w6[2], selector); c4[3] = hc_byte_perm_S (w6[2], w6[1], selector); c4[2] = hc_byte_perm_S (w6[1], w6[0], selector); c4[1] = hc_byte_perm_S (w6[0], w5[3], selector); c4[0] = hc_byte_perm_S (w5[3], w5[2], selector); c3[3] = hc_byte_perm_S (w5[2], w5[1], selector); c3[2] = hc_byte_perm_S (w5[1], w5[0], selector); c3[1] = hc_byte_perm_S (w5[0], w4[3], selector); c3[0] = hc_byte_perm_S (w4[3], w4[2], selector); c2[3] = hc_byte_perm_S (w4[2], w4[1], selector); c2[2] = hc_byte_perm_S (w4[1], w4[0], selector); c2[1] = hc_byte_perm_S (w4[0], w3[3], selector); c2[0] = 
hc_byte_perm_S (w3[3], w3[2], selector); c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); c0[1] = hc_byte_perm_S (w2[0], w1[3], selector); c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); w7[3] = hc_byte_perm_S (w1[2], w1[1], selector); w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); w6[1] = hc_byte_perm_S (w0[0], 0, selector); w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 26: c6[2] = hc_byte_perm_S ( 0, w7[3], selector); c6[1] = hc_byte_perm_S (w7[3], w7[2], selector); c6[0] = hc_byte_perm_S (w7[2], w7[1], selector); c5[3] = hc_byte_perm_S (w7[1], w7[0], selector); c5[2] = hc_byte_perm_S (w7[0], w6[3], selector); c5[1] = hc_byte_perm_S (w6[3], w6[2], selector); c5[0] = hc_byte_perm_S (w6[2], w6[1], selector); c4[3] = hc_byte_perm_S (w6[1], w6[0], selector); c4[2] = hc_byte_perm_S (w6[0], w5[3], selector); c4[1] = hc_byte_perm_S (w5[3], w5[2], selector); c4[0] = hc_byte_perm_S (w5[2], w5[1], selector); c3[3] = hc_byte_perm_S (w5[1], w5[0], selector); c3[2] = hc_byte_perm_S (w5[0], w4[3], selector); c3[1] = hc_byte_perm_S (w4[3], w4[2], selector); c3[0] = hc_byte_perm_S (w4[2], w4[1], selector); c2[3] = hc_byte_perm_S (w4[1], w4[0], selector); c2[2] = hc_byte_perm_S (w4[0], w3[3], selector); c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); c1[3] = 
hc_byte_perm_S (w3[1], w3[0], selector); c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); c1[1] = hc_byte_perm_S (w2[3], w2[2], selector); c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); w6[2] = hc_byte_perm_S (w0[0], 0, selector); w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 27: c6[3] = hc_byte_perm_S ( 0, w7[3], selector); c6[2] = hc_byte_perm_S (w7[3], w7[2], selector); c6[1] = hc_byte_perm_S (w7[2], w7[1], selector); c6[0] = hc_byte_perm_S (w7[1], w7[0], selector); c5[3] = hc_byte_perm_S (w7[0], w6[3], selector); c5[2] = hc_byte_perm_S (w6[3], w6[2], selector); c5[1] = hc_byte_perm_S (w6[2], w6[1], selector); c5[0] = hc_byte_perm_S (w6[1], w6[0], selector); c4[3] = hc_byte_perm_S (w6[0], w5[3], selector); c4[2] = hc_byte_perm_S (w5[3], w5[2], selector); c4[1] = hc_byte_perm_S (w5[2], w5[1], selector); c4[0] = hc_byte_perm_S (w5[1], w5[0], selector); c3[3] = hc_byte_perm_S (w5[0], w4[3], selector); c3[2] = hc_byte_perm_S (w4[3], w4[2], selector); c3[1] = hc_byte_perm_S (w4[2], w4[1], selector); c3[0] = hc_byte_perm_S (w4[1], w4[0], selector); c2[3] = hc_byte_perm_S (w4[0], w3[3], selector); c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); c1[3] = hc_byte_perm_S (w3[0], w2[3], 
selector); c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); c0[1] = hc_byte_perm_S (w1[2], w1[1], selector); c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); w6[3] = hc_byte_perm_S (w0[0], 0, selector); w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 28: c7[0] = hc_byte_perm_S ( 0, w7[3], selector); c6[3] = hc_byte_perm_S (w7[3], w7[2], selector); c6[2] = hc_byte_perm_S (w7[2], w7[1], selector); c6[1] = hc_byte_perm_S (w7[1], w7[0], selector); c6[0] = hc_byte_perm_S (w7[0], w6[3], selector); c5[3] = hc_byte_perm_S (w6[3], w6[2], selector); c5[2] = hc_byte_perm_S (w6[2], w6[1], selector); c5[1] = hc_byte_perm_S (w6[1], w6[0], selector); c5[0] = hc_byte_perm_S (w6[0], w5[3], selector); c4[3] = hc_byte_perm_S (w5[3], w5[2], selector); c4[2] = hc_byte_perm_S (w5[2], w5[1], selector); c4[1] = hc_byte_perm_S (w5[1], w5[0], selector); c4[0] = hc_byte_perm_S (w5[0], w4[3], selector); c3[3] = hc_byte_perm_S (w4[3], w4[2], selector); c3[2] = hc_byte_perm_S (w4[2], w4[1], selector); c3[1] = hc_byte_perm_S (w4[1], w4[0], selector); c3[0] = hc_byte_perm_S (w4[0], w3[3], selector); c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); c2[0] = hc_byte_perm_S (w3[0], w2[3], selector); c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); c1[2] = 
hc_byte_perm_S (w2[2], w2[1], selector); c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); w7[3] = hc_byte_perm_S (w0[3], w0[2], selector); w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); w7[0] = hc_byte_perm_S (w0[0], 0, selector); w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 29: c7[1] = hc_byte_perm_S ( 0, w7[3], selector); c7[0] = hc_byte_perm_S (w7[3], w7[2], selector); c6[3] = hc_byte_perm_S (w7[2], w7[1], selector); c6[2] = hc_byte_perm_S (w7[1], w7[0], selector); c6[1] = hc_byte_perm_S (w7[0], w6[3], selector); c6[0] = hc_byte_perm_S (w6[3], w6[2], selector); c5[3] = hc_byte_perm_S (w6[2], w6[1], selector); c5[2] = hc_byte_perm_S (w6[1], w6[0], selector); c5[1] = hc_byte_perm_S (w6[0], w5[3], selector); c5[0] = hc_byte_perm_S (w5[3], w5[2], selector); c4[3] = hc_byte_perm_S (w5[2], w5[1], selector); c4[2] = hc_byte_perm_S (w5[1], w5[0], selector); c4[1] = hc_byte_perm_S (w5[0], w4[3], selector); c4[0] = hc_byte_perm_S (w4[3], w4[2], selector); c3[3] = hc_byte_perm_S (w4[2], w4[1], selector); c3[2] = hc_byte_perm_S (w4[1], w4[0], selector); c3[1] = hc_byte_perm_S (w4[0], w3[3], selector); c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); c1[2] = hc_byte_perm_S 
(w2[1], w2[0], selector); c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); c0[2] = hc_byte_perm_S (w1[1], w1[0], selector); c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); w7[1] = hc_byte_perm_S (w0[0], 0, selector); w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 30: c7[2] = hc_byte_perm_S ( 0, w7[3], selector); c7[1] = hc_byte_perm_S (w7[3], w7[2], selector); c7[0] = hc_byte_perm_S (w7[2], w7[1], selector); c6[3] = hc_byte_perm_S (w7[1], w7[0], selector); c6[2] = hc_byte_perm_S (w7[0], w6[3], selector); c6[1] = hc_byte_perm_S (w6[3], w6[2], selector); c6[0] = hc_byte_perm_S (w6[2], w6[1], selector); c5[3] = hc_byte_perm_S (w6[1], w6[0], selector); c5[2] = hc_byte_perm_S (w6[0], w5[3], selector); c5[1] = hc_byte_perm_S (w5[3], w5[2], selector); c5[0] = hc_byte_perm_S (w5[2], w5[1], selector); c4[3] = hc_byte_perm_S (w5[1], w5[0], selector); c4[2] = hc_byte_perm_S (w5[0], w4[3], selector); c4[1] = hc_byte_perm_S (w4[3], w4[2], selector); c4[0] = hc_byte_perm_S (w4[2], w4[1], selector); c3[3] = hc_byte_perm_S (w4[1], w4[0], selector); c3[2] = hc_byte_perm_S (w4[0], w3[3], selector); c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); c3[0] = hc_byte_perm_S (w3[2], w3[1], selector); c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); c1[2] = hc_byte_perm_S 
(w2[0], w1[3], selector); c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); w7[2] = hc_byte_perm_S (w0[0], 0, selector); w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; case 31: c7[3] = hc_byte_perm_S ( 0, w7[3], selector); c7[2] = hc_byte_perm_S (w7[3], w7[2], selector); c7[1] = hc_byte_perm_S (w7[2], w7[1], selector); c7[0] = hc_byte_perm_S (w7[1], w7[0], selector); c6[3] = hc_byte_perm_S (w7[0], w6[3], selector); c6[2] = hc_byte_perm_S (w6[3], w6[2], selector); c6[1] = hc_byte_perm_S (w6[2], w6[1], selector); c6[0] = hc_byte_perm_S (w6[1], w6[0], selector); c5[3] = hc_byte_perm_S (w6[0], w5[3], selector); c5[2] = hc_byte_perm_S (w5[3], w5[2], selector); c5[1] = hc_byte_perm_S (w5[2], w5[1], selector); c5[0] = hc_byte_perm_S (w5[1], w5[0], selector); c4[3] = hc_byte_perm_S (w5[0], w4[3], selector); c4[2] = hc_byte_perm_S (w4[3], w4[2], selector); c4[1] = hc_byte_perm_S (w4[2], w4[1], selector); c4[0] = hc_byte_perm_S (w4[1], w4[0], selector); c3[3] = hc_byte_perm_S (w4[0], w3[3], selector); c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); c1[2] = 
hc_byte_perm_S (w1[3], w1[2], selector); c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); c0[3] = hc_byte_perm_S (w1[0], w0[3], selector); c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); w7[3] = hc_byte_perm_S (w0[0], 0, selector); w7[2] = 0; w7[1] = 0; w7[0] = 0; w6[3] = 0; w6[2] = 0; w6[1] = 0; w6[0] = 0; w5[3] = 0; w5[2] = 0; w5[1] = 0; w5[0] = 0; w4[3] = 0; w4[2] = 0; w4[1] = 0; w4[0] = 0; w3[3] = 0; w3[2] = 0; w3[1] = 0; w3[0] = 0; w2[3] = 0; w2[2] = 0; w2[1] = 0; w2[0] = 0; w1[3] = 0; w1[2] = 0; w1[1] = 0; w1[0] = 0; w0[3] = 0; w0[2] = 0; w0[1] = 0; w0[0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_1x64_le_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w[63] = hc_bytealign_S (w[62], w[63], offset); w[62] = hc_bytealign_S (w[61], w[62], offset); w[61] = hc_bytealign_S (w[60], w[61], offset); w[60] = hc_bytealign_S (w[59], w[60], offset); w[59] = hc_bytealign_S (w[58], w[59], offset); w[58] = hc_bytealign_S (w[57], w[58], offset); w[57] = hc_bytealign_S (w[56], w[57], offset); w[56] = hc_bytealign_S (w[55], w[56], offset); w[55] = hc_bytealign_S (w[54], w[55], offset); w[54] = hc_bytealign_S (w[53], w[54], offset); w[53] = hc_bytealign_S (w[52], w[53], offset); w[52] = hc_bytealign_S (w[51], w[52], offset); w[51] = hc_bytealign_S (w[50], w[51], offset); w[50] = hc_bytealign_S (w[49], w[50], offset); w[49] = hc_bytealign_S (w[48], w[49], offset); w[48] = hc_bytealign_S (w[47], w[48], offset); w[47] = hc_bytealign_S (w[46], w[47], offset); w[46] = hc_bytealign_S (w[45], w[46], offset); w[45] = hc_bytealign_S (w[44], w[45], offset); w[44] = hc_bytealign_S (w[43], w[44], offset); w[43] = hc_bytealign_S (w[42], w[43], offset); w[42] = hc_bytealign_S (w[41], 
w[42], offset); w[41] = hc_bytealign_S (w[40], w[41], offset); w[40] = hc_bytealign_S (w[39], w[40], offset); w[39] = hc_bytealign_S (w[38], w[39], offset); w[38] = hc_bytealign_S (w[37], w[38], offset); w[37] = hc_bytealign_S (w[36], w[37], offset); w[36] = hc_bytealign_S (w[35], w[36], offset); w[35] = hc_bytealign_S (w[34], w[35], offset); w[34] = hc_bytealign_S (w[33], w[34], offset); w[33] = hc_bytealign_S (w[32], w[33], offset); w[32] = hc_bytealign_S (w[31], w[32], offset); w[31] = hc_bytealign_S (w[30], w[31], offset); w[30] = hc_bytealign_S (w[29], w[30], offset); w[29] = hc_bytealign_S (w[28], w[29], offset); w[28] = hc_bytealign_S (w[27], w[28], offset); w[27] = hc_bytealign_S (w[26], w[27], offset); w[26] = hc_bytealign_S (w[25], w[26], offset); w[25] = hc_bytealign_S (w[24], w[25], offset); w[24] = hc_bytealign_S (w[23], w[24], offset); w[23] = hc_bytealign_S (w[22], w[23], offset); w[22] = hc_bytealign_S (w[21], w[22], offset); w[21] = hc_bytealign_S (w[20], w[21], offset); w[20] = hc_bytealign_S (w[19], w[20], offset); w[19] = hc_bytealign_S (w[18], w[19], offset); w[18] = hc_bytealign_S (w[17], w[18], offset); w[17] = hc_bytealign_S (w[16], w[17], offset); w[16] = hc_bytealign_S (w[15], w[16], offset); w[15] = hc_bytealign_S (w[14], w[15], offset); w[14] = hc_bytealign_S (w[13], w[14], offset); w[13] = hc_bytealign_S (w[12], w[13], offset); w[12] = hc_bytealign_S (w[11], w[12], offset); w[11] = hc_bytealign_S (w[10], w[11], offset); w[10] = hc_bytealign_S (w[ 9], w[10], offset); w[ 9] = hc_bytealign_S (w[ 8], w[ 9], offset); w[ 8] = hc_bytealign_S (w[ 7], w[ 8], offset); w[ 7] = hc_bytealign_S (w[ 6], w[ 7], offset); w[ 6] = hc_bytealign_S (w[ 5], w[ 6], offset); w[ 5] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 4] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 3] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 2] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 1] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 0] = hc_bytealign_S ( 0, w[ 0], offset); break; case 
1: w[63] = hc_bytealign_S (w[61], w[62], offset); w[62] = hc_bytealign_S (w[60], w[61], offset); w[61] = hc_bytealign_S (w[59], w[60], offset); w[60] = hc_bytealign_S (w[58], w[59], offset); w[59] = hc_bytealign_S (w[57], w[58], offset); w[58] = hc_bytealign_S (w[56], w[57], offset); w[57] = hc_bytealign_S (w[55], w[56], offset); w[56] = hc_bytealign_S (w[54], w[55], offset); w[55] = hc_bytealign_S (w[53], w[54], offset); w[54] = hc_bytealign_S (w[52], w[53], offset); w[53] = hc_bytealign_S (w[51], w[52], offset); w[52] = hc_bytealign_S (w[50], w[51], offset); w[51] = hc_bytealign_S (w[49], w[50], offset); w[50] = hc_bytealign_S (w[48], w[49], offset); w[49] = hc_bytealign_S (w[47], w[48], offset); w[48] = hc_bytealign_S (w[46], w[47], offset); w[47] = hc_bytealign_S (w[45], w[46], offset); w[46] = hc_bytealign_S (w[44], w[45], offset); w[45] = hc_bytealign_S (w[43], w[44], offset); w[44] = hc_bytealign_S (w[42], w[43], offset); w[43] = hc_bytealign_S (w[41], w[42], offset); w[42] = hc_bytealign_S (w[40], w[41], offset); w[41] = hc_bytealign_S (w[39], w[40], offset); w[40] = hc_bytealign_S (w[38], w[39], offset); w[39] = hc_bytealign_S (w[37], w[38], offset); w[38] = hc_bytealign_S (w[36], w[37], offset); w[37] = hc_bytealign_S (w[35], w[36], offset); w[36] = hc_bytealign_S (w[34], w[35], offset); w[35] = hc_bytealign_S (w[33], w[34], offset); w[34] = hc_bytealign_S (w[32], w[33], offset); w[33] = hc_bytealign_S (w[31], w[32], offset); w[32] = hc_bytealign_S (w[30], w[31], offset); w[31] = hc_bytealign_S (w[29], w[30], offset); w[30] = hc_bytealign_S (w[28], w[29], offset); w[29] = hc_bytealign_S (w[27], w[28], offset); w[28] = hc_bytealign_S (w[26], w[27], offset); w[27] = hc_bytealign_S (w[25], w[26], offset); w[26] = hc_bytealign_S (w[24], w[25], offset); w[25] = hc_bytealign_S (w[23], w[24], offset); w[24] = hc_bytealign_S (w[22], w[23], offset); w[23] = hc_bytealign_S (w[21], w[22], offset); w[22] = hc_bytealign_S (w[20], w[21], offset); w[21] = hc_bytealign_S 
(w[19], w[20], offset); w[20] = hc_bytealign_S (w[18], w[19], offset); w[19] = hc_bytealign_S (w[17], w[18], offset); w[18] = hc_bytealign_S (w[16], w[17], offset); w[17] = hc_bytealign_S (w[15], w[16], offset); w[16] = hc_bytealign_S (w[14], w[15], offset); w[15] = hc_bytealign_S (w[13], w[14], offset); w[14] = hc_bytealign_S (w[12], w[13], offset); w[13] = hc_bytealign_S (w[11], w[12], offset); w[12] = hc_bytealign_S (w[10], w[11], offset); w[11] = hc_bytealign_S (w[ 9], w[10], offset); w[10] = hc_bytealign_S (w[ 8], w[ 9], offset); w[ 9] = hc_bytealign_S (w[ 7], w[ 8], offset); w[ 8] = hc_bytealign_S (w[ 6], w[ 7], offset); w[ 7] = hc_bytealign_S (w[ 5], w[ 6], offset); w[ 6] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 5] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 4] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 3] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 2] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 1] = hc_bytealign_S ( 0, w[ 0], offset); w[ 0] = 0; break; case 2: w[63] = hc_bytealign_S (w[60], w[61], offset); w[62] = hc_bytealign_S (w[59], w[60], offset); w[61] = hc_bytealign_S (w[58], w[59], offset); w[60] = hc_bytealign_S (w[57], w[58], offset); w[59] = hc_bytealign_S (w[56], w[57], offset); w[58] = hc_bytealign_S (w[55], w[56], offset); w[57] = hc_bytealign_S (w[54], w[55], offset); w[56] = hc_bytealign_S (w[53], w[54], offset); w[55] = hc_bytealign_S (w[52], w[53], offset); w[54] = hc_bytealign_S (w[51], w[52], offset); w[53] = hc_bytealign_S (w[50], w[51], offset); w[52] = hc_bytealign_S (w[49], w[50], offset); w[51] = hc_bytealign_S (w[48], w[49], offset); w[50] = hc_bytealign_S (w[47], w[48], offset); w[49] = hc_bytealign_S (w[46], w[47], offset); w[48] = hc_bytealign_S (w[45], w[46], offset); w[47] = hc_bytealign_S (w[44], w[45], offset); w[46] = hc_bytealign_S (w[43], w[44], offset); w[45] = hc_bytealign_S (w[42], w[43], offset); w[44] = hc_bytealign_S (w[41], w[42], offset); w[43] = hc_bytealign_S (w[40], w[41], offset); w[42] = hc_bytealign_S 
(w[39], w[40], offset); w[41] = hc_bytealign_S (w[38], w[39], offset); w[40] = hc_bytealign_S (w[37], w[38], offset); w[39] = hc_bytealign_S (w[36], w[37], offset); w[38] = hc_bytealign_S (w[35], w[36], offset); w[37] = hc_bytealign_S (w[34], w[35], offset); w[36] = hc_bytealign_S (w[33], w[34], offset); w[35] = hc_bytealign_S (w[32], w[33], offset); w[34] = hc_bytealign_S (w[31], w[32], offset); w[33] = hc_bytealign_S (w[30], w[31], offset); w[32] = hc_bytealign_S (w[29], w[30], offset); w[31] = hc_bytealign_S (w[28], w[29], offset); w[30] = hc_bytealign_S (w[27], w[28], offset); w[29] = hc_bytealign_S (w[26], w[27], offset); w[28] = hc_bytealign_S (w[25], w[26], offset); w[27] = hc_bytealign_S (w[24], w[25], offset); w[26] = hc_bytealign_S (w[23], w[24], offset); w[25] = hc_bytealign_S (w[22], w[23], offset); w[24] = hc_bytealign_S (w[21], w[22], offset); w[23] = hc_bytealign_S (w[20], w[21], offset); w[22] = hc_bytealign_S (w[19], w[20], offset); w[21] = hc_bytealign_S (w[18], w[19], offset); w[20] = hc_bytealign_S (w[17], w[18], offset); w[19] = hc_bytealign_S (w[16], w[17], offset); w[18] = hc_bytealign_S (w[15], w[16], offset); w[17] = hc_bytealign_S (w[14], w[15], offset); w[16] = hc_bytealign_S (w[13], w[14], offset); w[15] = hc_bytealign_S (w[12], w[13], offset); w[14] = hc_bytealign_S (w[11], w[12], offset); w[13] = hc_bytealign_S (w[10], w[11], offset); w[12] = hc_bytealign_S (w[ 9], w[10], offset); w[11] = hc_bytealign_S (w[ 8], w[ 9], offset); w[10] = hc_bytealign_S (w[ 7], w[ 8], offset); w[ 9] = hc_bytealign_S (w[ 6], w[ 7], offset); w[ 8] = hc_bytealign_S (w[ 5], w[ 6], offset); w[ 7] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 6] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 5] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 4] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 3] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 2] = hc_bytealign_S ( 0, w[ 0], offset); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_bytealign_S (w[59], w[60], offset); w[62] = 
hc_bytealign_S (w[58], w[59], offset); w[61] = hc_bytealign_S (w[57], w[58], offset); w[60] = hc_bytealign_S (w[56], w[57], offset); w[59] = hc_bytealign_S (w[55], w[56], offset); w[58] = hc_bytealign_S (w[54], w[55], offset); w[57] = hc_bytealign_S (w[53], w[54], offset); w[56] = hc_bytealign_S (w[52], w[53], offset); w[55] = hc_bytealign_S (w[51], w[52], offset); w[54] = hc_bytealign_S (w[50], w[51], offset); w[53] = hc_bytealign_S (w[49], w[50], offset); w[52] = hc_bytealign_S (w[48], w[49], offset); w[51] = hc_bytealign_S (w[47], w[48], offset); w[50] = hc_bytealign_S (w[46], w[47], offset); w[49] = hc_bytealign_S (w[45], w[46], offset); w[48] = hc_bytealign_S (w[44], w[45], offset); w[47] = hc_bytealign_S (w[43], w[44], offset); w[46] = hc_bytealign_S (w[42], w[43], offset); w[45] = hc_bytealign_S (w[41], w[42], offset); w[44] = hc_bytealign_S (w[40], w[41], offset); w[43] = hc_bytealign_S (w[39], w[40], offset); w[42] = hc_bytealign_S (w[38], w[39], offset); w[41] = hc_bytealign_S (w[37], w[38], offset); w[40] = hc_bytealign_S (w[36], w[37], offset); w[39] = hc_bytealign_S (w[35], w[36], offset); w[38] = hc_bytealign_S (w[34], w[35], offset); w[37] = hc_bytealign_S (w[33], w[34], offset); w[36] = hc_bytealign_S (w[32], w[33], offset); w[35] = hc_bytealign_S (w[31], w[32], offset); w[34] = hc_bytealign_S (w[30], w[31], offset); w[33] = hc_bytealign_S (w[29], w[30], offset); w[32] = hc_bytealign_S (w[28], w[29], offset); w[31] = hc_bytealign_S (w[27], w[28], offset); w[30] = hc_bytealign_S (w[26], w[27], offset); w[29] = hc_bytealign_S (w[25], w[26], offset); w[28] = hc_bytealign_S (w[24], w[25], offset); w[27] = hc_bytealign_S (w[23], w[24], offset); w[26] = hc_bytealign_S (w[22], w[23], offset); w[25] = hc_bytealign_S (w[21], w[22], offset); w[24] = hc_bytealign_S (w[20], w[21], offset); w[23] = hc_bytealign_S (w[19], w[20], offset); w[22] = hc_bytealign_S (w[18], w[19], offset); w[21] = hc_bytealign_S (w[17], w[18], offset); w[20] = hc_bytealign_S (w[16], 
w[17], offset); w[19] = hc_bytealign_S (w[15], w[16], offset); w[18] = hc_bytealign_S (w[14], w[15], offset); w[17] = hc_bytealign_S (w[13], w[14], offset); w[16] = hc_bytealign_S (w[12], w[13], offset); w[15] = hc_bytealign_S (w[11], w[12], offset); w[14] = hc_bytealign_S (w[10], w[11], offset); w[13] = hc_bytealign_S (w[ 9], w[10], offset); w[12] = hc_bytealign_S (w[ 8], w[ 9], offset); w[11] = hc_bytealign_S (w[ 7], w[ 8], offset); w[10] = hc_bytealign_S (w[ 6], w[ 7], offset); w[ 9] = hc_bytealign_S (w[ 5], w[ 6], offset); w[ 8] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 7] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 6] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 5] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 4] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 3] = hc_bytealign_S ( 0, w[ 0], offset); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_bytealign_S (w[58], w[59], offset); w[62] = hc_bytealign_S (w[57], w[58], offset); w[61] = hc_bytealign_S (w[56], w[57], offset); w[60] = hc_bytealign_S (w[55], w[56], offset); w[59] = hc_bytealign_S (w[54], w[55], offset); w[58] = hc_bytealign_S (w[53], w[54], offset); w[57] = hc_bytealign_S (w[52], w[53], offset); w[56] = hc_bytealign_S (w[51], w[52], offset); w[55] = hc_bytealign_S (w[50], w[51], offset); w[54] = hc_bytealign_S (w[49], w[50], offset); w[53] = hc_bytealign_S (w[48], w[49], offset); w[52] = hc_bytealign_S (w[47], w[48], offset); w[51] = hc_bytealign_S (w[46], w[47], offset); w[50] = hc_bytealign_S (w[45], w[46], offset); w[49] = hc_bytealign_S (w[44], w[45], offset); w[48] = hc_bytealign_S (w[43], w[44], offset); w[47] = hc_bytealign_S (w[42], w[43], offset); w[46] = hc_bytealign_S (w[41], w[42], offset); w[45] = hc_bytealign_S (w[40], w[41], offset); w[44] = hc_bytealign_S (w[39], w[40], offset); w[43] = hc_bytealign_S (w[38], w[39], offset); w[42] = hc_bytealign_S (w[37], w[38], offset); w[41] = hc_bytealign_S (w[36], w[37], offset); w[40] = hc_bytealign_S (w[35], w[36], offset); w[39] = 
hc_bytealign_S (w[34], w[35], offset); w[38] = hc_bytealign_S (w[33], w[34], offset); w[37] = hc_bytealign_S (w[32], w[33], offset); w[36] = hc_bytealign_S (w[31], w[32], offset); w[35] = hc_bytealign_S (w[30], w[31], offset); w[34] = hc_bytealign_S (w[29], w[30], offset); w[33] = hc_bytealign_S (w[28], w[29], offset); w[32] = hc_bytealign_S (w[27], w[28], offset); w[31] = hc_bytealign_S (w[26], w[27], offset); w[30] = hc_bytealign_S (w[25], w[26], offset); w[29] = hc_bytealign_S (w[24], w[25], offset); w[28] = hc_bytealign_S (w[23], w[24], offset); w[27] = hc_bytealign_S (w[22], w[23], offset); w[26] = hc_bytealign_S (w[21], w[22], offset); w[25] = hc_bytealign_S (w[20], w[21], offset); w[24] = hc_bytealign_S (w[19], w[20], offset); w[23] = hc_bytealign_S (w[18], w[19], offset); w[22] = hc_bytealign_S (w[17], w[18], offset); w[21] = hc_bytealign_S (w[16], w[17], offset); w[20] = hc_bytealign_S (w[15], w[16], offset); w[19] = hc_bytealign_S (w[14], w[15], offset); w[18] = hc_bytealign_S (w[13], w[14], offset); w[17] = hc_bytealign_S (w[12], w[13], offset); w[16] = hc_bytealign_S (w[11], w[12], offset); w[15] = hc_bytealign_S (w[10], w[11], offset); w[14] = hc_bytealign_S (w[ 9], w[10], offset); w[13] = hc_bytealign_S (w[ 8], w[ 9], offset); w[12] = hc_bytealign_S (w[ 7], w[ 8], offset); w[11] = hc_bytealign_S (w[ 6], w[ 7], offset); w[10] = hc_bytealign_S (w[ 5], w[ 6], offset); w[ 9] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 8] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 7] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 6] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 5] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 4] = hc_bytealign_S ( 0, w[ 0], offset); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_bytealign_S (w[57], w[58], offset); w[62] = hc_bytealign_S (w[56], w[57], offset); w[61] = hc_bytealign_S (w[55], w[56], offset); w[60] = hc_bytealign_S (w[54], w[55], offset); w[59] = hc_bytealign_S (w[53], w[54], offset); w[58] = hc_bytealign_S 
(w[52], w[53], offset); w[57] = hc_bytealign_S (w[51], w[52], offset); w[56] = hc_bytealign_S (w[50], w[51], offset); w[55] = hc_bytealign_S (w[49], w[50], offset); w[54] = hc_bytealign_S (w[48], w[49], offset); w[53] = hc_bytealign_S (w[47], w[48], offset); w[52] = hc_bytealign_S (w[46], w[47], offset); w[51] = hc_bytealign_S (w[45], w[46], offset); w[50] = hc_bytealign_S (w[44], w[45], offset); w[49] = hc_bytealign_S (w[43], w[44], offset); w[48] = hc_bytealign_S (w[42], w[43], offset); w[47] = hc_bytealign_S (w[41], w[42], offset); w[46] = hc_bytealign_S (w[40], w[41], offset); w[45] = hc_bytealign_S (w[39], w[40], offset); w[44] = hc_bytealign_S (w[38], w[39], offset); w[43] = hc_bytealign_S (w[37], w[38], offset); w[42] = hc_bytealign_S (w[36], w[37], offset); w[41] = hc_bytealign_S (w[35], w[36], offset); w[40] = hc_bytealign_S (w[34], w[35], offset); w[39] = hc_bytealign_S (w[33], w[34], offset); w[38] = hc_bytealign_S (w[32], w[33], offset); w[37] = hc_bytealign_S (w[31], w[32], offset); w[36] = hc_bytealign_S (w[30], w[31], offset); w[35] = hc_bytealign_S (w[29], w[30], offset); w[34] = hc_bytealign_S (w[28], w[29], offset); w[33] = hc_bytealign_S (w[27], w[28], offset); w[32] = hc_bytealign_S (w[26], w[27], offset); w[31] = hc_bytealign_S (w[25], w[26], offset); w[30] = hc_bytealign_S (w[24], w[25], offset); w[29] = hc_bytealign_S (w[23], w[24], offset); w[28] = hc_bytealign_S (w[22], w[23], offset); w[27] = hc_bytealign_S (w[21], w[22], offset); w[26] = hc_bytealign_S (w[20], w[21], offset); w[25] = hc_bytealign_S (w[19], w[20], offset); w[24] = hc_bytealign_S (w[18], w[19], offset); w[23] = hc_bytealign_S (w[17], w[18], offset); w[22] = hc_bytealign_S (w[16], w[17], offset); w[21] = hc_bytealign_S (w[15], w[16], offset); w[20] = hc_bytealign_S (w[14], w[15], offset); w[19] = hc_bytealign_S (w[13], w[14], offset); w[18] = hc_bytealign_S (w[12], w[13], offset); w[17] = hc_bytealign_S (w[11], w[12], offset); w[16] = hc_bytealign_S (w[10], w[11], offset); 
w[15] = hc_bytealign_S (w[ 9], w[10], offset); w[14] = hc_bytealign_S (w[ 8], w[ 9], offset); w[13] = hc_bytealign_S (w[ 7], w[ 8], offset); w[12] = hc_bytealign_S (w[ 6], w[ 7], offset); w[11] = hc_bytealign_S (w[ 5], w[ 6], offset); w[10] = hc_bytealign_S (w[ 4], w[ 5], offset); w[ 9] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 8] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 7] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 6] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 5] = hc_bytealign_S ( 0, w[ 0], offset); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_bytealign_S (w[56], w[57], offset); w[62] = hc_bytealign_S (w[55], w[56], offset); w[61] = hc_bytealign_S (w[54], w[55], offset); w[60] = hc_bytealign_S (w[53], w[54], offset); w[59] = hc_bytealign_S (w[52], w[53], offset); w[58] = hc_bytealign_S (w[51], w[52], offset); w[57] = hc_bytealign_S (w[50], w[51], offset); w[56] = hc_bytealign_S (w[49], w[50], offset); w[55] = hc_bytealign_S (w[48], w[49], offset); w[54] = hc_bytealign_S (w[47], w[48], offset); w[53] = hc_bytealign_S (w[46], w[47], offset); w[52] = hc_bytealign_S (w[45], w[46], offset); w[51] = hc_bytealign_S (w[44], w[45], offset); w[50] = hc_bytealign_S (w[43], w[44], offset); w[49] = hc_bytealign_S (w[42], w[43], offset); w[48] = hc_bytealign_S (w[41], w[42], offset); w[47] = hc_bytealign_S (w[40], w[41], offset); w[46] = hc_bytealign_S (w[39], w[40], offset); w[45] = hc_bytealign_S (w[38], w[39], offset); w[44] = hc_bytealign_S (w[37], w[38], offset); w[43] = hc_bytealign_S (w[36], w[37], offset); w[42] = hc_bytealign_S (w[35], w[36], offset); w[41] = hc_bytealign_S (w[34], w[35], offset); w[40] = hc_bytealign_S (w[33], w[34], offset); w[39] = hc_bytealign_S (w[32], w[33], offset); w[38] = hc_bytealign_S (w[31], w[32], offset); w[37] = hc_bytealign_S (w[30], w[31], offset); w[36] = hc_bytealign_S (w[29], w[30], offset); w[35] = hc_bytealign_S (w[28], w[29], offset); w[34] = hc_bytealign_S (w[27], w[28], offset); w[33] 
= hc_bytealign_S (w[26], w[27], offset); w[32] = hc_bytealign_S (w[25], w[26], offset); w[31] = hc_bytealign_S (w[24], w[25], offset); w[30] = hc_bytealign_S (w[23], w[24], offset); w[29] = hc_bytealign_S (w[22], w[23], offset); w[28] = hc_bytealign_S (w[21], w[22], offset); w[27] = hc_bytealign_S (w[20], w[21], offset); w[26] = hc_bytealign_S (w[19], w[20], offset); w[25] = hc_bytealign_S (w[18], w[19], offset); w[24] = hc_bytealign_S (w[17], w[18], offset); w[23] = hc_bytealign_S (w[16], w[17], offset); w[22] = hc_bytealign_S (w[15], w[16], offset); w[21] = hc_bytealign_S (w[14], w[15], offset); w[20] = hc_bytealign_S (w[13], w[14], offset); w[19] = hc_bytealign_S (w[12], w[13], offset); w[18] = hc_bytealign_S (w[11], w[12], offset); w[17] = hc_bytealign_S (w[10], w[11], offset); w[16] = hc_bytealign_S (w[ 9], w[10], offset); w[15] = hc_bytealign_S (w[ 8], w[ 9], offset); w[14] = hc_bytealign_S (w[ 7], w[ 8], offset); w[13] = hc_bytealign_S (w[ 6], w[ 7], offset); w[12] = hc_bytealign_S (w[ 5], w[ 6], offset); w[11] = hc_bytealign_S (w[ 4], w[ 5], offset); w[10] = hc_bytealign_S (w[ 3], w[ 4], offset); w[ 9] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 8] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 7] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 6] = hc_bytealign_S ( 0, w[ 0], offset); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_bytealign_S (w[55], w[56], offset); w[62] = hc_bytealign_S (w[54], w[55], offset); w[61] = hc_bytealign_S (w[53], w[54], offset); w[60] = hc_bytealign_S (w[52], w[53], offset); w[59] = hc_bytealign_S (w[51], w[52], offset); w[58] = hc_bytealign_S (w[50], w[51], offset); w[57] = hc_bytealign_S (w[49], w[50], offset); w[56] = hc_bytealign_S (w[48], w[49], offset); w[55] = hc_bytealign_S (w[47], w[48], offset); w[54] = hc_bytealign_S (w[46], w[47], offset); w[53] = hc_bytealign_S (w[45], w[46], offset); w[52] = hc_bytealign_S (w[44], w[45], offset); w[51] = hc_bytealign_S (w[43], w[44], offset); 
w[50] = hc_bytealign_S (w[42], w[43], offset); w[49] = hc_bytealign_S (w[41], w[42], offset); w[48] = hc_bytealign_S (w[40], w[41], offset); w[47] = hc_bytealign_S (w[39], w[40], offset); w[46] = hc_bytealign_S (w[38], w[39], offset); w[45] = hc_bytealign_S (w[37], w[38], offset); w[44] = hc_bytealign_S (w[36], w[37], offset); w[43] = hc_bytealign_S (w[35], w[36], offset); w[42] = hc_bytealign_S (w[34], w[35], offset); w[41] = hc_bytealign_S (w[33], w[34], offset); w[40] = hc_bytealign_S (w[32], w[33], offset); w[39] = hc_bytealign_S (w[31], w[32], offset); w[38] = hc_bytealign_S (w[30], w[31], offset); w[37] = hc_bytealign_S (w[29], w[30], offset); w[36] = hc_bytealign_S (w[28], w[29], offset); w[35] = hc_bytealign_S (w[27], w[28], offset); w[34] = hc_bytealign_S (w[26], w[27], offset); w[33] = hc_bytealign_S (w[25], w[26], offset); w[32] = hc_bytealign_S (w[24], w[25], offset); w[31] = hc_bytealign_S (w[23], w[24], offset); w[30] = hc_bytealign_S (w[22], w[23], offset); w[29] = hc_bytealign_S (w[21], w[22], offset); w[28] = hc_bytealign_S (w[20], w[21], offset); w[27] = hc_bytealign_S (w[19], w[20], offset); w[26] = hc_bytealign_S (w[18], w[19], offset); w[25] = hc_bytealign_S (w[17], w[18], offset); w[24] = hc_bytealign_S (w[16], w[17], offset); w[23] = hc_bytealign_S (w[15], w[16], offset); w[22] = hc_bytealign_S (w[14], w[15], offset); w[21] = hc_bytealign_S (w[13], w[14], offset); w[20] = hc_bytealign_S (w[12], w[13], offset); w[19] = hc_bytealign_S (w[11], w[12], offset); w[18] = hc_bytealign_S (w[10], w[11], offset); w[17] = hc_bytealign_S (w[ 9], w[10], offset); w[16] = hc_bytealign_S (w[ 8], w[ 9], offset); w[15] = hc_bytealign_S (w[ 7], w[ 8], offset); w[14] = hc_bytealign_S (w[ 6], w[ 7], offset); w[13] = hc_bytealign_S (w[ 5], w[ 6], offset); w[12] = hc_bytealign_S (w[ 4], w[ 5], offset); w[11] = hc_bytealign_S (w[ 3], w[ 4], offset); w[10] = hc_bytealign_S (w[ 2], w[ 3], offset); w[ 9] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 8] = hc_bytealign_S 
(w[ 0], w[ 1], offset); w[ 7] = hc_bytealign_S ( 0, w[ 0], offset); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_bytealign_S (w[54], w[55], offset); w[62] = hc_bytealign_S (w[53], w[54], offset); w[61] = hc_bytealign_S (w[52], w[53], offset); w[60] = hc_bytealign_S (w[51], w[52], offset); w[59] = hc_bytealign_S (w[50], w[51], offset); w[58] = hc_bytealign_S (w[49], w[50], offset); w[57] = hc_bytealign_S (w[48], w[49], offset); w[56] = hc_bytealign_S (w[47], w[48], offset); w[55] = hc_bytealign_S (w[46], w[47], offset); w[54] = hc_bytealign_S (w[45], w[46], offset); w[53] = hc_bytealign_S (w[44], w[45], offset); w[52] = hc_bytealign_S (w[43], w[44], offset); w[51] = hc_bytealign_S (w[42], w[43], offset); w[50] = hc_bytealign_S (w[41], w[42], offset); w[49] = hc_bytealign_S (w[40], w[41], offset); w[48] = hc_bytealign_S (w[39], w[40], offset); w[47] = hc_bytealign_S (w[38], w[39], offset); w[46] = hc_bytealign_S (w[37], w[38], offset); w[45] = hc_bytealign_S (w[36], w[37], offset); w[44] = hc_bytealign_S (w[35], w[36], offset); w[43] = hc_bytealign_S (w[34], w[35], offset); w[42] = hc_bytealign_S (w[33], w[34], offset); w[41] = hc_bytealign_S (w[32], w[33], offset); w[40] = hc_bytealign_S (w[31], w[32], offset); w[39] = hc_bytealign_S (w[30], w[31], offset); w[38] = hc_bytealign_S (w[29], w[30], offset); w[37] = hc_bytealign_S (w[28], w[29], offset); w[36] = hc_bytealign_S (w[27], w[28], offset); w[35] = hc_bytealign_S (w[26], w[27], offset); w[34] = hc_bytealign_S (w[25], w[26], offset); w[33] = hc_bytealign_S (w[24], w[25], offset); w[32] = hc_bytealign_S (w[23], w[24], offset); w[31] = hc_bytealign_S (w[22], w[23], offset); w[30] = hc_bytealign_S (w[21], w[22], offset); w[29] = hc_bytealign_S (w[20], w[21], offset); w[28] = hc_bytealign_S (w[19], w[20], offset); w[27] = hc_bytealign_S (w[18], w[19], offset); w[26] = hc_bytealign_S (w[17], w[18], offset); w[25] = hc_bytealign_S (w[16], w[17], offset); w[24] 
= hc_bytealign_S (w[15], w[16], offset); w[23] = hc_bytealign_S (w[14], w[15], offset); w[22] = hc_bytealign_S (w[13], w[14], offset); w[21] = hc_bytealign_S (w[12], w[13], offset); w[20] = hc_bytealign_S (w[11], w[12], offset); w[19] = hc_bytealign_S (w[10], w[11], offset); w[18] = hc_bytealign_S (w[ 9], w[10], offset); w[17] = hc_bytealign_S (w[ 8], w[ 9], offset); w[16] = hc_bytealign_S (w[ 7], w[ 8], offset); w[15] = hc_bytealign_S (w[ 6], w[ 7], offset); w[14] = hc_bytealign_S (w[ 5], w[ 6], offset); w[13] = hc_bytealign_S (w[ 4], w[ 5], offset); w[12] = hc_bytealign_S (w[ 3], w[ 4], offset); w[11] = hc_bytealign_S (w[ 2], w[ 3], offset); w[10] = hc_bytealign_S (w[ 1], w[ 2], offset); w[ 9] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 8] = hc_bytealign_S ( 0, w[ 0], offset); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_bytealign_S (w[53], w[54], offset); w[62] = hc_bytealign_S (w[52], w[53], offset); w[61] = hc_bytealign_S (w[51], w[52], offset); w[60] = hc_bytealign_S (w[50], w[51], offset); w[59] = hc_bytealign_S (w[49], w[50], offset); w[58] = hc_bytealign_S (w[48], w[49], offset); w[57] = hc_bytealign_S (w[47], w[48], offset); w[56] = hc_bytealign_S (w[46], w[47], offset); w[55] = hc_bytealign_S (w[45], w[46], offset); w[54] = hc_bytealign_S (w[44], w[45], offset); w[53] = hc_bytealign_S (w[43], w[44], offset); w[52] = hc_bytealign_S (w[42], w[43], offset); w[51] = hc_bytealign_S (w[41], w[42], offset); w[50] = hc_bytealign_S (w[40], w[41], offset); w[49] = hc_bytealign_S (w[39], w[40], offset); w[48] = hc_bytealign_S (w[38], w[39], offset); w[47] = hc_bytealign_S (w[37], w[38], offset); w[46] = hc_bytealign_S (w[36], w[37], offset); w[45] = hc_bytealign_S (w[35], w[36], offset); w[44] = hc_bytealign_S (w[34], w[35], offset); w[43] = hc_bytealign_S (w[33], w[34], offset); w[42] = hc_bytealign_S (w[32], w[33], offset); w[41] = hc_bytealign_S (w[31], w[32], offset); w[40] = hc_bytealign_S 
(w[30], w[31], offset); w[39] = hc_bytealign_S (w[29], w[30], offset); w[38] = hc_bytealign_S (w[28], w[29], offset); w[37] = hc_bytealign_S (w[27], w[28], offset); w[36] = hc_bytealign_S (w[26], w[27], offset); w[35] = hc_bytealign_S (w[25], w[26], offset); w[34] = hc_bytealign_S (w[24], w[25], offset); w[33] = hc_bytealign_S (w[23], w[24], offset); w[32] = hc_bytealign_S (w[22], w[23], offset); w[31] = hc_bytealign_S (w[21], w[22], offset); w[30] = hc_bytealign_S (w[20], w[21], offset); w[29] = hc_bytealign_S (w[19], w[20], offset); w[28] = hc_bytealign_S (w[18], w[19], offset); w[27] = hc_bytealign_S (w[17], w[18], offset); w[26] = hc_bytealign_S (w[16], w[17], offset); w[25] = hc_bytealign_S (w[15], w[16], offset); w[24] = hc_bytealign_S (w[14], w[15], offset); w[23] = hc_bytealign_S (w[13], w[14], offset); w[22] = hc_bytealign_S (w[12], w[13], offset); w[21] = hc_bytealign_S (w[11], w[12], offset); w[20] = hc_bytealign_S (w[10], w[11], offset); w[19] = hc_bytealign_S (w[ 9], w[10], offset); w[18] = hc_bytealign_S (w[ 8], w[ 9], offset); w[17] = hc_bytealign_S (w[ 7], w[ 8], offset); w[16] = hc_bytealign_S (w[ 6], w[ 7], offset); w[15] = hc_bytealign_S (w[ 5], w[ 6], offset); w[14] = hc_bytealign_S (w[ 4], w[ 5], offset); w[13] = hc_bytealign_S (w[ 3], w[ 4], offset); w[12] = hc_bytealign_S (w[ 2], w[ 3], offset); w[11] = hc_bytealign_S (w[ 1], w[ 2], offset); w[10] = hc_bytealign_S (w[ 0], w[ 1], offset); w[ 9] = hc_bytealign_S ( 0, w[ 0], offset); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_bytealign_S (w[52], w[53], offset); w[62] = hc_bytealign_S (w[51], w[52], offset); w[61] = hc_bytealign_S (w[50], w[51], offset); w[60] = hc_bytealign_S (w[49], w[50], offset); w[59] = hc_bytealign_S (w[48], w[49], offset); w[58] = hc_bytealign_S (w[47], w[48], offset); w[57] = hc_bytealign_S (w[46], w[47], offset); w[56] = hc_bytealign_S (w[45], w[46], offset); w[55] = hc_bytealign_S (w[44], 
w[45], offset); w[54] = hc_bytealign_S (w[43], w[44], offset); w[53] = hc_bytealign_S (w[42], w[43], offset); w[52] = hc_bytealign_S (w[41], w[42], offset); w[51] = hc_bytealign_S (w[40], w[41], offset); w[50] = hc_bytealign_S (w[39], w[40], offset); w[49] = hc_bytealign_S (w[38], w[39], offset); w[48] = hc_bytealign_S (w[37], w[38], offset); w[47] = hc_bytealign_S (w[36], w[37], offset); w[46] = hc_bytealign_S (w[35], w[36], offset); w[45] = hc_bytealign_S (w[34], w[35], offset); w[44] = hc_bytealign_S (w[33], w[34], offset); w[43] = hc_bytealign_S (w[32], w[33], offset); w[42] = hc_bytealign_S (w[31], w[32], offset); w[41] = hc_bytealign_S (w[30], w[31], offset); w[40] = hc_bytealign_S (w[29], w[30], offset); w[39] = hc_bytealign_S (w[28], w[29], offset); w[38] = hc_bytealign_S (w[27], w[28], offset); w[37] = hc_bytealign_S (w[26], w[27], offset); w[36] = hc_bytealign_S (w[25], w[26], offset); w[35] = hc_bytealign_S (w[24], w[25], offset); w[34] = hc_bytealign_S (w[23], w[24], offset); w[33] = hc_bytealign_S (w[22], w[23], offset); w[32] = hc_bytealign_S (w[21], w[22], offset); w[31] = hc_bytealign_S (w[20], w[21], offset); w[30] = hc_bytealign_S (w[19], w[20], offset); w[29] = hc_bytealign_S (w[18], w[19], offset); w[28] = hc_bytealign_S (w[17], w[18], offset); w[27] = hc_bytealign_S (w[16], w[17], offset); w[26] = hc_bytealign_S (w[15], w[16], offset); w[25] = hc_bytealign_S (w[14], w[15], offset); w[24] = hc_bytealign_S (w[13], w[14], offset); w[23] = hc_bytealign_S (w[12], w[13], offset); w[22] = hc_bytealign_S (w[11], w[12], offset); w[21] = hc_bytealign_S (w[10], w[11], offset); w[20] = hc_bytealign_S (w[ 9], w[10], offset); w[19] = hc_bytealign_S (w[ 8], w[ 9], offset); w[18] = hc_bytealign_S (w[ 7], w[ 8], offset); w[17] = hc_bytealign_S (w[ 6], w[ 7], offset); w[16] = hc_bytealign_S (w[ 5], w[ 6], offset); w[15] = hc_bytealign_S (w[ 4], w[ 5], offset); w[14] = hc_bytealign_S (w[ 3], w[ 4], offset); w[13] = hc_bytealign_S (w[ 2], w[ 3], offset); w[12] = 
hc_bytealign_S (w[ 1], w[ 2], offset); w[11] = hc_bytealign_S (w[ 0], w[ 1], offset); w[10] = hc_bytealign_S ( 0, w[ 0], offset); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_bytealign_S (w[51], w[52], offset); w[62] = hc_bytealign_S (w[50], w[51], offset); w[61] = hc_bytealign_S (w[49], w[50], offset); w[60] = hc_bytealign_S (w[48], w[49], offset); w[59] = hc_bytealign_S (w[47], w[48], offset); w[58] = hc_bytealign_S (w[46], w[47], offset); w[57] = hc_bytealign_S (w[45], w[46], offset); w[56] = hc_bytealign_S (w[44], w[45], offset); w[55] = hc_bytealign_S (w[43], w[44], offset); w[54] = hc_bytealign_S (w[42], w[43], offset); w[53] = hc_bytealign_S (w[41], w[42], offset); w[52] = hc_bytealign_S (w[40], w[41], offset); w[51] = hc_bytealign_S (w[39], w[40], offset); w[50] = hc_bytealign_S (w[38], w[39], offset); w[49] = hc_bytealign_S (w[37], w[38], offset); w[48] = hc_bytealign_S (w[36], w[37], offset); w[47] = hc_bytealign_S (w[35], w[36], offset); w[46] = hc_bytealign_S (w[34], w[35], offset); w[45] = hc_bytealign_S (w[33], w[34], offset); w[44] = hc_bytealign_S (w[32], w[33], offset); w[43] = hc_bytealign_S (w[31], w[32], offset); w[42] = hc_bytealign_S (w[30], w[31], offset); w[41] = hc_bytealign_S (w[29], w[30], offset); w[40] = hc_bytealign_S (w[28], w[29], offset); w[39] = hc_bytealign_S (w[27], w[28], offset); w[38] = hc_bytealign_S (w[26], w[27], offset); w[37] = hc_bytealign_S (w[25], w[26], offset); w[36] = hc_bytealign_S (w[24], w[25], offset); w[35] = hc_bytealign_S (w[23], w[24], offset); w[34] = hc_bytealign_S (w[22], w[23], offset); w[33] = hc_bytealign_S (w[21], w[22], offset); w[32] = hc_bytealign_S (w[20], w[21], offset); w[31] = hc_bytealign_S (w[19], w[20], offset); w[30] = hc_bytealign_S (w[18], w[19], offset); w[29] = hc_bytealign_S (w[17], w[18], offset); w[28] = hc_bytealign_S (w[16], w[17], offset); w[27] = hc_bytealign_S (w[15], w[16], offset); 
w[26] = hc_bytealign_S (w[14], w[15], offset); w[25] = hc_bytealign_S (w[13], w[14], offset); w[24] = hc_bytealign_S (w[12], w[13], offset); w[23] = hc_bytealign_S (w[11], w[12], offset); w[22] = hc_bytealign_S (w[10], w[11], offset); w[21] = hc_bytealign_S (w[ 9], w[10], offset); w[20] = hc_bytealign_S (w[ 8], w[ 9], offset); w[19] = hc_bytealign_S (w[ 7], w[ 8], offset); w[18] = hc_bytealign_S (w[ 6], w[ 7], offset); w[17] = hc_bytealign_S (w[ 5], w[ 6], offset); w[16] = hc_bytealign_S (w[ 4], w[ 5], offset); w[15] = hc_bytealign_S (w[ 3], w[ 4], offset); w[14] = hc_bytealign_S (w[ 2], w[ 3], offset); w[13] = hc_bytealign_S (w[ 1], w[ 2], offset); w[12] = hc_bytealign_S (w[ 0], w[ 1], offset); w[11] = hc_bytealign_S ( 0, w[ 0], offset); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_bytealign_S (w[50], w[51], offset); w[62] = hc_bytealign_S (w[49], w[50], offset); w[61] = hc_bytealign_S (w[48], w[49], offset); w[60] = hc_bytealign_S (w[47], w[48], offset); w[59] = hc_bytealign_S (w[46], w[47], offset); w[58] = hc_bytealign_S (w[45], w[46], offset); w[57] = hc_bytealign_S (w[44], w[45], offset); w[56] = hc_bytealign_S (w[43], w[44], offset); w[55] = hc_bytealign_S (w[42], w[43], offset); w[54] = hc_bytealign_S (w[41], w[42], offset); w[53] = hc_bytealign_S (w[40], w[41], offset); w[52] = hc_bytealign_S (w[39], w[40], offset); w[51] = hc_bytealign_S (w[38], w[39], offset); w[50] = hc_bytealign_S (w[37], w[38], offset); w[49] = hc_bytealign_S (w[36], w[37], offset); w[48] = hc_bytealign_S (w[35], w[36], offset); w[47] = hc_bytealign_S (w[34], w[35], offset); w[46] = hc_bytealign_S (w[33], w[34], offset); w[45] = hc_bytealign_S (w[32], w[33], offset); w[44] = hc_bytealign_S (w[31], w[32], offset); w[43] = hc_bytealign_S (w[30], w[31], offset); w[42] = hc_bytealign_S (w[29], w[30], offset); w[41] = hc_bytealign_S (w[28], w[29], offset); w[40] = hc_bytealign_S (w[27], 
w[28], offset); w[39] = hc_bytealign_S (w[26], w[27], offset); w[38] = hc_bytealign_S (w[25], w[26], offset); w[37] = hc_bytealign_S (w[24], w[25], offset); w[36] = hc_bytealign_S (w[23], w[24], offset); w[35] = hc_bytealign_S (w[22], w[23], offset); w[34] = hc_bytealign_S (w[21], w[22], offset); w[33] = hc_bytealign_S (w[20], w[21], offset); w[32] = hc_bytealign_S (w[19], w[20], offset); w[31] = hc_bytealign_S (w[18], w[19], offset); w[30] = hc_bytealign_S (w[17], w[18], offset); w[29] = hc_bytealign_S (w[16], w[17], offset); w[28] = hc_bytealign_S (w[15], w[16], offset); w[27] = hc_bytealign_S (w[14], w[15], offset); w[26] = hc_bytealign_S (w[13], w[14], offset); w[25] = hc_bytealign_S (w[12], w[13], offset); w[24] = hc_bytealign_S (w[11], w[12], offset); w[23] = hc_bytealign_S (w[10], w[11], offset); w[22] = hc_bytealign_S (w[ 9], w[10], offset); w[21] = hc_bytealign_S (w[ 8], w[ 9], offset); w[20] = hc_bytealign_S (w[ 7], w[ 8], offset); w[19] = hc_bytealign_S (w[ 6], w[ 7], offset); w[18] = hc_bytealign_S (w[ 5], w[ 6], offset); w[17] = hc_bytealign_S (w[ 4], w[ 5], offset); w[16] = hc_bytealign_S (w[ 3], w[ 4], offset); w[15] = hc_bytealign_S (w[ 2], w[ 3], offset); w[14] = hc_bytealign_S (w[ 1], w[ 2], offset); w[13] = hc_bytealign_S (w[ 0], w[ 1], offset); w[12] = hc_bytealign_S ( 0, w[ 0], offset); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_bytealign_S (w[49], w[50], offset); w[62] = hc_bytealign_S (w[48], w[49], offset); w[61] = hc_bytealign_S (w[47], w[48], offset); w[60] = hc_bytealign_S (w[46], w[47], offset); w[59] = hc_bytealign_S (w[45], w[46], offset); w[58] = hc_bytealign_S (w[44], w[45], offset); w[57] = hc_bytealign_S (w[43], w[44], offset); w[56] = hc_bytealign_S (w[42], w[43], offset); w[55] = hc_bytealign_S (w[41], w[42], offset); w[54] = hc_bytealign_S (w[40], w[41], offset); w[53] = hc_bytealign_S (w[39], w[40], offset); w[52] 
= hc_bytealign_S (w[38], w[39], offset); w[51] = hc_bytealign_S (w[37], w[38], offset); w[50] = hc_bytealign_S (w[36], w[37], offset); w[49] = hc_bytealign_S (w[35], w[36], offset); w[48] = hc_bytealign_S (w[34], w[35], offset); w[47] = hc_bytealign_S (w[33], w[34], offset); w[46] = hc_bytealign_S (w[32], w[33], offset); w[45] = hc_bytealign_S (w[31], w[32], offset); w[44] = hc_bytealign_S (w[30], w[31], offset); w[43] = hc_bytealign_S (w[29], w[30], offset); w[42] = hc_bytealign_S (w[28], w[29], offset); w[41] = hc_bytealign_S (w[27], w[28], offset); w[40] = hc_bytealign_S (w[26], w[27], offset); w[39] = hc_bytealign_S (w[25], w[26], offset); w[38] = hc_bytealign_S (w[24], w[25], offset); w[37] = hc_bytealign_S (w[23], w[24], offset); w[36] = hc_bytealign_S (w[22], w[23], offset); w[35] = hc_bytealign_S (w[21], w[22], offset); w[34] = hc_bytealign_S (w[20], w[21], offset); w[33] = hc_bytealign_S (w[19], w[20], offset); w[32] = hc_bytealign_S (w[18], w[19], offset); w[31] = hc_bytealign_S (w[17], w[18], offset); w[30] = hc_bytealign_S (w[16], w[17], offset); w[29] = hc_bytealign_S (w[15], w[16], offset); w[28] = hc_bytealign_S (w[14], w[15], offset); w[27] = hc_bytealign_S (w[13], w[14], offset); w[26] = hc_bytealign_S (w[12], w[13], offset); w[25] = hc_bytealign_S (w[11], w[12], offset); w[24] = hc_bytealign_S (w[10], w[11], offset); w[23] = hc_bytealign_S (w[ 9], w[10], offset); w[22] = hc_bytealign_S (w[ 8], w[ 9], offset); w[21] = hc_bytealign_S (w[ 7], w[ 8], offset); w[20] = hc_bytealign_S (w[ 6], w[ 7], offset); w[19] = hc_bytealign_S (w[ 5], w[ 6], offset); w[18] = hc_bytealign_S (w[ 4], w[ 5], offset); w[17] = hc_bytealign_S (w[ 3], w[ 4], offset); w[16] = hc_bytealign_S (w[ 2], w[ 3], offset); w[15] = hc_bytealign_S (w[ 1], w[ 2], offset); w[14] = hc_bytealign_S (w[ 0], w[ 1], offset); w[13] = hc_bytealign_S ( 0, w[ 0], offset); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 
0; w[ 0] = 0; break; case 14: w[63] = hc_bytealign_S (w[48], w[49], offset); w[62] = hc_bytealign_S (w[47], w[48], offset); w[61] = hc_bytealign_S (w[46], w[47], offset); w[60] = hc_bytealign_S (w[45], w[46], offset); w[59] = hc_bytealign_S (w[44], w[45], offset); w[58] = hc_bytealign_S (w[43], w[44], offset); w[57] = hc_bytealign_S (w[42], w[43], offset); w[56] = hc_bytealign_S (w[41], w[42], offset); w[55] = hc_bytealign_S (w[40], w[41], offset); w[54] = hc_bytealign_S (w[39], w[40], offset); w[53] = hc_bytealign_S (w[38], w[39], offset); w[52] = hc_bytealign_S (w[37], w[38], offset); w[51] = hc_bytealign_S (w[36], w[37], offset); w[50] = hc_bytealign_S (w[35], w[36], offset); w[49] = hc_bytealign_S (w[34], w[35], offset); w[48] = hc_bytealign_S (w[33], w[34], offset); w[47] = hc_bytealign_S (w[32], w[33], offset); w[46] = hc_bytealign_S (w[31], w[32], offset); w[45] = hc_bytealign_S (w[30], w[31], offset); w[44] = hc_bytealign_S (w[29], w[30], offset); w[43] = hc_bytealign_S (w[28], w[29], offset); w[42] = hc_bytealign_S (w[27], w[28], offset); w[41] = hc_bytealign_S (w[26], w[27], offset); w[40] = hc_bytealign_S (w[25], w[26], offset); w[39] = hc_bytealign_S (w[24], w[25], offset); w[38] = hc_bytealign_S (w[23], w[24], offset); w[37] = hc_bytealign_S (w[22], w[23], offset); w[36] = hc_bytealign_S (w[21], w[22], offset); w[35] = hc_bytealign_S (w[20], w[21], offset); w[34] = hc_bytealign_S (w[19], w[20], offset); w[33] = hc_bytealign_S (w[18], w[19], offset); w[32] = hc_bytealign_S (w[17], w[18], offset); w[31] = hc_bytealign_S (w[16], w[17], offset); w[30] = hc_bytealign_S (w[15], w[16], offset); w[29] = hc_bytealign_S (w[14], w[15], offset); w[28] = hc_bytealign_S (w[13], w[14], offset); w[27] = hc_bytealign_S (w[12], w[13], offset); w[26] = hc_bytealign_S (w[11], w[12], offset); w[25] = hc_bytealign_S (w[10], w[11], offset); w[24] = hc_bytealign_S (w[ 9], w[10], offset); w[23] = hc_bytealign_S (w[ 8], w[ 9], offset); w[22] = hc_bytealign_S (w[ 7], w[ 8], 
offset); w[21] = hc_bytealign_S (w[ 6], w[ 7], offset); w[20] = hc_bytealign_S (w[ 5], w[ 6], offset); w[19] = hc_bytealign_S (w[ 4], w[ 5], offset); w[18] = hc_bytealign_S (w[ 3], w[ 4], offset); w[17] = hc_bytealign_S (w[ 2], w[ 3], offset); w[16] = hc_bytealign_S (w[ 1], w[ 2], offset); w[15] = hc_bytealign_S (w[ 0], w[ 1], offset); w[14] = hc_bytealign_S ( 0, w[ 0], offset); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_bytealign_S (w[47], w[48], offset); w[62] = hc_bytealign_S (w[46], w[47], offset); w[61] = hc_bytealign_S (w[45], w[46], offset); w[60] = hc_bytealign_S (w[44], w[45], offset); w[59] = hc_bytealign_S (w[43], w[44], offset); w[58] = hc_bytealign_S (w[42], w[43], offset); w[57] = hc_bytealign_S (w[41], w[42], offset); w[56] = hc_bytealign_S (w[40], w[41], offset); w[55] = hc_bytealign_S (w[39], w[40], offset); w[54] = hc_bytealign_S (w[38], w[39], offset); w[53] = hc_bytealign_S (w[37], w[38], offset); w[52] = hc_bytealign_S (w[36], w[37], offset); w[51] = hc_bytealign_S (w[35], w[36], offset); w[50] = hc_bytealign_S (w[34], w[35], offset); w[49] = hc_bytealign_S (w[33], w[34], offset); w[48] = hc_bytealign_S (w[32], w[33], offset); w[47] = hc_bytealign_S (w[31], w[32], offset); w[46] = hc_bytealign_S (w[30], w[31], offset); w[45] = hc_bytealign_S (w[29], w[30], offset); w[44] = hc_bytealign_S (w[28], w[29], offset); w[43] = hc_bytealign_S (w[27], w[28], offset); w[42] = hc_bytealign_S (w[26], w[27], offset); w[41] = hc_bytealign_S (w[25], w[26], offset); w[40] = hc_bytealign_S (w[24], w[25], offset); w[39] = hc_bytealign_S (w[23], w[24], offset); w[38] = hc_bytealign_S (w[22], w[23], offset); w[37] = hc_bytealign_S (w[21], w[22], offset); w[36] = hc_bytealign_S (w[20], w[21], offset); w[35] = hc_bytealign_S (w[19], w[20], offset); w[34] = hc_bytealign_S (w[18], w[19], offset); w[33] = hc_bytealign_S (w[17], w[18], 
offset); w[32] = hc_bytealign_S (w[16], w[17], offset); w[31] = hc_bytealign_S (w[15], w[16], offset); w[30] = hc_bytealign_S (w[14], w[15], offset); w[29] = hc_bytealign_S (w[13], w[14], offset); w[28] = hc_bytealign_S (w[12], w[13], offset); w[27] = hc_bytealign_S (w[11], w[12], offset); w[26] = hc_bytealign_S (w[10], w[11], offset); w[25] = hc_bytealign_S (w[ 9], w[10], offset); w[24] = hc_bytealign_S (w[ 8], w[ 9], offset); w[23] = hc_bytealign_S (w[ 7], w[ 8], offset); w[22] = hc_bytealign_S (w[ 6], w[ 7], offset); w[21] = hc_bytealign_S (w[ 5], w[ 6], offset); w[20] = hc_bytealign_S (w[ 4], w[ 5], offset); w[19] = hc_bytealign_S (w[ 3], w[ 4], offset); w[18] = hc_bytealign_S (w[ 2], w[ 3], offset); w[17] = hc_bytealign_S (w[ 1], w[ 2], offset); w[16] = hc_bytealign_S (w[ 0], w[ 1], offset); w[15] = hc_bytealign_S ( 0, w[ 0], offset); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_bytealign_S (w[46], w[47], offset); w[62] = hc_bytealign_S (w[45], w[46], offset); w[61] = hc_bytealign_S (w[44], w[45], offset); w[60] = hc_bytealign_S (w[43], w[44], offset); w[59] = hc_bytealign_S (w[42], w[43], offset); w[58] = hc_bytealign_S (w[41], w[42], offset); w[57] = hc_bytealign_S (w[40], w[41], offset); w[56] = hc_bytealign_S (w[39], w[40], offset); w[55] = hc_bytealign_S (w[38], w[39], offset); w[54] = hc_bytealign_S (w[37], w[38], offset); w[53] = hc_bytealign_S (w[36], w[37], offset); w[52] = hc_bytealign_S (w[35], w[36], offset); w[51] = hc_bytealign_S (w[34], w[35], offset); w[50] = hc_bytealign_S (w[33], w[34], offset); w[49] = hc_bytealign_S (w[32], w[33], offset); w[48] = hc_bytealign_S (w[31], w[32], offset); w[47] = hc_bytealign_S (w[30], w[31], offset); w[46] = hc_bytealign_S (w[29], w[30], offset); w[45] = hc_bytealign_S (w[28], w[29], offset); w[44] = hc_bytealign_S (w[27], w[28], offset); w[43] = hc_bytealign_S 
(w[26], w[27], offset); w[42] = hc_bytealign_S (w[25], w[26], offset); w[41] = hc_bytealign_S (w[24], w[25], offset); w[40] = hc_bytealign_S (w[23], w[24], offset); w[39] = hc_bytealign_S (w[22], w[23], offset); w[38] = hc_bytealign_S (w[21], w[22], offset); w[37] = hc_bytealign_S (w[20], w[21], offset); w[36] = hc_bytealign_S (w[19], w[20], offset); w[35] = hc_bytealign_S (w[18], w[19], offset); w[34] = hc_bytealign_S (w[17], w[18], offset); w[33] = hc_bytealign_S (w[16], w[17], offset); w[32] = hc_bytealign_S (w[15], w[16], offset); w[31] = hc_bytealign_S (w[14], w[15], offset); w[30] = hc_bytealign_S (w[13], w[14], offset); w[29] = hc_bytealign_S (w[12], w[13], offset); w[28] = hc_bytealign_S (w[11], w[12], offset); w[27] = hc_bytealign_S (w[10], w[11], offset); w[26] = hc_bytealign_S (w[ 9], w[10], offset); w[25] = hc_bytealign_S (w[ 8], w[ 9], offset); w[24] = hc_bytealign_S (w[ 7], w[ 8], offset); w[23] = hc_bytealign_S (w[ 6], w[ 7], offset); w[22] = hc_bytealign_S (w[ 5], w[ 6], offset); w[21] = hc_bytealign_S (w[ 4], w[ 5], offset); w[20] = hc_bytealign_S (w[ 3], w[ 4], offset); w[19] = hc_bytealign_S (w[ 2], w[ 3], offset); w[18] = hc_bytealign_S (w[ 1], w[ 2], offset); w[17] = hc_bytealign_S (w[ 0], w[ 1], offset); w[16] = hc_bytealign_S ( 0, w[ 0], offset); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_bytealign_S (w[45], w[46], offset); w[62] = hc_bytealign_S (w[44], w[45], offset); w[61] = hc_bytealign_S (w[43], w[44], offset); w[60] = hc_bytealign_S (w[42], w[43], offset); w[59] = hc_bytealign_S (w[41], w[42], offset); w[58] = hc_bytealign_S (w[40], w[41], offset); w[57] = hc_bytealign_S (w[39], w[40], offset); w[56] = hc_bytealign_S (w[38], w[39], offset); w[55] = hc_bytealign_S (w[37], w[38], offset); w[54] = hc_bytealign_S (w[36], w[37], offset); w[53] = hc_bytealign_S (w[35], w[36], offset); 
w[52] = hc_bytealign_S (w[34], w[35], offset); w[51] = hc_bytealign_S (w[33], w[34], offset); w[50] = hc_bytealign_S (w[32], w[33], offset); w[49] = hc_bytealign_S (w[31], w[32], offset); w[48] = hc_bytealign_S (w[30], w[31], offset); w[47] = hc_bytealign_S (w[29], w[30], offset); w[46] = hc_bytealign_S (w[28], w[29], offset); w[45] = hc_bytealign_S (w[27], w[28], offset); w[44] = hc_bytealign_S (w[26], w[27], offset); w[43] = hc_bytealign_S (w[25], w[26], offset); w[42] = hc_bytealign_S (w[24], w[25], offset); w[41] = hc_bytealign_S (w[23], w[24], offset); w[40] = hc_bytealign_S (w[22], w[23], offset); w[39] = hc_bytealign_S (w[21], w[22], offset); w[38] = hc_bytealign_S (w[20], w[21], offset); w[37] = hc_bytealign_S (w[19], w[20], offset); w[36] = hc_bytealign_S (w[18], w[19], offset); w[35] = hc_bytealign_S (w[17], w[18], offset); w[34] = hc_bytealign_S (w[16], w[17], offset); w[33] = hc_bytealign_S (w[15], w[16], offset); w[32] = hc_bytealign_S (w[14], w[15], offset); w[31] = hc_bytealign_S (w[13], w[14], offset); w[30] = hc_bytealign_S (w[12], w[13], offset); w[29] = hc_bytealign_S (w[11], w[12], offset); w[28] = hc_bytealign_S (w[10], w[11], offset); w[27] = hc_bytealign_S (w[ 9], w[10], offset); w[26] = hc_bytealign_S (w[ 8], w[ 9], offset); w[25] = hc_bytealign_S (w[ 7], w[ 8], offset); w[24] = hc_bytealign_S (w[ 6], w[ 7], offset); w[23] = hc_bytealign_S (w[ 5], w[ 6], offset); w[22] = hc_bytealign_S (w[ 4], w[ 5], offset); w[21] = hc_bytealign_S (w[ 3], w[ 4], offset); w[20] = hc_bytealign_S (w[ 2], w[ 3], offset); w[19] = hc_bytealign_S (w[ 1], w[ 2], offset); w[18] = hc_bytealign_S (w[ 0], w[ 1], offset); w[17] = hc_bytealign_S ( 0, w[ 0], offset); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_bytealign_S (w[44], w[45], offset); w[62] = hc_bytealign_S (w[43], w[44], offset); w[61] = 
hc_bytealign_S (w[42], w[43], offset); w[60] = hc_bytealign_S (w[41], w[42], offset); w[59] = hc_bytealign_S (w[40], w[41], offset); w[58] = hc_bytealign_S (w[39], w[40], offset); w[57] = hc_bytealign_S (w[38], w[39], offset); w[56] = hc_bytealign_S (w[37], w[38], offset); w[55] = hc_bytealign_S (w[36], w[37], offset); w[54] = hc_bytealign_S (w[35], w[36], offset); w[53] = hc_bytealign_S (w[34], w[35], offset); w[52] = hc_bytealign_S (w[33], w[34], offset); w[51] = hc_bytealign_S (w[32], w[33], offset); w[50] = hc_bytealign_S (w[31], w[32], offset); w[49] = hc_bytealign_S (w[30], w[31], offset); w[48] = hc_bytealign_S (w[29], w[30], offset); w[47] = hc_bytealign_S (w[28], w[29], offset); w[46] = hc_bytealign_S (w[27], w[28], offset); w[45] = hc_bytealign_S (w[26], w[27], offset); w[44] = hc_bytealign_S (w[25], w[26], offset); w[43] = hc_bytealign_S (w[24], w[25], offset); w[42] = hc_bytealign_S (w[23], w[24], offset); w[41] = hc_bytealign_S (w[22], w[23], offset); w[40] = hc_bytealign_S (w[21], w[22], offset); w[39] = hc_bytealign_S (w[20], w[21], offset); w[38] = hc_bytealign_S (w[19], w[20], offset); w[37] = hc_bytealign_S (w[18], w[19], offset); w[36] = hc_bytealign_S (w[17], w[18], offset); w[35] = hc_bytealign_S (w[16], w[17], offset); w[34] = hc_bytealign_S (w[15], w[16], offset); w[33] = hc_bytealign_S (w[14], w[15], offset); w[32] = hc_bytealign_S (w[13], w[14], offset); w[31] = hc_bytealign_S (w[12], w[13], offset); w[30] = hc_bytealign_S (w[11], w[12], offset); w[29] = hc_bytealign_S (w[10], w[11], offset); w[28] = hc_bytealign_S (w[ 9], w[10], offset); w[27] = hc_bytealign_S (w[ 8], w[ 9], offset); w[26] = hc_bytealign_S (w[ 7], w[ 8], offset); w[25] = hc_bytealign_S (w[ 6], w[ 7], offset); w[24] = hc_bytealign_S (w[ 5], w[ 6], offset); w[23] = hc_bytealign_S (w[ 4], w[ 5], offset); w[22] = hc_bytealign_S (w[ 3], w[ 4], offset); w[21] = hc_bytealign_S (w[ 2], w[ 3], offset); w[20] = hc_bytealign_S (w[ 1], w[ 2], offset); w[19] = hc_bytealign_S (w[ 0], w[ 
1], offset); w[18] = hc_bytealign_S ( 0, w[ 0], offset); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_bytealign_S (w[43], w[44], offset); w[62] = hc_bytealign_S (w[42], w[43], offset); w[61] = hc_bytealign_S (w[41], w[42], offset); w[60] = hc_bytealign_S (w[40], w[41], offset); w[59] = hc_bytealign_S (w[39], w[40], offset); w[58] = hc_bytealign_S (w[38], w[39], offset); w[57] = hc_bytealign_S (w[37], w[38], offset); w[56] = hc_bytealign_S (w[36], w[37], offset); w[55] = hc_bytealign_S (w[35], w[36], offset); w[54] = hc_bytealign_S (w[34], w[35], offset); w[53] = hc_bytealign_S (w[33], w[34], offset); w[52] = hc_bytealign_S (w[32], w[33], offset); w[51] = hc_bytealign_S (w[31], w[32], offset); w[50] = hc_bytealign_S (w[30], w[31], offset); w[49] = hc_bytealign_S (w[29], w[30], offset); w[48] = hc_bytealign_S (w[28], w[29], offset); w[47] = hc_bytealign_S (w[27], w[28], offset); w[46] = hc_bytealign_S (w[26], w[27], offset); w[45] = hc_bytealign_S (w[25], w[26], offset); w[44] = hc_bytealign_S (w[24], w[25], offset); w[43] = hc_bytealign_S (w[23], w[24], offset); w[42] = hc_bytealign_S (w[22], w[23], offset); w[41] = hc_bytealign_S (w[21], w[22], offset); w[40] = hc_bytealign_S (w[20], w[21], offset); w[39] = hc_bytealign_S (w[19], w[20], offset); w[38] = hc_bytealign_S (w[18], w[19], offset); w[37] = hc_bytealign_S (w[17], w[18], offset); w[36] = hc_bytealign_S (w[16], w[17], offset); w[35] = hc_bytealign_S (w[15], w[16], offset); w[34] = hc_bytealign_S (w[14], w[15], offset); w[33] = hc_bytealign_S (w[13], w[14], offset); w[32] = hc_bytealign_S (w[12], w[13], offset); w[31] = hc_bytealign_S (w[11], w[12], offset); w[30] = hc_bytealign_S (w[10], w[11], offset); w[29] = hc_bytealign_S (w[ 9], w[10], offset); w[28] = hc_bytealign_S (w[ 8], w[ 9], offset); w[27] = hc_bytealign_S (w[ 7], w[ 
8], offset); w[26] = hc_bytealign_S (w[ 6], w[ 7], offset); w[25] = hc_bytealign_S (w[ 5], w[ 6], offset); w[24] = hc_bytealign_S (w[ 4], w[ 5], offset); w[23] = hc_bytealign_S (w[ 3], w[ 4], offset); w[22] = hc_bytealign_S (w[ 2], w[ 3], offset); w[21] = hc_bytealign_S (w[ 1], w[ 2], offset); w[20] = hc_bytealign_S (w[ 0], w[ 1], offset); w[19] = hc_bytealign_S ( 0, w[ 0], offset); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_bytealign_S (w[42], w[43], offset); w[62] = hc_bytealign_S (w[41], w[42], offset); w[61] = hc_bytealign_S (w[40], w[41], offset); w[60] = hc_bytealign_S (w[39], w[40], offset); w[59] = hc_bytealign_S (w[38], w[39], offset); w[58] = hc_bytealign_S (w[37], w[38], offset); w[57] = hc_bytealign_S (w[36], w[37], offset); w[56] = hc_bytealign_S (w[35], w[36], offset); w[55] = hc_bytealign_S (w[34], w[35], offset); w[54] = hc_bytealign_S (w[33], w[34], offset); w[53] = hc_bytealign_S (w[32], w[33], offset); w[52] = hc_bytealign_S (w[31], w[32], offset); w[51] = hc_bytealign_S (w[30], w[31], offset); w[50] = hc_bytealign_S (w[29], w[30], offset); w[49] = hc_bytealign_S (w[28], w[29], offset); w[48] = hc_bytealign_S (w[27], w[28], offset); w[47] = hc_bytealign_S (w[26], w[27], offset); w[46] = hc_bytealign_S (w[25], w[26], offset); w[45] = hc_bytealign_S (w[24], w[25], offset); w[44] = hc_bytealign_S (w[23], w[24], offset); w[43] = hc_bytealign_S (w[22], w[23], offset); w[42] = hc_bytealign_S (w[21], w[22], offset); w[41] = hc_bytealign_S (w[20], w[21], offset); w[40] = hc_bytealign_S (w[19], w[20], offset); w[39] = hc_bytealign_S (w[18], w[19], offset); w[38] = hc_bytealign_S (w[17], w[18], offset); w[37] = hc_bytealign_S (w[16], w[17], offset); w[36] = hc_bytealign_S (w[15], w[16], offset); w[35] = hc_bytealign_S (w[14], w[15], offset); w[34] = hc_bytealign_S 
(w[13], w[14], offset); w[33] = hc_bytealign_S (w[12], w[13], offset); w[32] = hc_bytealign_S (w[11], w[12], offset); w[31] = hc_bytealign_S (w[10], w[11], offset); w[30] = hc_bytealign_S (w[ 9], w[10], offset); w[29] = hc_bytealign_S (w[ 8], w[ 9], offset); w[28] = hc_bytealign_S (w[ 7], w[ 8], offset); w[27] = hc_bytealign_S (w[ 6], w[ 7], offset); w[26] = hc_bytealign_S (w[ 5], w[ 6], offset); w[25] = hc_bytealign_S (w[ 4], w[ 5], offset); w[24] = hc_bytealign_S (w[ 3], w[ 4], offset); w[23] = hc_bytealign_S (w[ 2], w[ 3], offset); w[22] = hc_bytealign_S (w[ 1], w[ 2], offset); w[21] = hc_bytealign_S (w[ 0], w[ 1], offset); w[20] = hc_bytealign_S ( 0, w[ 0], offset); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_bytealign_S (w[41], w[42], offset); w[62] = hc_bytealign_S (w[40], w[41], offset); w[61] = hc_bytealign_S (w[39], w[40], offset); w[60] = hc_bytealign_S (w[38], w[39], offset); w[59] = hc_bytealign_S (w[37], w[38], offset); w[58] = hc_bytealign_S (w[36], w[37], offset); w[57] = hc_bytealign_S (w[35], w[36], offset); w[56] = hc_bytealign_S (w[34], w[35], offset); w[55] = hc_bytealign_S (w[33], w[34], offset); w[54] = hc_bytealign_S (w[32], w[33], offset); w[53] = hc_bytealign_S (w[31], w[32], offset); w[52] = hc_bytealign_S (w[30], w[31], offset); w[51] = hc_bytealign_S (w[29], w[30], offset); w[50] = hc_bytealign_S (w[28], w[29], offset); w[49] = hc_bytealign_S (w[27], w[28], offset); w[48] = hc_bytealign_S (w[26], w[27], offset); w[47] = hc_bytealign_S (w[25], w[26], offset); w[46] = hc_bytealign_S (w[24], w[25], offset); w[45] = hc_bytealign_S (w[23], w[24], offset); w[44] = hc_bytealign_S (w[22], w[23], offset); w[43] = hc_bytealign_S (w[21], w[22], offset); w[42] = hc_bytealign_S (w[20], w[21], offset); w[41] = hc_bytealign_S (w[19], w[20], offset); 
w[40] = hc_bytealign_S (w[18], w[19], offset); w[39] = hc_bytealign_S (w[17], w[18], offset); w[38] = hc_bytealign_S (w[16], w[17], offset); w[37] = hc_bytealign_S (w[15], w[16], offset); w[36] = hc_bytealign_S (w[14], w[15], offset); w[35] = hc_bytealign_S (w[13], w[14], offset); w[34] = hc_bytealign_S (w[12], w[13], offset); w[33] = hc_bytealign_S (w[11], w[12], offset); w[32] = hc_bytealign_S (w[10], w[11], offset); w[31] = hc_bytealign_S (w[ 9], w[10], offset); w[30] = hc_bytealign_S (w[ 8], w[ 9], offset); w[29] = hc_bytealign_S (w[ 7], w[ 8], offset); w[28] = hc_bytealign_S (w[ 6], w[ 7], offset); w[27] = hc_bytealign_S (w[ 5], w[ 6], offset); w[26] = hc_bytealign_S (w[ 4], w[ 5], offset); w[25] = hc_bytealign_S (w[ 3], w[ 4], offset); w[24] = hc_bytealign_S (w[ 2], w[ 3], offset); w[23] = hc_bytealign_S (w[ 1], w[ 2], offset); w[22] = hc_bytealign_S (w[ 0], w[ 1], offset); w[21] = hc_bytealign_S ( 0, w[ 0], offset); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_bytealign_S (w[40], w[41], offset); w[62] = hc_bytealign_S (w[39], w[40], offset); w[61] = hc_bytealign_S (w[38], w[39], offset); w[60] = hc_bytealign_S (w[37], w[38], offset); w[59] = hc_bytealign_S (w[36], w[37], offset); w[58] = hc_bytealign_S (w[35], w[36], offset); w[57] = hc_bytealign_S (w[34], w[35], offset); w[56] = hc_bytealign_S (w[33], w[34], offset); w[55] = hc_bytealign_S (w[32], w[33], offset); w[54] = hc_bytealign_S (w[31], w[32], offset); w[53] = hc_bytealign_S (w[30], w[31], offset); w[52] = hc_bytealign_S (w[29], w[30], offset); w[51] = hc_bytealign_S (w[28], w[29], offset); w[50] = hc_bytealign_S (w[27], w[28], offset); w[49] = hc_bytealign_S (w[26], w[27], offset); w[48] = hc_bytealign_S (w[25], w[26], offset); w[47] = hc_bytealign_S (w[24], w[25], offset); w[46] = 
hc_bytealign_S (w[23], w[24], offset); w[45] = hc_bytealign_S (w[22], w[23], offset); w[44] = hc_bytealign_S (w[21], w[22], offset); w[43] = hc_bytealign_S (w[20], w[21], offset); w[42] = hc_bytealign_S (w[19], w[20], offset); w[41] = hc_bytealign_S (w[18], w[19], offset); w[40] = hc_bytealign_S (w[17], w[18], offset); w[39] = hc_bytealign_S (w[16], w[17], offset); w[38] = hc_bytealign_S (w[15], w[16], offset); w[37] = hc_bytealign_S (w[14], w[15], offset); w[36] = hc_bytealign_S (w[13], w[14], offset); w[35] = hc_bytealign_S (w[12], w[13], offset); w[34] = hc_bytealign_S (w[11], w[12], offset); w[33] = hc_bytealign_S (w[10], w[11], offset); w[32] = hc_bytealign_S (w[ 9], w[10], offset); w[31] = hc_bytealign_S (w[ 8], w[ 9], offset); w[30] = hc_bytealign_S (w[ 7], w[ 8], offset); w[29] = hc_bytealign_S (w[ 6], w[ 7], offset); w[28] = hc_bytealign_S (w[ 5], w[ 6], offset); w[27] = hc_bytealign_S (w[ 4], w[ 5], offset); w[26] = hc_bytealign_S (w[ 3], w[ 4], offset); w[25] = hc_bytealign_S (w[ 2], w[ 3], offset); w[24] = hc_bytealign_S (w[ 1], w[ 2], offset); w[23] = hc_bytealign_S (w[ 0], w[ 1], offset); w[22] = hc_bytealign_S ( 0, w[ 0], offset); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_bytealign_S (w[39], w[40], offset); w[62] = hc_bytealign_S (w[38], w[39], offset); w[61] = hc_bytealign_S (w[37], w[38], offset); w[60] = hc_bytealign_S (w[36], w[37], offset); w[59] = hc_bytealign_S (w[35], w[36], offset); w[58] = hc_bytealign_S (w[34], w[35], offset); w[57] = hc_bytealign_S (w[33], w[34], offset); w[56] = hc_bytealign_S (w[32], w[33], offset); w[55] = hc_bytealign_S (w[31], w[32], offset); w[54] = hc_bytealign_S (w[30], w[31], offset); w[53] = hc_bytealign_S (w[29], w[30], offset); w[52] = hc_bytealign_S (w[28], w[29], offset); w[51] = 
hc_bytealign_S (w[27], w[28], offset); w[50] = hc_bytealign_S (w[26], w[27], offset); w[49] = hc_bytealign_S (w[25], w[26], offset); w[48] = hc_bytealign_S (w[24], w[25], offset); w[47] = hc_bytealign_S (w[23], w[24], offset); w[46] = hc_bytealign_S (w[22], w[23], offset); w[45] = hc_bytealign_S (w[21], w[22], offset); w[44] = hc_bytealign_S (w[20], w[21], offset); w[43] = hc_bytealign_S (w[19], w[20], offset); w[42] = hc_bytealign_S (w[18], w[19], offset); w[41] = hc_bytealign_S (w[17], w[18], offset); w[40] = hc_bytealign_S (w[16], w[17], offset); w[39] = hc_bytealign_S (w[15], w[16], offset); w[38] = hc_bytealign_S (w[14], w[15], offset); w[37] = hc_bytealign_S (w[13], w[14], offset); w[36] = hc_bytealign_S (w[12], w[13], offset); w[35] = hc_bytealign_S (w[11], w[12], offset); w[34] = hc_bytealign_S (w[10], w[11], offset); w[33] = hc_bytealign_S (w[ 9], w[10], offset); w[32] = hc_bytealign_S (w[ 8], w[ 9], offset); w[31] = hc_bytealign_S (w[ 7], w[ 8], offset); w[30] = hc_bytealign_S (w[ 6], w[ 7], offset); w[29] = hc_bytealign_S (w[ 5], w[ 6], offset); w[28] = hc_bytealign_S (w[ 4], w[ 5], offset); w[27] = hc_bytealign_S (w[ 3], w[ 4], offset); w[26] = hc_bytealign_S (w[ 2], w[ 3], offset); w[25] = hc_bytealign_S (w[ 1], w[ 2], offset); w[24] = hc_bytealign_S (w[ 0], w[ 1], offset); w[23] = hc_bytealign_S ( 0, w[ 0], offset); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_bytealign_S (w[38], w[39], offset); w[62] = hc_bytealign_S (w[37], w[38], offset); w[61] = hc_bytealign_S (w[36], w[37], offset); w[60] = hc_bytealign_S (w[35], w[36], offset); w[59] = hc_bytealign_S (w[34], w[35], offset); w[58] = hc_bytealign_S (w[33], w[34], offset); w[57] = hc_bytealign_S (w[32], w[33], offset); w[56] = hc_bytealign_S (w[31], w[32], offset); 
w[55] = hc_bytealign_S (w[30], w[31], offset); w[54] = hc_bytealign_S (w[29], w[30], offset); w[53] = hc_bytealign_S (w[28], w[29], offset); w[52] = hc_bytealign_S (w[27], w[28], offset); w[51] = hc_bytealign_S (w[26], w[27], offset); w[50] = hc_bytealign_S (w[25], w[26], offset); w[49] = hc_bytealign_S (w[24], w[25], offset); w[48] = hc_bytealign_S (w[23], w[24], offset); w[47] = hc_bytealign_S (w[22], w[23], offset); w[46] = hc_bytealign_S (w[21], w[22], offset); w[45] = hc_bytealign_S (w[20], w[21], offset); w[44] = hc_bytealign_S (w[19], w[20], offset); w[43] = hc_bytealign_S (w[18], w[19], offset); w[42] = hc_bytealign_S (w[17], w[18], offset); w[41] = hc_bytealign_S (w[16], w[17], offset); w[40] = hc_bytealign_S (w[15], w[16], offset); w[39] = hc_bytealign_S (w[14], w[15], offset); w[38] = hc_bytealign_S (w[13], w[14], offset); w[37] = hc_bytealign_S (w[12], w[13], offset); w[36] = hc_bytealign_S (w[11], w[12], offset); w[35] = hc_bytealign_S (w[10], w[11], offset); w[34] = hc_bytealign_S (w[ 9], w[10], offset); w[33] = hc_bytealign_S (w[ 8], w[ 9], offset); w[32] = hc_bytealign_S (w[ 7], w[ 8], offset); w[31] = hc_bytealign_S (w[ 6], w[ 7], offset); w[30] = hc_bytealign_S (w[ 5], w[ 6], offset); w[29] = hc_bytealign_S (w[ 4], w[ 5], offset); w[28] = hc_bytealign_S (w[ 3], w[ 4], offset); w[27] = hc_bytealign_S (w[ 2], w[ 3], offset); w[26] = hc_bytealign_S (w[ 1], w[ 2], offset); w[25] = hc_bytealign_S (w[ 0], w[ 1], offset); w[24] = hc_bytealign_S ( 0, w[ 0], offset); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_bytealign_S (w[37], w[38], offset); w[62] = hc_bytealign_S (w[36], w[37], offset); w[61] = hc_bytealign_S (w[35], w[36], offset); w[60] = hc_bytealign_S (w[34], w[35], offset); w[59] = hc_bytealign_S (w[33], 
w[34], offset); w[58] = hc_bytealign_S (w[32], w[33], offset); w[57] = hc_bytealign_S (w[31], w[32], offset); w[56] = hc_bytealign_S (w[30], w[31], offset); w[55] = hc_bytealign_S (w[29], w[30], offset); w[54] = hc_bytealign_S (w[28], w[29], offset); w[53] = hc_bytealign_S (w[27], w[28], offset); w[52] = hc_bytealign_S (w[26], w[27], offset); w[51] = hc_bytealign_S (w[25], w[26], offset); w[50] = hc_bytealign_S (w[24], w[25], offset); w[49] = hc_bytealign_S (w[23], w[24], offset); w[48] = hc_bytealign_S (w[22], w[23], offset); w[47] = hc_bytealign_S (w[21], w[22], offset); w[46] = hc_bytealign_S (w[20], w[21], offset); w[45] = hc_bytealign_S (w[19], w[20], offset); w[44] = hc_bytealign_S (w[18], w[19], offset); w[43] = hc_bytealign_S (w[17], w[18], offset); w[42] = hc_bytealign_S (w[16], w[17], offset); w[41] = hc_bytealign_S (w[15], w[16], offset); w[40] = hc_bytealign_S (w[14], w[15], offset); w[39] = hc_bytealign_S (w[13], w[14], offset); w[38] = hc_bytealign_S (w[12], w[13], offset); w[37] = hc_bytealign_S (w[11], w[12], offset); w[36] = hc_bytealign_S (w[10], w[11], offset); w[35] = hc_bytealign_S (w[ 9], w[10], offset); w[34] = hc_bytealign_S (w[ 8], w[ 9], offset); w[33] = hc_bytealign_S (w[ 7], w[ 8], offset); w[32] = hc_bytealign_S (w[ 6], w[ 7], offset); w[31] = hc_bytealign_S (w[ 5], w[ 6], offset); w[30] = hc_bytealign_S (w[ 4], w[ 5], offset); w[29] = hc_bytealign_S (w[ 3], w[ 4], offset); w[28] = hc_bytealign_S (w[ 2], w[ 3], offset); w[27] = hc_bytealign_S (w[ 1], w[ 2], offset); w[26] = hc_bytealign_S (w[ 0], w[ 1], offset); w[25] = hc_bytealign_S ( 0, w[ 0], offset); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_bytealign_S (w[36], w[37], offset); w[62] = hc_bytealign_S (w[35], w[36], offset); 
w[61] = hc_bytealign_S (w[34], w[35], offset); w[60] = hc_bytealign_S (w[33], w[34], offset); w[59] = hc_bytealign_S (w[32], w[33], offset); w[58] = hc_bytealign_S (w[31], w[32], offset); w[57] = hc_bytealign_S (w[30], w[31], offset); w[56] = hc_bytealign_S (w[29], w[30], offset); w[55] = hc_bytealign_S (w[28], w[29], offset); w[54] = hc_bytealign_S (w[27], w[28], offset); w[53] = hc_bytealign_S (w[26], w[27], offset); w[52] = hc_bytealign_S (w[25], w[26], offset); w[51] = hc_bytealign_S (w[24], w[25], offset); w[50] = hc_bytealign_S (w[23], w[24], offset); w[49] = hc_bytealign_S (w[22], w[23], offset); w[48] = hc_bytealign_S (w[21], w[22], offset); w[47] = hc_bytealign_S (w[20], w[21], offset); w[46] = hc_bytealign_S (w[19], w[20], offset); w[45] = hc_bytealign_S (w[18], w[19], offset); w[44] = hc_bytealign_S (w[17], w[18], offset); w[43] = hc_bytealign_S (w[16], w[17], offset); w[42] = hc_bytealign_S (w[15], w[16], offset); w[41] = hc_bytealign_S (w[14], w[15], offset); w[40] = hc_bytealign_S (w[13], w[14], offset); w[39] = hc_bytealign_S (w[12], w[13], offset); w[38] = hc_bytealign_S (w[11], w[12], offset); w[37] = hc_bytealign_S (w[10], w[11], offset); w[36] = hc_bytealign_S (w[ 9], w[10], offset); w[35] = hc_bytealign_S (w[ 8], w[ 9], offset); w[34] = hc_bytealign_S (w[ 7], w[ 8], offset); w[33] = hc_bytealign_S (w[ 6], w[ 7], offset); w[32] = hc_bytealign_S (w[ 5], w[ 6], offset); w[31] = hc_bytealign_S (w[ 4], w[ 5], offset); w[30] = hc_bytealign_S (w[ 3], w[ 4], offset); w[29] = hc_bytealign_S (w[ 2], w[ 3], offset); w[28] = hc_bytealign_S (w[ 1], w[ 2], offset); w[27] = hc_bytealign_S (w[ 0], w[ 1], offset); w[26] = hc_bytealign_S ( 0, w[ 0], offset); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = 
hc_bytealign_S (w[35], w[36], offset); w[62] = hc_bytealign_S (w[34], w[35], offset); w[61] = hc_bytealign_S (w[33], w[34], offset); w[60] = hc_bytealign_S (w[32], w[33], offset); w[59] = hc_bytealign_S (w[31], w[32], offset); w[58] = hc_bytealign_S (w[30], w[31], offset); w[57] = hc_bytealign_S (w[29], w[30], offset); w[56] = hc_bytealign_S (w[28], w[29], offset); w[55] = hc_bytealign_S (w[27], w[28], offset); w[54] = hc_bytealign_S (w[26], w[27], offset); w[53] = hc_bytealign_S (w[25], w[26], offset); w[52] = hc_bytealign_S (w[24], w[25], offset); w[51] = hc_bytealign_S (w[23], w[24], offset); w[50] = hc_bytealign_S (w[22], w[23], offset); w[49] = hc_bytealign_S (w[21], w[22], offset); w[48] = hc_bytealign_S (w[20], w[21], offset); w[47] = hc_bytealign_S (w[19], w[20], offset); w[46] = hc_bytealign_S (w[18], w[19], offset); w[45] = hc_bytealign_S (w[17], w[18], offset); w[44] = hc_bytealign_S (w[16], w[17], offset); w[43] = hc_bytealign_S (w[15], w[16], offset); w[42] = hc_bytealign_S (w[14], w[15], offset); w[41] = hc_bytealign_S (w[13], w[14], offset); w[40] = hc_bytealign_S (w[12], w[13], offset); w[39] = hc_bytealign_S (w[11], w[12], offset); w[38] = hc_bytealign_S (w[10], w[11], offset); w[37] = hc_bytealign_S (w[ 9], w[10], offset); w[36] = hc_bytealign_S (w[ 8], w[ 9], offset); w[35] = hc_bytealign_S (w[ 7], w[ 8], offset); w[34] = hc_bytealign_S (w[ 6], w[ 7], offset); w[33] = hc_bytealign_S (w[ 5], w[ 6], offset); w[32] = hc_bytealign_S (w[ 4], w[ 5], offset); w[31] = hc_bytealign_S (w[ 3], w[ 4], offset); w[30] = hc_bytealign_S (w[ 2], w[ 3], offset); w[29] = hc_bytealign_S (w[ 1], w[ 2], offset); w[28] = hc_bytealign_S (w[ 0], w[ 1], offset); w[27] = hc_bytealign_S ( 0, w[ 0], offset); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 
0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_bytealign_S (w[34], w[35], offset); w[62] = hc_bytealign_S (w[33], w[34], offset); w[61] = hc_bytealign_S (w[32], w[33], offset); w[60] = hc_bytealign_S (w[31], w[32], offset); w[59] = hc_bytealign_S (w[30], w[31], offset); w[58] = hc_bytealign_S (w[29], w[30], offset); w[57] = hc_bytealign_S (w[28], w[29], offset); w[56] = hc_bytealign_S (w[27], w[28], offset); w[55] = hc_bytealign_S (w[26], w[27], offset); w[54] = hc_bytealign_S (w[25], w[26], offset); w[53] = hc_bytealign_S (w[24], w[25], offset); w[52] = hc_bytealign_S (w[23], w[24], offset); w[51] = hc_bytealign_S (w[22], w[23], offset); w[50] = hc_bytealign_S (w[21], w[22], offset); w[49] = hc_bytealign_S (w[20], w[21], offset); w[48] = hc_bytealign_S (w[19], w[20], offset); w[47] = hc_bytealign_S (w[18], w[19], offset); w[46] = hc_bytealign_S (w[17], w[18], offset); w[45] = hc_bytealign_S (w[16], w[17], offset); w[44] = hc_bytealign_S (w[15], w[16], offset); w[43] = hc_bytealign_S (w[14], w[15], offset); w[42] = hc_bytealign_S (w[13], w[14], offset); w[41] = hc_bytealign_S (w[12], w[13], offset); w[40] = hc_bytealign_S (w[11], w[12], offset); w[39] = hc_bytealign_S (w[10], w[11], offset); w[38] = hc_bytealign_S (w[ 9], w[10], offset); w[37] = hc_bytealign_S (w[ 8], w[ 9], offset); w[36] = hc_bytealign_S (w[ 7], w[ 8], offset); w[35] = hc_bytealign_S (w[ 6], w[ 7], offset); w[34] = hc_bytealign_S (w[ 5], w[ 6], offset); w[33] = hc_bytealign_S (w[ 4], w[ 5], offset); w[32] = hc_bytealign_S (w[ 3], w[ 4], offset); w[31] = hc_bytealign_S (w[ 2], w[ 3], offset); w[30] = hc_bytealign_S (w[ 1], w[ 2], offset); w[29] = hc_bytealign_S (w[ 0], w[ 1], offset); w[28] = hc_bytealign_S ( 0, w[ 0], offset); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] 
= 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_bytealign_S (w[33], w[34], offset); w[62] = hc_bytealign_S (w[32], w[33], offset); w[61] = hc_bytealign_S (w[31], w[32], offset); w[60] = hc_bytealign_S (w[30], w[31], offset); w[59] = hc_bytealign_S (w[29], w[30], offset); w[58] = hc_bytealign_S (w[28], w[29], offset); w[57] = hc_bytealign_S (w[27], w[28], offset); w[56] = hc_bytealign_S (w[26], w[27], offset); w[55] = hc_bytealign_S (w[25], w[26], offset); w[54] = hc_bytealign_S (w[24], w[25], offset); w[53] = hc_bytealign_S (w[23], w[24], offset); w[52] = hc_bytealign_S (w[22], w[23], offset); w[51] = hc_bytealign_S (w[21], w[22], offset); w[50] = hc_bytealign_S (w[20], w[21], offset); w[49] = hc_bytealign_S (w[19], w[20], offset); w[48] = hc_bytealign_S (w[18], w[19], offset); w[47] = hc_bytealign_S (w[17], w[18], offset); w[46] = hc_bytealign_S (w[16], w[17], offset); w[45] = hc_bytealign_S (w[15], w[16], offset); w[44] = hc_bytealign_S (w[14], w[15], offset); w[43] = hc_bytealign_S (w[13], w[14], offset); w[42] = hc_bytealign_S (w[12], w[13], offset); w[41] = hc_bytealign_S (w[11], w[12], offset); w[40] = hc_bytealign_S (w[10], w[11], offset); w[39] = hc_bytealign_S (w[ 9], w[10], offset); w[38] = hc_bytealign_S (w[ 8], w[ 9], offset); w[37] = hc_bytealign_S (w[ 7], w[ 8], offset); w[36] = hc_bytealign_S (w[ 6], w[ 7], offset); w[35] = hc_bytealign_S (w[ 5], w[ 6], offset); w[34] = hc_bytealign_S (w[ 4], w[ 5], offset); w[33] = hc_bytealign_S (w[ 3], w[ 4], offset); w[32] = hc_bytealign_S (w[ 2], w[ 3], offset); w[31] = hc_bytealign_S (w[ 1], w[ 2], offset); w[30] = hc_bytealign_S (w[ 0], w[ 1], offset); w[29] = hc_bytealign_S ( 0, w[ 0], offset); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] 
= 0; w[ 0] = 0; break; case 30: w[63] = hc_bytealign_S (w[32], w[33], offset); w[62] = hc_bytealign_S (w[31], w[32], offset); w[61] = hc_bytealign_S (w[30], w[31], offset); w[60] = hc_bytealign_S (w[29], w[30], offset); w[59] = hc_bytealign_S (w[28], w[29], offset); w[58] = hc_bytealign_S (w[27], w[28], offset); w[57] = hc_bytealign_S (w[26], w[27], offset); w[56] = hc_bytealign_S (w[25], w[26], offset); w[55] = hc_bytealign_S (w[24], w[25], offset); w[54] = hc_bytealign_S (w[23], w[24], offset); w[53] = hc_bytealign_S (w[22], w[23], offset); w[52] = hc_bytealign_S (w[21], w[22], offset); w[51] = hc_bytealign_S (w[20], w[21], offset); w[50] = hc_bytealign_S (w[19], w[20], offset); w[49] = hc_bytealign_S (w[18], w[19], offset); w[48] = hc_bytealign_S (w[17], w[18], offset); w[47] = hc_bytealign_S (w[16], w[17], offset); w[46] = hc_bytealign_S (w[15], w[16], offset); w[45] = hc_bytealign_S (w[14], w[15], offset); w[44] = hc_bytealign_S (w[13], w[14], offset); w[43] = hc_bytealign_S (w[12], w[13], offset); w[42] = hc_bytealign_S (w[11], w[12], offset); w[41] = hc_bytealign_S (w[10], w[11], offset); w[40] = hc_bytealign_S (w[ 9], w[10], offset); w[39] = hc_bytealign_S (w[ 8], w[ 9], offset); w[38] = hc_bytealign_S (w[ 7], w[ 8], offset); w[37] = hc_bytealign_S (w[ 6], w[ 7], offset); w[36] = hc_bytealign_S (w[ 5], w[ 6], offset); w[35] = hc_bytealign_S (w[ 4], w[ 5], offset); w[34] = hc_bytealign_S (w[ 3], w[ 4], offset); w[33] = hc_bytealign_S (w[ 2], w[ 3], offset); w[32] = hc_bytealign_S (w[ 1], w[ 2], offset); w[31] = hc_bytealign_S (w[ 0], w[ 1], offset); w[30] = hc_bytealign_S ( 0, w[ 0], offset); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_bytealign_S 
(w[31], w[32], offset); w[62] = hc_bytealign_S (w[30], w[31], offset); w[61] = hc_bytealign_S (w[29], w[30], offset); w[60] = hc_bytealign_S (w[28], w[29], offset); w[59] = hc_bytealign_S (w[27], w[28], offset); w[58] = hc_bytealign_S (w[26], w[27], offset); w[57] = hc_bytealign_S (w[25], w[26], offset); w[56] = hc_bytealign_S (w[24], w[25], offset); w[55] = hc_bytealign_S (w[23], w[24], offset); w[54] = hc_bytealign_S (w[22], w[23], offset); w[53] = hc_bytealign_S (w[21], w[22], offset); w[52] = hc_bytealign_S (w[20], w[21], offset); w[51] = hc_bytealign_S (w[19], w[20], offset); w[50] = hc_bytealign_S (w[18], w[19], offset); w[49] = hc_bytealign_S (w[17], w[18], offset); w[48] = hc_bytealign_S (w[16], w[17], offset); w[47] = hc_bytealign_S (w[15], w[16], offset); w[46] = hc_bytealign_S (w[14], w[15], offset); w[45] = hc_bytealign_S (w[13], w[14], offset); w[44] = hc_bytealign_S (w[12], w[13], offset); w[43] = hc_bytealign_S (w[11], w[12], offset); w[42] = hc_bytealign_S (w[10], w[11], offset); w[41] = hc_bytealign_S (w[ 9], w[10], offset); w[40] = hc_bytealign_S (w[ 8], w[ 9], offset); w[39] = hc_bytealign_S (w[ 7], w[ 8], offset); w[38] = hc_bytealign_S (w[ 6], w[ 7], offset); w[37] = hc_bytealign_S (w[ 5], w[ 6], offset); w[36] = hc_bytealign_S (w[ 4], w[ 5], offset); w[35] = hc_bytealign_S (w[ 3], w[ 4], offset); w[34] = hc_bytealign_S (w[ 2], w[ 3], offset); w[33] = hc_bytealign_S (w[ 1], w[ 2], offset); w[32] = hc_bytealign_S (w[ 0], w[ 1], offset); w[31] = hc_bytealign_S ( 0, w[ 0], offset); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_bytealign_S (w[30], w[31], offset); w[62] = hc_bytealign_S (w[29], w[30], offset); w[61] = hc_bytealign_S 
(w[28], w[29], offset); w[60] = hc_bytealign_S (w[27], w[28], offset); w[59] = hc_bytealign_S (w[26], w[27], offset); w[58] = hc_bytealign_S (w[25], w[26], offset); w[57] = hc_bytealign_S (w[24], w[25], offset); w[56] = hc_bytealign_S (w[23], w[24], offset); w[55] = hc_bytealign_S (w[22], w[23], offset); w[54] = hc_bytealign_S (w[21], w[22], offset); w[53] = hc_bytealign_S (w[20], w[21], offset); w[52] = hc_bytealign_S (w[19], w[20], offset); w[51] = hc_bytealign_S (w[18], w[19], offset); w[50] = hc_bytealign_S (w[17], w[18], offset); w[49] = hc_bytealign_S (w[16], w[17], offset); w[48] = hc_bytealign_S (w[15], w[16], offset); w[47] = hc_bytealign_S (w[14], w[15], offset); w[46] = hc_bytealign_S (w[13], w[14], offset); w[45] = hc_bytealign_S (w[12], w[13], offset); w[44] = hc_bytealign_S (w[11], w[12], offset); w[43] = hc_bytealign_S (w[10], w[11], offset); w[42] = hc_bytealign_S (w[ 9], w[10], offset); w[41] = hc_bytealign_S (w[ 8], w[ 9], offset); w[40] = hc_bytealign_S (w[ 7], w[ 8], offset); w[39] = hc_bytealign_S (w[ 6], w[ 7], offset); w[38] = hc_bytealign_S (w[ 5], w[ 6], offset); w[37] = hc_bytealign_S (w[ 4], w[ 5], offset); w[36] = hc_bytealign_S (w[ 3], w[ 4], offset); w[35] = hc_bytealign_S (w[ 2], w[ 3], offset); w[34] = hc_bytealign_S (w[ 1], w[ 2], offset); w[33] = hc_bytealign_S (w[ 0], w[ 1], offset); w[32] = hc_bytealign_S ( 0, w[ 0], offset); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_bytealign_S (w[29], w[30], offset); w[62] = hc_bytealign_S (w[28], w[29], offset); w[61] = hc_bytealign_S (w[27], w[28], offset); w[60] = hc_bytealign_S (w[26], w[27], offset); w[59] = hc_bytealign_S (w[25], w[26], offset); w[58] = 
hc_bytealign_S (w[24], w[25], offset); w[57] = hc_bytealign_S (w[23], w[24], offset); w[56] = hc_bytealign_S (w[22], w[23], offset); w[55] = hc_bytealign_S (w[21], w[22], offset); w[54] = hc_bytealign_S (w[20], w[21], offset); w[53] = hc_bytealign_S (w[19], w[20], offset); w[52] = hc_bytealign_S (w[18], w[19], offset); w[51] = hc_bytealign_S (w[17], w[18], offset); w[50] = hc_bytealign_S (w[16], w[17], offset); w[49] = hc_bytealign_S (w[15], w[16], offset); w[48] = hc_bytealign_S (w[14], w[15], offset); w[47] = hc_bytealign_S (w[13], w[14], offset); w[46] = hc_bytealign_S (w[12], w[13], offset); w[45] = hc_bytealign_S (w[11], w[12], offset); w[44] = hc_bytealign_S (w[10], w[11], offset); w[43] = hc_bytealign_S (w[ 9], w[10], offset); w[42] = hc_bytealign_S (w[ 8], w[ 9], offset); w[41] = hc_bytealign_S (w[ 7], w[ 8], offset); w[40] = hc_bytealign_S (w[ 6], w[ 7], offset); w[39] = hc_bytealign_S (w[ 5], w[ 6], offset); w[38] = hc_bytealign_S (w[ 4], w[ 5], offset); w[37] = hc_bytealign_S (w[ 3], w[ 4], offset); w[36] = hc_bytealign_S (w[ 2], w[ 3], offset); w[35] = hc_bytealign_S (w[ 1], w[ 2], offset); w[34] = hc_bytealign_S (w[ 0], w[ 1], offset); w[33] = hc_bytealign_S ( 0, w[ 0], offset); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_bytealign_S (w[28], w[29], offset); w[62] = hc_bytealign_S (w[27], w[28], offset); w[61] = hc_bytealign_S (w[26], w[27], offset); w[60] = hc_bytealign_S (w[25], w[26], offset); w[59] = hc_bytealign_S (w[24], w[25], offset); w[58] = hc_bytealign_S (w[23], w[24], offset); w[57] = hc_bytealign_S (w[22], w[23], offset); w[56] = hc_bytealign_S (w[21], w[22], offset); w[55] = hc_bytealign_S (w[20], 
w[21], offset); w[54] = hc_bytealign_S (w[19], w[20], offset); w[53] = hc_bytealign_S (w[18], w[19], offset); w[52] = hc_bytealign_S (w[17], w[18], offset); w[51] = hc_bytealign_S (w[16], w[17], offset); w[50] = hc_bytealign_S (w[15], w[16], offset); w[49] = hc_bytealign_S (w[14], w[15], offset); w[48] = hc_bytealign_S (w[13], w[14], offset); w[47] = hc_bytealign_S (w[12], w[13], offset); w[46] = hc_bytealign_S (w[11], w[12], offset); w[45] = hc_bytealign_S (w[10], w[11], offset); w[44] = hc_bytealign_S (w[ 9], w[10], offset); w[43] = hc_bytealign_S (w[ 8], w[ 9], offset); w[42] = hc_bytealign_S (w[ 7], w[ 8], offset); w[41] = hc_bytealign_S (w[ 6], w[ 7], offset); w[40] = hc_bytealign_S (w[ 5], w[ 6], offset); w[39] = hc_bytealign_S (w[ 4], w[ 5], offset); w[38] = hc_bytealign_S (w[ 3], w[ 4], offset); w[37] = hc_bytealign_S (w[ 2], w[ 3], offset); w[36] = hc_bytealign_S (w[ 1], w[ 2], offset); w[35] = hc_bytealign_S (w[ 0], w[ 1], offset); w[34] = hc_bytealign_S ( 0, w[ 0], offset); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_bytealign_S (w[27], w[28], offset); w[62] = hc_bytealign_S (w[26], w[27], offset); w[61] = hc_bytealign_S (w[25], w[26], offset); w[60] = hc_bytealign_S (w[24], w[25], offset); w[59] = hc_bytealign_S (w[23], w[24], offset); w[58] = hc_bytealign_S (w[22], w[23], offset); w[57] = hc_bytealign_S (w[21], w[22], offset); w[56] = hc_bytealign_S (w[20], w[21], offset); w[55] = hc_bytealign_S (w[19], w[20], offset); w[54] = hc_bytealign_S (w[18], w[19], offset); w[53] = hc_bytealign_S (w[17], w[18], offset); w[52] = hc_bytealign_S (w[16], w[17], offset); w[51] = hc_bytealign_S (w[15], w[16], 
offset); w[50] = hc_bytealign_S (w[14], w[15], offset); w[49] = hc_bytealign_S (w[13], w[14], offset); w[48] = hc_bytealign_S (w[12], w[13], offset); w[47] = hc_bytealign_S (w[11], w[12], offset); w[46] = hc_bytealign_S (w[10], w[11], offset); w[45] = hc_bytealign_S (w[ 9], w[10], offset); w[44] = hc_bytealign_S (w[ 8], w[ 9], offset); w[43] = hc_bytealign_S (w[ 7], w[ 8], offset); w[42] = hc_bytealign_S (w[ 6], w[ 7], offset); w[41] = hc_bytealign_S (w[ 5], w[ 6], offset); w[40] = hc_bytealign_S (w[ 4], w[ 5], offset); w[39] = hc_bytealign_S (w[ 3], w[ 4], offset); w[38] = hc_bytealign_S (w[ 2], w[ 3], offset); w[37] = hc_bytealign_S (w[ 1], w[ 2], offset); w[36] = hc_bytealign_S (w[ 0], w[ 1], offset); w[35] = hc_bytealign_S ( 0, w[ 0], offset); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_bytealign_S (w[26], w[27], offset); w[62] = hc_bytealign_S (w[25], w[26], offset); w[61] = hc_bytealign_S (w[24], w[25], offset); w[60] = hc_bytealign_S (w[23], w[24], offset); w[59] = hc_bytealign_S (w[22], w[23], offset); w[58] = hc_bytealign_S (w[21], w[22], offset); w[57] = hc_bytealign_S (w[20], w[21], offset); w[56] = hc_bytealign_S (w[19], w[20], offset); w[55] = hc_bytealign_S (w[18], w[19], offset); w[54] = hc_bytealign_S (w[17], w[18], offset); w[53] = hc_bytealign_S (w[16], w[17], offset); w[52] = hc_bytealign_S (w[15], w[16], offset); w[51] = hc_bytealign_S (w[14], w[15], offset); w[50] = hc_bytealign_S (w[13], w[14], offset); w[49] = hc_bytealign_S (w[12], w[13], offset); w[48] = hc_bytealign_S (w[11], w[12], offset); w[47] = hc_bytealign_S (w[10], w[11], offset); w[46] = hc_bytealign_S (w[ 9], w[10], 
offset); w[45] = hc_bytealign_S (w[ 8], w[ 9], offset); w[44] = hc_bytealign_S (w[ 7], w[ 8], offset); w[43] = hc_bytealign_S (w[ 6], w[ 7], offset); w[42] = hc_bytealign_S (w[ 5], w[ 6], offset); w[41] = hc_bytealign_S (w[ 4], w[ 5], offset); w[40] = hc_bytealign_S (w[ 3], w[ 4], offset); w[39] = hc_bytealign_S (w[ 2], w[ 3], offset); w[38] = hc_bytealign_S (w[ 1], w[ 2], offset); w[37] = hc_bytealign_S (w[ 0], w[ 1], offset); w[36] = hc_bytealign_S ( 0, w[ 0], offset); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_bytealign_S (w[25], w[26], offset); w[62] = hc_bytealign_S (w[24], w[25], offset); w[61] = hc_bytealign_S (w[23], w[24], offset); w[60] = hc_bytealign_S (w[22], w[23], offset); w[59] = hc_bytealign_S (w[21], w[22], offset); w[58] = hc_bytealign_S (w[20], w[21], offset); w[57] = hc_bytealign_S (w[19], w[20], offset); w[56] = hc_bytealign_S (w[18], w[19], offset); w[55] = hc_bytealign_S (w[17], w[18], offset); w[54] = hc_bytealign_S (w[16], w[17], offset); w[53] = hc_bytealign_S (w[15], w[16], offset); w[52] = hc_bytealign_S (w[14], w[15], offset); w[51] = hc_bytealign_S (w[13], w[14], offset); w[50] = hc_bytealign_S (w[12], w[13], offset); w[49] = hc_bytealign_S (w[11], w[12], offset); w[48] = hc_bytealign_S (w[10], w[11], offset); w[47] = hc_bytealign_S (w[ 9], w[10], offset); w[46] = hc_bytealign_S (w[ 8], w[ 9], offset); w[45] = hc_bytealign_S (w[ 7], w[ 8], offset); w[44] = hc_bytealign_S (w[ 6], w[ 7], offset); w[43] = hc_bytealign_S (w[ 5], w[ 6], offset); w[42] = hc_bytealign_S (w[ 4], w[ 5], offset); w[41] = hc_bytealign_S (w[ 3], w[ 4], offset); w[40] = hc_bytealign_S (w[ 2], 
w[ 3], offset); w[39] = hc_bytealign_S (w[ 1], w[ 2], offset); w[38] = hc_bytealign_S (w[ 0], w[ 1], offset); w[37] = hc_bytealign_S ( 0, w[ 0], offset); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_bytealign_S (w[24], w[25], offset); w[62] = hc_bytealign_S (w[23], w[24], offset); w[61] = hc_bytealign_S (w[22], w[23], offset); w[60] = hc_bytealign_S (w[21], w[22], offset); w[59] = hc_bytealign_S (w[20], w[21], offset); w[58] = hc_bytealign_S (w[19], w[20], offset); w[57] = hc_bytealign_S (w[18], w[19], offset); w[56] = hc_bytealign_S (w[17], w[18], offset); w[55] = hc_bytealign_S (w[16], w[17], offset); w[54] = hc_bytealign_S (w[15], w[16], offset); w[53] = hc_bytealign_S (w[14], w[15], offset); w[52] = hc_bytealign_S (w[13], w[14], offset); w[51] = hc_bytealign_S (w[12], w[13], offset); w[50] = hc_bytealign_S (w[11], w[12], offset); w[49] = hc_bytealign_S (w[10], w[11], offset); w[48] = hc_bytealign_S (w[ 9], w[10], offset); w[47] = hc_bytealign_S (w[ 8], w[ 9], offset); w[46] = hc_bytealign_S (w[ 7], w[ 8], offset); w[45] = hc_bytealign_S (w[ 6], w[ 7], offset); w[44] = hc_bytealign_S (w[ 5], w[ 6], offset); w[43] = hc_bytealign_S (w[ 4], w[ 5], offset); w[42] = hc_bytealign_S (w[ 3], w[ 4], offset); w[41] = hc_bytealign_S (w[ 2], w[ 3], offset); w[40] = hc_bytealign_S (w[ 1], w[ 2], offset); w[39] = hc_bytealign_S (w[ 0], w[ 1], offset); w[38] = hc_bytealign_S ( 0, w[ 0], offset); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] 
= 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_bytealign_S (w[23], w[24], offset); w[62] = hc_bytealign_S (w[22], w[23], offset); w[61] = hc_bytealign_S (w[21], w[22], offset); w[60] = hc_bytealign_S (w[20], w[21], offset); w[59] = hc_bytealign_S (w[19], w[20], offset); w[58] = hc_bytealign_S (w[18], w[19], offset); w[57] = hc_bytealign_S (w[17], w[18], offset); w[56] = hc_bytealign_S (w[16], w[17], offset); w[55] = hc_bytealign_S (w[15], w[16], offset); w[54] = hc_bytealign_S (w[14], w[15], offset); w[53] = hc_bytealign_S (w[13], w[14], offset); w[52] = hc_bytealign_S (w[12], w[13], offset); w[51] = hc_bytealign_S (w[11], w[12], offset); w[50] = hc_bytealign_S (w[10], w[11], offset); w[49] = hc_bytealign_S (w[ 9], w[10], offset); w[48] = hc_bytealign_S (w[ 8], w[ 9], offset); w[47] = hc_bytealign_S (w[ 7], w[ 8], offset); w[46] = hc_bytealign_S (w[ 6], w[ 7], offset); w[45] = hc_bytealign_S (w[ 5], w[ 6], offset); w[44] = hc_bytealign_S (w[ 4], w[ 5], offset); w[43] = hc_bytealign_S (w[ 3], w[ 4], offset); w[42] = hc_bytealign_S (w[ 2], w[ 3], offset); w[41] = hc_bytealign_S (w[ 1], w[ 2], offset); w[40] = hc_bytealign_S (w[ 0], w[ 1], offset); w[39] = hc_bytealign_S ( 0, w[ 0], offset); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_bytealign_S (w[22], w[23], offset); w[62] = hc_bytealign_S (w[21], w[22], offset); w[61] = hc_bytealign_S (w[20], w[21], offset); w[60] = 
hc_bytealign_S (w[19], w[20], offset); w[59] = hc_bytealign_S (w[18], w[19], offset); w[58] = hc_bytealign_S (w[17], w[18], offset); w[57] = hc_bytealign_S (w[16], w[17], offset); w[56] = hc_bytealign_S (w[15], w[16], offset); w[55] = hc_bytealign_S (w[14], w[15], offset); w[54] = hc_bytealign_S (w[13], w[14], offset); w[53] = hc_bytealign_S (w[12], w[13], offset); w[52] = hc_bytealign_S (w[11], w[12], offset); w[51] = hc_bytealign_S (w[10], w[11], offset); w[50] = hc_bytealign_S (w[ 9], w[10], offset); w[49] = hc_bytealign_S (w[ 8], w[ 9], offset); w[48] = hc_bytealign_S (w[ 7], w[ 8], offset); w[47] = hc_bytealign_S (w[ 6], w[ 7], offset); w[46] = hc_bytealign_S (w[ 5], w[ 6], offset); w[45] = hc_bytealign_S (w[ 4], w[ 5], offset); w[44] = hc_bytealign_S (w[ 3], w[ 4], offset); w[43] = hc_bytealign_S (w[ 2], w[ 3], offset); w[42] = hc_bytealign_S (w[ 1], w[ 2], offset); w[41] = hc_bytealign_S (w[ 0], w[ 1], offset); w[40] = hc_bytealign_S ( 0, w[ 0], offset); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_bytealign_S (w[21], w[22], offset); w[62] = hc_bytealign_S (w[20], w[21], offset); w[61] = hc_bytealign_S (w[19], w[20], offset); w[60] = hc_bytealign_S (w[18], w[19], offset); w[59] = hc_bytealign_S (w[17], w[18], offset); w[58] = hc_bytealign_S (w[16], w[17], offset); w[57] = hc_bytealign_S (w[15], w[16], offset); w[56] = hc_bytealign_S (w[14], w[15], offset); w[55] = hc_bytealign_S (w[13], w[14], offset); w[54] = hc_bytealign_S (w[12], w[13], offset); w[53] = hc_bytealign_S (w[11], w[12], offset); w[52] = hc_bytealign_S (w[10], w[11], offset); 
w[51] = hc_bytealign_S (w[ 9], w[10], offset); w[50] = hc_bytealign_S (w[ 8], w[ 9], offset); w[49] = hc_bytealign_S (w[ 7], w[ 8], offset); w[48] = hc_bytealign_S (w[ 6], w[ 7], offset); w[47] = hc_bytealign_S (w[ 5], w[ 6], offset); w[46] = hc_bytealign_S (w[ 4], w[ 5], offset); w[45] = hc_bytealign_S (w[ 3], w[ 4], offset); w[44] = hc_bytealign_S (w[ 2], w[ 3], offset); w[43] = hc_bytealign_S (w[ 1], w[ 2], offset); w[42] = hc_bytealign_S (w[ 0], w[ 1], offset); w[41] = hc_bytealign_S ( 0, w[ 0], offset); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_bytealign_S (w[20], w[21], offset); w[62] = hc_bytealign_S (w[19], w[20], offset); w[61] = hc_bytealign_S (w[18], w[19], offset); w[60] = hc_bytealign_S (w[17], w[18], offset); w[59] = hc_bytealign_S (w[16], w[17], offset); w[58] = hc_bytealign_S (w[15], w[16], offset); w[57] = hc_bytealign_S (w[14], w[15], offset); w[56] = hc_bytealign_S (w[13], w[14], offset); w[55] = hc_bytealign_S (w[12], w[13], offset); w[54] = hc_bytealign_S (w[11], w[12], offset); w[53] = hc_bytealign_S (w[10], w[11], offset); w[52] = hc_bytealign_S (w[ 9], w[10], offset); w[51] = hc_bytealign_S (w[ 8], w[ 9], offset); w[50] = hc_bytealign_S (w[ 7], w[ 8], offset); w[49] = hc_bytealign_S (w[ 6], w[ 7], offset); w[48] = hc_bytealign_S (w[ 5], w[ 6], offset); w[47] = hc_bytealign_S (w[ 4], w[ 5], offset); w[46] = hc_bytealign_S (w[ 3], w[ 4], offset); w[45] = hc_bytealign_S (w[ 2], w[ 3], offset); w[44] = hc_bytealign_S (w[ 1], w[ 2], offset); w[43] = hc_bytealign_S (w[ 0], w[ 1], offset); w[42] = hc_bytealign_S ( 0, w[ 
0], offset); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_bytealign_S (w[19], w[20], offset); w[62] = hc_bytealign_S (w[18], w[19], offset); w[61] = hc_bytealign_S (w[17], w[18], offset); w[60] = hc_bytealign_S (w[16], w[17], offset); w[59] = hc_bytealign_S (w[15], w[16], offset); w[58] = hc_bytealign_S (w[14], w[15], offset); w[57] = hc_bytealign_S (w[13], w[14], offset); w[56] = hc_bytealign_S (w[12], w[13], offset); w[55] = hc_bytealign_S (w[11], w[12], offset); w[54] = hc_bytealign_S (w[10], w[11], offset); w[53] = hc_bytealign_S (w[ 9], w[10], offset); w[52] = hc_bytealign_S (w[ 8], w[ 9], offset); w[51] = hc_bytealign_S (w[ 7], w[ 8], offset); w[50] = hc_bytealign_S (w[ 6], w[ 7], offset); w[49] = hc_bytealign_S (w[ 5], w[ 6], offset); w[48] = hc_bytealign_S (w[ 4], w[ 5], offset); w[47] = hc_bytealign_S (w[ 3], w[ 4], offset); w[46] = hc_bytealign_S (w[ 2], w[ 3], offset); w[45] = hc_bytealign_S (w[ 1], w[ 2], offset); w[44] = hc_bytealign_S (w[ 0], w[ 1], offset); w[43] = hc_bytealign_S ( 0, w[ 0], offset); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_bytealign_S (w[18], 
w[19], offset); w[62] = hc_bytealign_S (w[17], w[18], offset); w[61] = hc_bytealign_S (w[16], w[17], offset); w[60] = hc_bytealign_S (w[15], w[16], offset); w[59] = hc_bytealign_S (w[14], w[15], offset); w[58] = hc_bytealign_S (w[13], w[14], offset); w[57] = hc_bytealign_S (w[12], w[13], offset); w[56] = hc_bytealign_S (w[11], w[12], offset); w[55] = hc_bytealign_S (w[10], w[11], offset); w[54] = hc_bytealign_S (w[ 9], w[10], offset); w[53] = hc_bytealign_S (w[ 8], w[ 9], offset); w[52] = hc_bytealign_S (w[ 7], w[ 8], offset); w[51] = hc_bytealign_S (w[ 6], w[ 7], offset); w[50] = hc_bytealign_S (w[ 5], w[ 6], offset); w[49] = hc_bytealign_S (w[ 4], w[ 5], offset); w[48] = hc_bytealign_S (w[ 3], w[ 4], offset); w[47] = hc_bytealign_S (w[ 2], w[ 3], offset); w[46] = hc_bytealign_S (w[ 1], w[ 2], offset); w[45] = hc_bytealign_S (w[ 0], w[ 1], offset); w[44] = hc_bytealign_S ( 0, w[ 0], offset); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_bytealign_S (w[17], w[18], offset); w[62] = hc_bytealign_S (w[16], w[17], offset); w[61] = hc_bytealign_S (w[15], w[16], offset); w[60] = hc_bytealign_S (w[14], w[15], offset); w[59] = hc_bytealign_S (w[13], w[14], offset); w[58] = hc_bytealign_S (w[12], w[13], offset); w[57] = hc_bytealign_S (w[11], w[12], offset); w[56] = hc_bytealign_S (w[10], w[11], offset); w[55] = hc_bytealign_S (w[ 9], w[10], offset); w[54] = hc_bytealign_S (w[ 8], w[ 9], offset); w[53] = hc_bytealign_S (w[ 7], w[ 8], offset); w[52] = hc_bytealign_S (w[ 6], w[ 7], offset); w[51] = hc_bytealign_S (w[ 
5], w[ 6], offset); w[50] = hc_bytealign_S (w[ 4], w[ 5], offset); w[49] = hc_bytealign_S (w[ 3], w[ 4], offset); w[48] = hc_bytealign_S (w[ 2], w[ 3], offset); w[47] = hc_bytealign_S (w[ 1], w[ 2], offset); w[46] = hc_bytealign_S (w[ 0], w[ 1], offset); w[45] = hc_bytealign_S ( 0, w[ 0], offset); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_bytealign_S (w[16], w[17], offset); w[62] = hc_bytealign_S (w[15], w[16], offset); w[61] = hc_bytealign_S (w[14], w[15], offset); w[60] = hc_bytealign_S (w[13], w[14], offset); w[59] = hc_bytealign_S (w[12], w[13], offset); w[58] = hc_bytealign_S (w[11], w[12], offset); w[57] = hc_bytealign_S (w[10], w[11], offset); w[56] = hc_bytealign_S (w[ 9], w[10], offset); w[55] = hc_bytealign_S (w[ 8], w[ 9], offset); w[54] = hc_bytealign_S (w[ 7], w[ 8], offset); w[53] = hc_bytealign_S (w[ 6], w[ 7], offset); w[52] = hc_bytealign_S (w[ 5], w[ 6], offset); w[51] = hc_bytealign_S (w[ 4], w[ 5], offset); w[50] = hc_bytealign_S (w[ 3], w[ 4], offset); w[49] = hc_bytealign_S (w[ 2], w[ 3], offset); w[48] = hc_bytealign_S (w[ 1], w[ 2], offset); w[47] = hc_bytealign_S (w[ 0], w[ 1], offset); w[46] = hc_bytealign_S ( 0, w[ 0], offset); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] 
= 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_bytealign_S (w[15], w[16], offset); w[62] = hc_bytealign_S (w[14], w[15], offset); w[61] = hc_bytealign_S (w[13], w[14], offset); w[60] = hc_bytealign_S (w[12], w[13], offset); w[59] = hc_bytealign_S (w[11], w[12], offset); w[58] = hc_bytealign_S (w[10], w[11], offset); w[57] = hc_bytealign_S (w[ 9], w[10], offset); w[56] = hc_bytealign_S (w[ 8], w[ 9], offset); w[55] = hc_bytealign_S (w[ 7], w[ 8], offset); w[54] = hc_bytealign_S (w[ 6], w[ 7], offset); w[53] = hc_bytealign_S (w[ 5], w[ 6], offset); w[52] = hc_bytealign_S (w[ 4], w[ 5], offset); w[51] = hc_bytealign_S (w[ 3], w[ 4], offset); w[50] = hc_bytealign_S (w[ 2], w[ 3], offset); w[49] = hc_bytealign_S (w[ 1], w[ 2], offset); w[48] = hc_bytealign_S (w[ 0], w[ 1], offset); w[47] = hc_bytealign_S ( 0, w[ 0], offset); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_bytealign_S (w[14], w[15], offset); w[62] = hc_bytealign_S (w[13], w[14], offset); w[61] = hc_bytealign_S (w[12], w[13], offset); w[60] = hc_bytealign_S (w[11], w[12], offset); w[59] = hc_bytealign_S (w[10], w[11], offset); w[58] = hc_bytealign_S (w[ 9], w[10], offset); w[57] = hc_bytealign_S (w[ 8], w[ 9], offset); w[56] = hc_bytealign_S (w[ 7], w[ 8], offset); w[55] = hc_bytealign_S (w[ 6], w[ 7], offset); w[54] = hc_bytealign_S (w[ 5], w[ 6], offset); w[53] = hc_bytealign_S 
(w[ 4], w[ 5], offset); w[52] = hc_bytealign_S (w[ 3], w[ 4], offset); w[51] = hc_bytealign_S (w[ 2], w[ 3], offset); w[50] = hc_bytealign_S (w[ 1], w[ 2], offset); w[49] = hc_bytealign_S (w[ 0], w[ 1], offset); w[48] = hc_bytealign_S ( 0, w[ 0], offset); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_bytealign_S (w[13], w[14], offset); w[62] = hc_bytealign_S (w[12], w[13], offset); w[61] = hc_bytealign_S (w[11], w[12], offset); w[60] = hc_bytealign_S (w[10], w[11], offset); w[59] = hc_bytealign_S (w[ 9], w[10], offset); w[58] = hc_bytealign_S (w[ 8], w[ 9], offset); w[57] = hc_bytealign_S (w[ 7], w[ 8], offset); w[56] = hc_bytealign_S (w[ 6], w[ 7], offset); w[55] = hc_bytealign_S (w[ 5], w[ 6], offset); w[54] = hc_bytealign_S (w[ 4], w[ 5], offset); w[53] = hc_bytealign_S (w[ 3], w[ 4], offset); w[52] = hc_bytealign_S (w[ 2], w[ 3], offset); w[51] = hc_bytealign_S (w[ 1], w[ 2], offset); w[50] = hc_bytealign_S (w[ 0], w[ 1], offset); w[49] = hc_bytealign_S ( 0, w[ 0], offset); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 
3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_bytealign_S (w[12], w[13], offset); w[62] = hc_bytealign_S (w[11], w[12], offset); w[61] = hc_bytealign_S (w[10], w[11], offset); w[60] = hc_bytealign_S (w[ 9], w[10], offset); w[59] = hc_bytealign_S (w[ 8], w[ 9], offset); w[58] = hc_bytealign_S (w[ 7], w[ 8], offset); w[57] = hc_bytealign_S (w[ 6], w[ 7], offset); w[56] = hc_bytealign_S (w[ 5], w[ 6], offset); w[55] = hc_bytealign_S (w[ 4], w[ 5], offset); w[54] = hc_bytealign_S (w[ 3], w[ 4], offset); w[53] = hc_bytealign_S (w[ 2], w[ 3], offset); w[52] = hc_bytealign_S (w[ 1], w[ 2], offset); w[51] = hc_bytealign_S (w[ 0], w[ 1], offset); w[50] = hc_bytealign_S ( 0, w[ 0], offset); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_bytealign_S (w[11], w[12], offset); w[62] = hc_bytealign_S (w[10], w[11], offset); w[61] = hc_bytealign_S (w[ 9], w[10], offset); w[60] = hc_bytealign_S (w[ 8], w[ 9], offset); w[59] = hc_bytealign_S (w[ 7], w[ 8], offset); w[58] = hc_bytealign_S (w[ 6], w[ 7], offset); w[57] = hc_bytealign_S (w[ 5], w[ 6], offset); w[56] = hc_bytealign_S (w[ 4], w[ 5], offset); w[55] = hc_bytealign_S (w[ 3], w[ 4], offset); w[54] = hc_bytealign_S (w[ 2], w[ 3], offset); w[53] = hc_bytealign_S (w[ 1], w[ 2], offset); w[52] = hc_bytealign_S (w[ 0], w[ 1], offset); w[51] = hc_bytealign_S ( 0, w[ 0], offset); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; 
w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_bytealign_S (w[10], w[11], offset); w[62] = hc_bytealign_S (w[ 9], w[10], offset); w[61] = hc_bytealign_S (w[ 8], w[ 9], offset); w[60] = hc_bytealign_S (w[ 7], w[ 8], offset); w[59] = hc_bytealign_S (w[ 6], w[ 7], offset); w[58] = hc_bytealign_S (w[ 5], w[ 6], offset); w[57] = hc_bytealign_S (w[ 4], w[ 5], offset); w[56] = hc_bytealign_S (w[ 3], w[ 4], offset); w[55] = hc_bytealign_S (w[ 2], w[ 3], offset); w[54] = hc_bytealign_S (w[ 1], w[ 2], offset); w[53] = hc_bytealign_S (w[ 0], w[ 1], offset); w[52] = hc_bytealign_S ( 0, w[ 0], offset); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_bytealign_S (w[ 9], w[10], offset); w[62] = hc_bytealign_S (w[ 8], w[ 9], offset); w[61] = hc_bytealign_S (w[ 7], w[ 8], offset); w[60] = hc_bytealign_S (w[ 6], w[ 7], offset); w[59] = hc_bytealign_S (w[ 5], w[ 6], offset); w[58] = hc_bytealign_S (w[ 4], w[ 5], offset); w[57] = hc_bytealign_S (w[ 3], w[ 4], offset); w[56] = hc_bytealign_S (w[ 2], w[ 3], offset); w[55] = 
hc_bytealign_S (w[ 1], w[ 2], offset); w[54] = hc_bytealign_S (w[ 0], w[ 1], offset); w[53] = hc_bytealign_S ( 0, w[ 0], offset); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_bytealign_S (w[ 8], w[ 9], offset); w[62] = hc_bytealign_S (w[ 7], w[ 8], offset); w[61] = hc_bytealign_S (w[ 6], w[ 7], offset); w[60] = hc_bytealign_S (w[ 5], w[ 6], offset); w[59] = hc_bytealign_S (w[ 4], w[ 5], offset); w[58] = hc_bytealign_S (w[ 3], w[ 4], offset); w[57] = hc_bytealign_S (w[ 2], w[ 3], offset); w[56] = hc_bytealign_S (w[ 1], w[ 2], offset); w[55] = hc_bytealign_S (w[ 0], w[ 1], offset); w[54] = hc_bytealign_S ( 0, w[ 0], offset); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_bytealign_S (w[ 7], w[ 8], offset); w[62] = hc_bytealign_S (w[ 6], w[ 7], offset); w[61] = hc_bytealign_S (w[ 5], w[ 6], offset); w[60] = hc_bytealign_S (w[ 4], w[ 5], offset); w[59] 
= hc_bytealign_S (w[ 3], w[ 4], offset); w[58] = hc_bytealign_S (w[ 2], w[ 3], offset); w[57] = hc_bytealign_S (w[ 1], w[ 2], offset); w[56] = hc_bytealign_S (w[ 0], w[ 1], offset); w[55] = hc_bytealign_S ( 0, w[ 0], offset); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_bytealign_S (w[ 6], w[ 7], offset); w[62] = hc_bytealign_S (w[ 5], w[ 6], offset); w[61] = hc_bytealign_S (w[ 4], w[ 5], offset); w[60] = hc_bytealign_S (w[ 3], w[ 4], offset); w[59] = hc_bytealign_S (w[ 2], w[ 3], offset); w[58] = hc_bytealign_S (w[ 1], w[ 2], offset); w[57] = hc_bytealign_S (w[ 0], w[ 1], offset); w[56] = hc_bytealign_S ( 0, w[ 0], offset); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_bytealign_S (w[ 5], w[ 6], offset); w[62] = hc_bytealign_S (w[ 4], w[ 5], offset); w[61] = hc_bytealign_S (w[ 3], w[ 4], offset); w[60] 
= hc_bytealign_S (w[ 2], w[ 3], offset); w[59] = hc_bytealign_S (w[ 1], w[ 2], offset); w[58] = hc_bytealign_S (w[ 0], w[ 1], offset); w[57] = hc_bytealign_S ( 0, w[ 0], offset); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_bytealign_S (w[ 4], w[ 5], offset); w[62] = hc_bytealign_S (w[ 3], w[ 4], offset); w[61] = hc_bytealign_S (w[ 2], w[ 3], offset); w[60] = hc_bytealign_S (w[ 1], w[ 2], offset); w[59] = hc_bytealign_S (w[ 0], w[ 1], offset); w[58] = hc_bytealign_S ( 0, w[ 0], offset); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_bytealign_S (w[ 3], w[ 4], offset); w[62] = hc_bytealign_S (w[ 2], w[ 3], offset); w[61] = hc_bytealign_S (w[ 1], w[ 2], offset); w[60] = hc_bytealign_S (w[ 0], w[ 1], offset); w[59] = hc_bytealign_S ( 0, w[ 0], offset); w[58] = 0; 
w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_bytealign_S (w[ 2], w[ 3], offset); w[62] = hc_bytealign_S (w[ 1], w[ 2], offset); w[61] = hc_bytealign_S (w[ 0], w[ 1], offset); w[60] = hc_bytealign_S ( 0, w[ 0], offset); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_bytealign_S (w[ 1], w[ 2], offset); w[62] = hc_bytealign_S (w[ 0], w[ 1], offset); w[61] = hc_bytealign_S ( 0, w[ 0], offset); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] 
= 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_bytealign_S (w[ 0], w[ 1], offset); w[62] = hc_bytealign_S ( 0, w[ 0], offset); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_bytealign_S ( 0, w[ 0], offset); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV const int offset_mod_4 = offset & 3; 
const int offset_minus_4 = 4 - offset_mod_4; #if defined IS_NV const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); #endif switch (offset_switch) { case 0: w[63] = hc_byte_perm_S (w[62], w[63], selector); w[62] = hc_byte_perm_S (w[61], w[62], selector); w[61] = hc_byte_perm_S (w[60], w[61], selector); w[60] = hc_byte_perm_S (w[59], w[60], selector); w[59] = hc_byte_perm_S (w[58], w[59], selector); w[58] = hc_byte_perm_S (w[57], w[58], selector); w[57] = hc_byte_perm_S (w[56], w[57], selector); w[56] = hc_byte_perm_S (w[55], w[56], selector); w[55] = hc_byte_perm_S (w[54], w[55], selector); w[54] = hc_byte_perm_S (w[53], w[54], selector); w[53] = hc_byte_perm_S (w[52], w[53], selector); w[52] = hc_byte_perm_S (w[51], w[52], selector); w[51] = hc_byte_perm_S (w[50], w[51], selector); w[50] = hc_byte_perm_S (w[49], w[50], selector); w[49] = hc_byte_perm_S (w[48], w[49], selector); w[48] = hc_byte_perm_S (w[47], w[48], selector); w[47] = hc_byte_perm_S (w[46], w[47], selector); w[46] = hc_byte_perm_S (w[45], w[46], selector); w[45] = hc_byte_perm_S (w[44], w[45], selector); w[44] = hc_byte_perm_S (w[43], w[44], selector); w[43] = hc_byte_perm_S (w[42], w[43], selector); w[42] = hc_byte_perm_S (w[41], w[42], selector); w[41] = hc_byte_perm_S (w[40], w[41], selector); w[40] = hc_byte_perm_S (w[39], w[40], selector); w[39] = hc_byte_perm_S (w[38], w[39], selector); w[38] = hc_byte_perm_S (w[37], w[38], selector); w[37] = hc_byte_perm_S (w[36], w[37], selector); w[36] = hc_byte_perm_S (w[35], w[36], selector); w[35] = hc_byte_perm_S (w[34], w[35], selector); w[34] = hc_byte_perm_S (w[33], w[34], selector); w[33] = hc_byte_perm_S (w[32], w[33], selector); w[32] = hc_byte_perm_S (w[31], w[32], selector); w[31] = hc_byte_perm_S (w[30], w[31], selector); w[30] = hc_byte_perm_S (w[29], w[30], selector); w[29] = hc_byte_perm_S (w[28], 
w[29], selector); w[28] = hc_byte_perm_S (w[27], w[28], selector); w[27] = hc_byte_perm_S (w[26], w[27], selector); w[26] = hc_byte_perm_S (w[25], w[26], selector); w[25] = hc_byte_perm_S (w[24], w[25], selector); w[24] = hc_byte_perm_S (w[23], w[24], selector); w[23] = hc_byte_perm_S (w[22], w[23], selector); w[22] = hc_byte_perm_S (w[21], w[22], selector); w[21] = hc_byte_perm_S (w[20], w[21], selector); w[20] = hc_byte_perm_S (w[19], w[20], selector); w[19] = hc_byte_perm_S (w[18], w[19], selector); w[18] = hc_byte_perm_S (w[17], w[18], selector); w[17] = hc_byte_perm_S (w[16], w[17], selector); w[16] = hc_byte_perm_S (w[15], w[16], selector); w[15] = hc_byte_perm_S (w[14], w[15], selector); w[14] = hc_byte_perm_S (w[13], w[14], selector); w[13] = hc_byte_perm_S (w[12], w[13], selector); w[12] = hc_byte_perm_S (w[11], w[12], selector); w[11] = hc_byte_perm_S (w[10], w[11], selector); w[10] = hc_byte_perm_S (w[ 9], w[10], selector); w[ 9] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[ 8] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[ 7] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[ 6] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[ 5] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 4] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 3] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 2] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 1] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 0] = hc_byte_perm_S ( 0, w[ 0], selector); break; case 1: w[63] = hc_byte_perm_S (w[61], w[62], selector); w[62] = hc_byte_perm_S (w[60], w[61], selector); w[61] = hc_byte_perm_S (w[59], w[60], selector); w[60] = hc_byte_perm_S (w[58], w[59], selector); w[59] = hc_byte_perm_S (w[57], w[58], selector); w[58] = hc_byte_perm_S (w[56], w[57], selector); w[57] = hc_byte_perm_S (w[55], w[56], selector); w[56] = hc_byte_perm_S (w[54], w[55], selector); w[55] = hc_byte_perm_S (w[53], w[54], selector); w[54] = hc_byte_perm_S (w[52], w[53], selector); w[53] = hc_byte_perm_S (w[51], w[52], selector); w[52] = 
hc_byte_perm_S (w[50], w[51], selector); w[51] = hc_byte_perm_S (w[49], w[50], selector); w[50] = hc_byte_perm_S (w[48], w[49], selector); w[49] = hc_byte_perm_S (w[47], w[48], selector); w[48] = hc_byte_perm_S (w[46], w[47], selector); w[47] = hc_byte_perm_S (w[45], w[46], selector); w[46] = hc_byte_perm_S (w[44], w[45], selector); w[45] = hc_byte_perm_S (w[43], w[44], selector); w[44] = hc_byte_perm_S (w[42], w[43], selector); w[43] = hc_byte_perm_S (w[41], w[42], selector); w[42] = hc_byte_perm_S (w[40], w[41], selector); w[41] = hc_byte_perm_S (w[39], w[40], selector); w[40] = hc_byte_perm_S (w[38], w[39], selector); w[39] = hc_byte_perm_S (w[37], w[38], selector); w[38] = hc_byte_perm_S (w[36], w[37], selector); w[37] = hc_byte_perm_S (w[35], w[36], selector); w[36] = hc_byte_perm_S (w[34], w[35], selector); w[35] = hc_byte_perm_S (w[33], w[34], selector); w[34] = hc_byte_perm_S (w[32], w[33], selector); w[33] = hc_byte_perm_S (w[31], w[32], selector); w[32] = hc_byte_perm_S (w[30], w[31], selector); w[31] = hc_byte_perm_S (w[29], w[30], selector); w[30] = hc_byte_perm_S (w[28], w[29], selector); w[29] = hc_byte_perm_S (w[27], w[28], selector); w[28] = hc_byte_perm_S (w[26], w[27], selector); w[27] = hc_byte_perm_S (w[25], w[26], selector); w[26] = hc_byte_perm_S (w[24], w[25], selector); w[25] = hc_byte_perm_S (w[23], w[24], selector); w[24] = hc_byte_perm_S (w[22], w[23], selector); w[23] = hc_byte_perm_S (w[21], w[22], selector); w[22] = hc_byte_perm_S (w[20], w[21], selector); w[21] = hc_byte_perm_S (w[19], w[20], selector); w[20] = hc_byte_perm_S (w[18], w[19], selector); w[19] = hc_byte_perm_S (w[17], w[18], selector); w[18] = hc_byte_perm_S (w[16], w[17], selector); w[17] = hc_byte_perm_S (w[15], w[16], selector); w[16] = hc_byte_perm_S (w[14], w[15], selector); w[15] = hc_byte_perm_S (w[13], w[14], selector); w[14] = hc_byte_perm_S (w[12], w[13], selector); w[13] = hc_byte_perm_S (w[11], w[12], selector); w[12] = hc_byte_perm_S (w[10], w[11], 
selector); w[11] = hc_byte_perm_S (w[ 9], w[10], selector); w[10] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[ 9] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[ 8] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[ 7] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[ 6] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 5] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 4] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 3] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 2] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 1] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 0] = 0; break; case 2: w[63] = hc_byte_perm_S (w[60], w[61], selector); w[62] = hc_byte_perm_S (w[59], w[60], selector); w[61] = hc_byte_perm_S (w[58], w[59], selector); w[60] = hc_byte_perm_S (w[57], w[58], selector); w[59] = hc_byte_perm_S (w[56], w[57], selector); w[58] = hc_byte_perm_S (w[55], w[56], selector); w[57] = hc_byte_perm_S (w[54], w[55], selector); w[56] = hc_byte_perm_S (w[53], w[54], selector); w[55] = hc_byte_perm_S (w[52], w[53], selector); w[54] = hc_byte_perm_S (w[51], w[52], selector); w[53] = hc_byte_perm_S (w[50], w[51], selector); w[52] = hc_byte_perm_S (w[49], w[50], selector); w[51] = hc_byte_perm_S (w[48], w[49], selector); w[50] = hc_byte_perm_S (w[47], w[48], selector); w[49] = hc_byte_perm_S (w[46], w[47], selector); w[48] = hc_byte_perm_S (w[45], w[46], selector); w[47] = hc_byte_perm_S (w[44], w[45], selector); w[46] = hc_byte_perm_S (w[43], w[44], selector); w[45] = hc_byte_perm_S (w[42], w[43], selector); w[44] = hc_byte_perm_S (w[41], w[42], selector); w[43] = hc_byte_perm_S (w[40], w[41], selector); w[42] = hc_byte_perm_S (w[39], w[40], selector); w[41] = hc_byte_perm_S (w[38], w[39], selector); w[40] = hc_byte_perm_S (w[37], w[38], selector); w[39] = hc_byte_perm_S (w[36], w[37], selector); w[38] = hc_byte_perm_S (w[35], w[36], selector); w[37] = hc_byte_perm_S (w[34], w[35], selector); w[36] = hc_byte_perm_S (w[33], w[34], selector); w[35] = hc_byte_perm_S (w[32], w[33], selector); w[34] 
= hc_byte_perm_S (w[31], w[32], selector); w[33] = hc_byte_perm_S (w[30], w[31], selector); w[32] = hc_byte_perm_S (w[29], w[30], selector); w[31] = hc_byte_perm_S (w[28], w[29], selector); w[30] = hc_byte_perm_S (w[27], w[28], selector); w[29] = hc_byte_perm_S (w[26], w[27], selector); w[28] = hc_byte_perm_S (w[25], w[26], selector); w[27] = hc_byte_perm_S (w[24], w[25], selector); w[26] = hc_byte_perm_S (w[23], w[24], selector); w[25] = hc_byte_perm_S (w[22], w[23], selector); w[24] = hc_byte_perm_S (w[21], w[22], selector); w[23] = hc_byte_perm_S (w[20], w[21], selector); w[22] = hc_byte_perm_S (w[19], w[20], selector); w[21] = hc_byte_perm_S (w[18], w[19], selector); w[20] = hc_byte_perm_S (w[17], w[18], selector); w[19] = hc_byte_perm_S (w[16], w[17], selector); w[18] = hc_byte_perm_S (w[15], w[16], selector); w[17] = hc_byte_perm_S (w[14], w[15], selector); w[16] = hc_byte_perm_S (w[13], w[14], selector); w[15] = hc_byte_perm_S (w[12], w[13], selector); w[14] = hc_byte_perm_S (w[11], w[12], selector); w[13] = hc_byte_perm_S (w[10], w[11], selector); w[12] = hc_byte_perm_S (w[ 9], w[10], selector); w[11] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[10] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[ 9] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[ 8] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[ 7] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 6] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 5] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 4] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 3] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 2] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_byte_perm_S (w[59], w[60], selector); w[62] = hc_byte_perm_S (w[58], w[59], selector); w[61] = hc_byte_perm_S (w[57], w[58], selector); w[60] = hc_byte_perm_S (w[56], w[57], selector); w[59] = hc_byte_perm_S (w[55], w[56], selector); w[58] = hc_byte_perm_S (w[54], w[55], selector); w[57] = hc_byte_perm_S (w[53], w[54], selector); w[56] = 
hc_byte_perm_S (w[52], w[53], selector); w[55] = hc_byte_perm_S (w[51], w[52], selector); w[54] = hc_byte_perm_S (w[50], w[51], selector); w[53] = hc_byte_perm_S (w[49], w[50], selector); w[52] = hc_byte_perm_S (w[48], w[49], selector); w[51] = hc_byte_perm_S (w[47], w[48], selector); w[50] = hc_byte_perm_S (w[46], w[47], selector); w[49] = hc_byte_perm_S (w[45], w[46], selector); w[48] = hc_byte_perm_S (w[44], w[45], selector); w[47] = hc_byte_perm_S (w[43], w[44], selector); w[46] = hc_byte_perm_S (w[42], w[43], selector); w[45] = hc_byte_perm_S (w[41], w[42], selector); w[44] = hc_byte_perm_S (w[40], w[41], selector); w[43] = hc_byte_perm_S (w[39], w[40], selector); w[42] = hc_byte_perm_S (w[38], w[39], selector); w[41] = hc_byte_perm_S (w[37], w[38], selector); w[40] = hc_byte_perm_S (w[36], w[37], selector); w[39] = hc_byte_perm_S (w[35], w[36], selector); w[38] = hc_byte_perm_S (w[34], w[35], selector); w[37] = hc_byte_perm_S (w[33], w[34], selector); w[36] = hc_byte_perm_S (w[32], w[33], selector); w[35] = hc_byte_perm_S (w[31], w[32], selector); w[34] = hc_byte_perm_S (w[30], w[31], selector); w[33] = hc_byte_perm_S (w[29], w[30], selector); w[32] = hc_byte_perm_S (w[28], w[29], selector); w[31] = hc_byte_perm_S (w[27], w[28], selector); w[30] = hc_byte_perm_S (w[26], w[27], selector); w[29] = hc_byte_perm_S (w[25], w[26], selector); w[28] = hc_byte_perm_S (w[24], w[25], selector); w[27] = hc_byte_perm_S (w[23], w[24], selector); w[26] = hc_byte_perm_S (w[22], w[23], selector); w[25] = hc_byte_perm_S (w[21], w[22], selector); w[24] = hc_byte_perm_S (w[20], w[21], selector); w[23] = hc_byte_perm_S (w[19], w[20], selector); w[22] = hc_byte_perm_S (w[18], w[19], selector); w[21] = hc_byte_perm_S (w[17], w[18], selector); w[20] = hc_byte_perm_S (w[16], w[17], selector); w[19] = hc_byte_perm_S (w[15], w[16], selector); w[18] = hc_byte_perm_S (w[14], w[15], selector); w[17] = hc_byte_perm_S (w[13], w[14], selector); w[16] = hc_byte_perm_S (w[12], w[13], 
selector); w[15] = hc_byte_perm_S (w[11], w[12], selector); w[14] = hc_byte_perm_S (w[10], w[11], selector); w[13] = hc_byte_perm_S (w[ 9], w[10], selector); w[12] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[11] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[10] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[ 9] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[ 8] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 7] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 6] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 5] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 4] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 3] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_byte_perm_S (w[58], w[59], selector); w[62] = hc_byte_perm_S (w[57], w[58], selector); w[61] = hc_byte_perm_S (w[56], w[57], selector); w[60] = hc_byte_perm_S (w[55], w[56], selector); w[59] = hc_byte_perm_S (w[54], w[55], selector); w[58] = hc_byte_perm_S (w[53], w[54], selector); w[57] = hc_byte_perm_S (w[52], w[53], selector); w[56] = hc_byte_perm_S (w[51], w[52], selector); w[55] = hc_byte_perm_S (w[50], w[51], selector); w[54] = hc_byte_perm_S (w[49], w[50], selector); w[53] = hc_byte_perm_S (w[48], w[49], selector); w[52] = hc_byte_perm_S (w[47], w[48], selector); w[51] = hc_byte_perm_S (w[46], w[47], selector); w[50] = hc_byte_perm_S (w[45], w[46], selector); w[49] = hc_byte_perm_S (w[44], w[45], selector); w[48] = hc_byte_perm_S (w[43], w[44], selector); w[47] = hc_byte_perm_S (w[42], w[43], selector); w[46] = hc_byte_perm_S (w[41], w[42], selector); w[45] = hc_byte_perm_S (w[40], w[41], selector); w[44] = hc_byte_perm_S (w[39], w[40], selector); w[43] = hc_byte_perm_S (w[38], w[39], selector); w[42] = hc_byte_perm_S (w[37], w[38], selector); w[41] = hc_byte_perm_S (w[36], w[37], selector); w[40] = hc_byte_perm_S (w[35], w[36], selector); w[39] = hc_byte_perm_S (w[34], w[35], selector); w[38] = hc_byte_perm_S (w[33], w[34], selector); w[37] = hc_byte_perm_S (w[32], 
w[33], selector); w[36] = hc_byte_perm_S (w[31], w[32], selector); w[35] = hc_byte_perm_S (w[30], w[31], selector); w[34] = hc_byte_perm_S (w[29], w[30], selector); w[33] = hc_byte_perm_S (w[28], w[29], selector); w[32] = hc_byte_perm_S (w[27], w[28], selector); w[31] = hc_byte_perm_S (w[26], w[27], selector); w[30] = hc_byte_perm_S (w[25], w[26], selector); w[29] = hc_byte_perm_S (w[24], w[25], selector); w[28] = hc_byte_perm_S (w[23], w[24], selector); w[27] = hc_byte_perm_S (w[22], w[23], selector); w[26] = hc_byte_perm_S (w[21], w[22], selector); w[25] = hc_byte_perm_S (w[20], w[21], selector); w[24] = hc_byte_perm_S (w[19], w[20], selector); w[23] = hc_byte_perm_S (w[18], w[19], selector); w[22] = hc_byte_perm_S (w[17], w[18], selector); w[21] = hc_byte_perm_S (w[16], w[17], selector); w[20] = hc_byte_perm_S (w[15], w[16], selector); w[19] = hc_byte_perm_S (w[14], w[15], selector); w[18] = hc_byte_perm_S (w[13], w[14], selector); w[17] = hc_byte_perm_S (w[12], w[13], selector); w[16] = hc_byte_perm_S (w[11], w[12], selector); w[15] = hc_byte_perm_S (w[10], w[11], selector); w[14] = hc_byte_perm_S (w[ 9], w[10], selector); w[13] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[12] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[11] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[10] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[ 9] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 8] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 7] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 6] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 5] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 4] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_byte_perm_S (w[57], w[58], selector); w[62] = hc_byte_perm_S (w[56], w[57], selector); w[61] = hc_byte_perm_S (w[55], w[56], selector); w[60] = hc_byte_perm_S (w[54], w[55], selector); w[59] = hc_byte_perm_S (w[53], w[54], selector); w[58] = hc_byte_perm_S (w[52], w[53], selector); w[57] = 
hc_byte_perm_S (w[51], w[52], selector); w[56] = hc_byte_perm_S (w[50], w[51], selector); w[55] = hc_byte_perm_S (w[49], w[50], selector); w[54] = hc_byte_perm_S (w[48], w[49], selector); w[53] = hc_byte_perm_S (w[47], w[48], selector); w[52] = hc_byte_perm_S (w[46], w[47], selector); w[51] = hc_byte_perm_S (w[45], w[46], selector); w[50] = hc_byte_perm_S (w[44], w[45], selector); w[49] = hc_byte_perm_S (w[43], w[44], selector); w[48] = hc_byte_perm_S (w[42], w[43], selector); w[47] = hc_byte_perm_S (w[41], w[42], selector); w[46] = hc_byte_perm_S (w[40], w[41], selector); w[45] = hc_byte_perm_S (w[39], w[40], selector); w[44] = hc_byte_perm_S (w[38], w[39], selector); w[43] = hc_byte_perm_S (w[37], w[38], selector); w[42] = hc_byte_perm_S (w[36], w[37], selector); w[41] = hc_byte_perm_S (w[35], w[36], selector); w[40] = hc_byte_perm_S (w[34], w[35], selector); w[39] = hc_byte_perm_S (w[33], w[34], selector); w[38] = hc_byte_perm_S (w[32], w[33], selector); w[37] = hc_byte_perm_S (w[31], w[32], selector); w[36] = hc_byte_perm_S (w[30], w[31], selector); w[35] = hc_byte_perm_S (w[29], w[30], selector); w[34] = hc_byte_perm_S (w[28], w[29], selector); w[33] = hc_byte_perm_S (w[27], w[28], selector); w[32] = hc_byte_perm_S (w[26], w[27], selector); w[31] = hc_byte_perm_S (w[25], w[26], selector); w[30] = hc_byte_perm_S (w[24], w[25], selector); w[29] = hc_byte_perm_S (w[23], w[24], selector); w[28] = hc_byte_perm_S (w[22], w[23], selector); w[27] = hc_byte_perm_S (w[21], w[22], selector); w[26] = hc_byte_perm_S (w[20], w[21], selector); w[25] = hc_byte_perm_S (w[19], w[20], selector); w[24] = hc_byte_perm_S (w[18], w[19], selector); w[23] = hc_byte_perm_S (w[17], w[18], selector); w[22] = hc_byte_perm_S (w[16], w[17], selector); w[21] = hc_byte_perm_S (w[15], w[16], selector); w[20] = hc_byte_perm_S (w[14], w[15], selector); w[19] = hc_byte_perm_S (w[13], w[14], selector); w[18] = hc_byte_perm_S (w[12], w[13], selector); w[17] = hc_byte_perm_S (w[11], w[12], 
selector); w[16] = hc_byte_perm_S (w[10], w[11], selector); w[15] = hc_byte_perm_S (w[ 9], w[10], selector); w[14] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[13] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[12] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[11] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[10] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[ 9] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 8] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 7] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 6] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 5] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_byte_perm_S (w[56], w[57], selector); w[62] = hc_byte_perm_S (w[55], w[56], selector); w[61] = hc_byte_perm_S (w[54], w[55], selector); w[60] = hc_byte_perm_S (w[53], w[54], selector); w[59] = hc_byte_perm_S (w[52], w[53], selector); w[58] = hc_byte_perm_S (w[51], w[52], selector); w[57] = hc_byte_perm_S (w[50], w[51], selector); w[56] = hc_byte_perm_S (w[49], w[50], selector); w[55] = hc_byte_perm_S (w[48], w[49], selector); w[54] = hc_byte_perm_S (w[47], w[48], selector); w[53] = hc_byte_perm_S (w[46], w[47], selector); w[52] = hc_byte_perm_S (w[45], w[46], selector); w[51] = hc_byte_perm_S (w[44], w[45], selector); w[50] = hc_byte_perm_S (w[43], w[44], selector); w[49] = hc_byte_perm_S (w[42], w[43], selector); w[48] = hc_byte_perm_S (w[41], w[42], selector); w[47] = hc_byte_perm_S (w[40], w[41], selector); w[46] = hc_byte_perm_S (w[39], w[40], selector); w[45] = hc_byte_perm_S (w[38], w[39], selector); w[44] = hc_byte_perm_S (w[37], w[38], selector); w[43] = hc_byte_perm_S (w[36], w[37], selector); w[42] = hc_byte_perm_S (w[35], w[36], selector); w[41] = hc_byte_perm_S (w[34], w[35], selector); w[40] = hc_byte_perm_S (w[33], w[34], selector); w[39] = hc_byte_perm_S (w[32], w[33], selector); w[38] = hc_byte_perm_S (w[31], w[32], selector); w[37] = hc_byte_perm_S (w[30], w[31], selector); w[36] = 
hc_byte_perm_S (w[29], w[30], selector); w[35] = hc_byte_perm_S (w[28], w[29], selector); w[34] = hc_byte_perm_S (w[27], w[28], selector); w[33] = hc_byte_perm_S (w[26], w[27], selector); w[32] = hc_byte_perm_S (w[25], w[26], selector); w[31] = hc_byte_perm_S (w[24], w[25], selector); w[30] = hc_byte_perm_S (w[23], w[24], selector); w[29] = hc_byte_perm_S (w[22], w[23], selector); w[28] = hc_byte_perm_S (w[21], w[22], selector); w[27] = hc_byte_perm_S (w[20], w[21], selector); w[26] = hc_byte_perm_S (w[19], w[20], selector); w[25] = hc_byte_perm_S (w[18], w[19], selector); w[24] = hc_byte_perm_S (w[17], w[18], selector); w[23] = hc_byte_perm_S (w[16], w[17], selector); w[22] = hc_byte_perm_S (w[15], w[16], selector); w[21] = hc_byte_perm_S (w[14], w[15], selector); w[20] = hc_byte_perm_S (w[13], w[14], selector); w[19] = hc_byte_perm_S (w[12], w[13], selector); w[18] = hc_byte_perm_S (w[11], w[12], selector); w[17] = hc_byte_perm_S (w[10], w[11], selector); w[16] = hc_byte_perm_S (w[ 9], w[10], selector); w[15] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[14] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[13] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[12] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[11] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[10] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[ 9] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 8] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 7] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 6] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_byte_perm_S (w[55], w[56], selector); w[62] = hc_byte_perm_S (w[54], w[55], selector); w[61] = hc_byte_perm_S (w[53], w[54], selector); w[60] = hc_byte_perm_S (w[52], w[53], selector); w[59] = hc_byte_perm_S (w[51], w[52], selector); w[58] = hc_byte_perm_S (w[50], w[51], selector); w[57] = hc_byte_perm_S (w[49], w[50], selector); w[56] = hc_byte_perm_S (w[48], w[49], selector); w[55] = 
hc_byte_perm_S (w[47], w[48], selector); w[54] = hc_byte_perm_S (w[46], w[47], selector); w[53] = hc_byte_perm_S (w[45], w[46], selector); w[52] = hc_byte_perm_S (w[44], w[45], selector); w[51] = hc_byte_perm_S (w[43], w[44], selector); w[50] = hc_byte_perm_S (w[42], w[43], selector); w[49] = hc_byte_perm_S (w[41], w[42], selector); w[48] = hc_byte_perm_S (w[40], w[41], selector); w[47] = hc_byte_perm_S (w[39], w[40], selector); w[46] = hc_byte_perm_S (w[38], w[39], selector); w[45] = hc_byte_perm_S (w[37], w[38], selector); w[44] = hc_byte_perm_S (w[36], w[37], selector); w[43] = hc_byte_perm_S (w[35], w[36], selector); w[42] = hc_byte_perm_S (w[34], w[35], selector); w[41] = hc_byte_perm_S (w[33], w[34], selector); w[40] = hc_byte_perm_S (w[32], w[33], selector); w[39] = hc_byte_perm_S (w[31], w[32], selector); w[38] = hc_byte_perm_S (w[30], w[31], selector); w[37] = hc_byte_perm_S (w[29], w[30], selector); w[36] = hc_byte_perm_S (w[28], w[29], selector); w[35] = hc_byte_perm_S (w[27], w[28], selector); w[34] = hc_byte_perm_S (w[26], w[27], selector); w[33] = hc_byte_perm_S (w[25], w[26], selector); w[32] = hc_byte_perm_S (w[24], w[25], selector); w[31] = hc_byte_perm_S (w[23], w[24], selector); w[30] = hc_byte_perm_S (w[22], w[23], selector); w[29] = hc_byte_perm_S (w[21], w[22], selector); w[28] = hc_byte_perm_S (w[20], w[21], selector); w[27] = hc_byte_perm_S (w[19], w[20], selector); w[26] = hc_byte_perm_S (w[18], w[19], selector); w[25] = hc_byte_perm_S (w[17], w[18], selector); w[24] = hc_byte_perm_S (w[16], w[17], selector); w[23] = hc_byte_perm_S (w[15], w[16], selector); w[22] = hc_byte_perm_S (w[14], w[15], selector); w[21] = hc_byte_perm_S (w[13], w[14], selector); w[20] = hc_byte_perm_S (w[12], w[13], selector); w[19] = hc_byte_perm_S (w[11], w[12], selector); w[18] = hc_byte_perm_S (w[10], w[11], selector); w[17] = hc_byte_perm_S (w[ 9], w[10], selector); w[16] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[15] = hc_byte_perm_S (w[ 7], w[ 8], 
selector); w[14] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[13] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[12] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[11] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[10] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[ 9] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 8] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 7] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_byte_perm_S (w[54], w[55], selector); w[62] = hc_byte_perm_S (w[53], w[54], selector); w[61] = hc_byte_perm_S (w[52], w[53], selector); w[60] = hc_byte_perm_S (w[51], w[52], selector); w[59] = hc_byte_perm_S (w[50], w[51], selector); w[58] = hc_byte_perm_S (w[49], w[50], selector); w[57] = hc_byte_perm_S (w[48], w[49], selector); w[56] = hc_byte_perm_S (w[47], w[48], selector); w[55] = hc_byte_perm_S (w[46], w[47], selector); w[54] = hc_byte_perm_S (w[45], w[46], selector); w[53] = hc_byte_perm_S (w[44], w[45], selector); w[52] = hc_byte_perm_S (w[43], w[44], selector); w[51] = hc_byte_perm_S (w[42], w[43], selector); w[50] = hc_byte_perm_S (w[41], w[42], selector); w[49] = hc_byte_perm_S (w[40], w[41], selector); w[48] = hc_byte_perm_S (w[39], w[40], selector); w[47] = hc_byte_perm_S (w[38], w[39], selector); w[46] = hc_byte_perm_S (w[37], w[38], selector); w[45] = hc_byte_perm_S (w[36], w[37], selector); w[44] = hc_byte_perm_S (w[35], w[36], selector); w[43] = hc_byte_perm_S (w[34], w[35], selector); w[42] = hc_byte_perm_S (w[33], w[34], selector); w[41] = hc_byte_perm_S (w[32], w[33], selector); w[40] = hc_byte_perm_S (w[31], w[32], selector); w[39] = hc_byte_perm_S (w[30], w[31], selector); w[38] = hc_byte_perm_S (w[29], w[30], selector); w[37] = hc_byte_perm_S (w[28], w[29], selector); w[36] = hc_byte_perm_S (w[27], w[28], selector); w[35] = hc_byte_perm_S (w[26], w[27], selector); w[34] = hc_byte_perm_S (w[25], w[26], selector); w[33] = hc_byte_perm_S (w[24], w[25], 
selector); w[32] = hc_byte_perm_S (w[23], w[24], selector); w[31] = hc_byte_perm_S (w[22], w[23], selector); w[30] = hc_byte_perm_S (w[21], w[22], selector); w[29] = hc_byte_perm_S (w[20], w[21], selector); w[28] = hc_byte_perm_S (w[19], w[20], selector); w[27] = hc_byte_perm_S (w[18], w[19], selector); w[26] = hc_byte_perm_S (w[17], w[18], selector); w[25] = hc_byte_perm_S (w[16], w[17], selector); w[24] = hc_byte_perm_S (w[15], w[16], selector); w[23] = hc_byte_perm_S (w[14], w[15], selector); w[22] = hc_byte_perm_S (w[13], w[14], selector); w[21] = hc_byte_perm_S (w[12], w[13], selector); w[20] = hc_byte_perm_S (w[11], w[12], selector); w[19] = hc_byte_perm_S (w[10], w[11], selector); w[18] = hc_byte_perm_S (w[ 9], w[10], selector); w[17] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[16] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[15] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[14] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[13] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[12] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[11] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[10] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[ 9] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 8] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_byte_perm_S (w[53], w[54], selector); w[62] = hc_byte_perm_S (w[52], w[53], selector); w[61] = hc_byte_perm_S (w[51], w[52], selector); w[60] = hc_byte_perm_S (w[50], w[51], selector); w[59] = hc_byte_perm_S (w[49], w[50], selector); w[58] = hc_byte_perm_S (w[48], w[49], selector); w[57] = hc_byte_perm_S (w[47], w[48], selector); w[56] = hc_byte_perm_S (w[46], w[47], selector); w[55] = hc_byte_perm_S (w[45], w[46], selector); w[54] = hc_byte_perm_S (w[44], w[45], selector); w[53] = hc_byte_perm_S (w[43], w[44], selector); w[52] = hc_byte_perm_S (w[42], w[43], selector); w[51] = hc_byte_perm_S (w[41], w[42], selector); w[50] = hc_byte_perm_S 
(w[40], w[41], selector); w[49] = hc_byte_perm_S (w[39], w[40], selector); w[48] = hc_byte_perm_S (w[38], w[39], selector); w[47] = hc_byte_perm_S (w[37], w[38], selector); w[46] = hc_byte_perm_S (w[36], w[37], selector); w[45] = hc_byte_perm_S (w[35], w[36], selector); w[44] = hc_byte_perm_S (w[34], w[35], selector); w[43] = hc_byte_perm_S (w[33], w[34], selector); w[42] = hc_byte_perm_S (w[32], w[33], selector); w[41] = hc_byte_perm_S (w[31], w[32], selector); w[40] = hc_byte_perm_S (w[30], w[31], selector); w[39] = hc_byte_perm_S (w[29], w[30], selector); w[38] = hc_byte_perm_S (w[28], w[29], selector); w[37] = hc_byte_perm_S (w[27], w[28], selector); w[36] = hc_byte_perm_S (w[26], w[27], selector); w[35] = hc_byte_perm_S (w[25], w[26], selector); w[34] = hc_byte_perm_S (w[24], w[25], selector); w[33] = hc_byte_perm_S (w[23], w[24], selector); w[32] = hc_byte_perm_S (w[22], w[23], selector); w[31] = hc_byte_perm_S (w[21], w[22], selector); w[30] = hc_byte_perm_S (w[20], w[21], selector); w[29] = hc_byte_perm_S (w[19], w[20], selector); w[28] = hc_byte_perm_S (w[18], w[19], selector); w[27] = hc_byte_perm_S (w[17], w[18], selector); w[26] = hc_byte_perm_S (w[16], w[17], selector); w[25] = hc_byte_perm_S (w[15], w[16], selector); w[24] = hc_byte_perm_S (w[14], w[15], selector); w[23] = hc_byte_perm_S (w[13], w[14], selector); w[22] = hc_byte_perm_S (w[12], w[13], selector); w[21] = hc_byte_perm_S (w[11], w[12], selector); w[20] = hc_byte_perm_S (w[10], w[11], selector); w[19] = hc_byte_perm_S (w[ 9], w[10], selector); w[18] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[17] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[16] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[15] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[14] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[13] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[12] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[11] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[10] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[ 9] = 
hc_byte_perm_S ( 0, w[ 0], selector); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_byte_perm_S (w[52], w[53], selector); w[62] = hc_byte_perm_S (w[51], w[52], selector); w[61] = hc_byte_perm_S (w[50], w[51], selector); w[60] = hc_byte_perm_S (w[49], w[50], selector); w[59] = hc_byte_perm_S (w[48], w[49], selector); w[58] = hc_byte_perm_S (w[47], w[48], selector); w[57] = hc_byte_perm_S (w[46], w[47], selector); w[56] = hc_byte_perm_S (w[45], w[46], selector); w[55] = hc_byte_perm_S (w[44], w[45], selector); w[54] = hc_byte_perm_S (w[43], w[44], selector); w[53] = hc_byte_perm_S (w[42], w[43], selector); w[52] = hc_byte_perm_S (w[41], w[42], selector); w[51] = hc_byte_perm_S (w[40], w[41], selector); w[50] = hc_byte_perm_S (w[39], w[40], selector); w[49] = hc_byte_perm_S (w[38], w[39], selector); w[48] = hc_byte_perm_S (w[37], w[38], selector); w[47] = hc_byte_perm_S (w[36], w[37], selector); w[46] = hc_byte_perm_S (w[35], w[36], selector); w[45] = hc_byte_perm_S (w[34], w[35], selector); w[44] = hc_byte_perm_S (w[33], w[34], selector); w[43] = hc_byte_perm_S (w[32], w[33], selector); w[42] = hc_byte_perm_S (w[31], w[32], selector); w[41] = hc_byte_perm_S (w[30], w[31], selector); w[40] = hc_byte_perm_S (w[29], w[30], selector); w[39] = hc_byte_perm_S (w[28], w[29], selector); w[38] = hc_byte_perm_S (w[27], w[28], selector); w[37] = hc_byte_perm_S (w[26], w[27], selector); w[36] = hc_byte_perm_S (w[25], w[26], selector); w[35] = hc_byte_perm_S (w[24], w[25], selector); w[34] = hc_byte_perm_S (w[23], w[24], selector); w[33] = hc_byte_perm_S (w[22], w[23], selector); w[32] = hc_byte_perm_S (w[21], w[22], selector); w[31] = hc_byte_perm_S (w[20], w[21], selector); w[30] = hc_byte_perm_S (w[19], w[20], selector); w[29] = hc_byte_perm_S (w[18], w[19], selector); w[28] = hc_byte_perm_S (w[17], w[18], selector); w[27] = hc_byte_perm_S (w[16], w[17], selector); w[26] = hc_byte_perm_S (w[15], 
w[16], selector); w[25] = hc_byte_perm_S (w[14], w[15], selector); w[24] = hc_byte_perm_S (w[13], w[14], selector); w[23] = hc_byte_perm_S (w[12], w[13], selector); w[22] = hc_byte_perm_S (w[11], w[12], selector); w[21] = hc_byte_perm_S (w[10], w[11], selector); w[20] = hc_byte_perm_S (w[ 9], w[10], selector); w[19] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[18] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[17] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[16] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[15] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[14] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[13] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[12] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[11] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[10] = hc_byte_perm_S ( 0, w[ 0], selector); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_byte_perm_S (w[51], w[52], selector); w[62] = hc_byte_perm_S (w[50], w[51], selector); w[61] = hc_byte_perm_S (w[49], w[50], selector); w[60] = hc_byte_perm_S (w[48], w[49], selector); w[59] = hc_byte_perm_S (w[47], w[48], selector); w[58] = hc_byte_perm_S (w[46], w[47], selector); w[57] = hc_byte_perm_S (w[45], w[46], selector); w[56] = hc_byte_perm_S (w[44], w[45], selector); w[55] = hc_byte_perm_S (w[43], w[44], selector); w[54] = hc_byte_perm_S (w[42], w[43], selector); w[53] = hc_byte_perm_S (w[41], w[42], selector); w[52] = hc_byte_perm_S (w[40], w[41], selector); w[51] = hc_byte_perm_S (w[39], w[40], selector); w[50] = hc_byte_perm_S (w[38], w[39], selector); w[49] = hc_byte_perm_S (w[37], w[38], selector); w[48] = hc_byte_perm_S (w[36], w[37], selector); w[47] = hc_byte_perm_S (w[35], w[36], selector); w[46] = hc_byte_perm_S (w[34], w[35], selector); w[45] = hc_byte_perm_S (w[33], w[34], selector); w[44] = hc_byte_perm_S (w[32], w[33], selector); w[43] = hc_byte_perm_S (w[31], w[32], selector); w[42] = hc_byte_perm_S (w[30], w[31], 
selector); w[41] = hc_byte_perm_S (w[29], w[30], selector); w[40] = hc_byte_perm_S (w[28], w[29], selector); w[39] = hc_byte_perm_S (w[27], w[28], selector); w[38] = hc_byte_perm_S (w[26], w[27], selector); w[37] = hc_byte_perm_S (w[25], w[26], selector); w[36] = hc_byte_perm_S (w[24], w[25], selector); w[35] = hc_byte_perm_S (w[23], w[24], selector); w[34] = hc_byte_perm_S (w[22], w[23], selector); w[33] = hc_byte_perm_S (w[21], w[22], selector); w[32] = hc_byte_perm_S (w[20], w[21], selector); w[31] = hc_byte_perm_S (w[19], w[20], selector); w[30] = hc_byte_perm_S (w[18], w[19], selector); w[29] = hc_byte_perm_S (w[17], w[18], selector); w[28] = hc_byte_perm_S (w[16], w[17], selector); w[27] = hc_byte_perm_S (w[15], w[16], selector); w[26] = hc_byte_perm_S (w[14], w[15], selector); w[25] = hc_byte_perm_S (w[13], w[14], selector); w[24] = hc_byte_perm_S (w[12], w[13], selector); w[23] = hc_byte_perm_S (w[11], w[12], selector); w[22] = hc_byte_perm_S (w[10], w[11], selector); w[21] = hc_byte_perm_S (w[ 9], w[10], selector); w[20] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[19] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[18] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[17] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[16] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[15] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[14] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[13] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[12] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[11] = hc_byte_perm_S ( 0, w[ 0], selector); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_byte_perm_S (w[50], w[51], selector); w[62] = hc_byte_perm_S (w[49], w[50], selector); w[61] = hc_byte_perm_S (w[48], w[49], selector); w[60] = hc_byte_perm_S (w[47], w[48], selector); w[59] = hc_byte_perm_S (w[46], w[47], selector); w[58] = hc_byte_perm_S (w[45], w[46], selector); w[57] = hc_byte_perm_S (w[44], w[45], 
selector); w[56] = hc_byte_perm_S (w[43], w[44], selector); w[55] = hc_byte_perm_S (w[42], w[43], selector); w[54] = hc_byte_perm_S (w[41], w[42], selector); w[53] = hc_byte_perm_S (w[40], w[41], selector); w[52] = hc_byte_perm_S (w[39], w[40], selector); w[51] = hc_byte_perm_S (w[38], w[39], selector); w[50] = hc_byte_perm_S (w[37], w[38], selector); w[49] = hc_byte_perm_S (w[36], w[37], selector); w[48] = hc_byte_perm_S (w[35], w[36], selector); w[47] = hc_byte_perm_S (w[34], w[35], selector); w[46] = hc_byte_perm_S (w[33], w[34], selector); w[45] = hc_byte_perm_S (w[32], w[33], selector); w[44] = hc_byte_perm_S (w[31], w[32], selector); w[43] = hc_byte_perm_S (w[30], w[31], selector); w[42] = hc_byte_perm_S (w[29], w[30], selector); w[41] = hc_byte_perm_S (w[28], w[29], selector); w[40] = hc_byte_perm_S (w[27], w[28], selector); w[39] = hc_byte_perm_S (w[26], w[27], selector); w[38] = hc_byte_perm_S (w[25], w[26], selector); w[37] = hc_byte_perm_S (w[24], w[25], selector); w[36] = hc_byte_perm_S (w[23], w[24], selector); w[35] = hc_byte_perm_S (w[22], w[23], selector); w[34] = hc_byte_perm_S (w[21], w[22], selector); w[33] = hc_byte_perm_S (w[20], w[21], selector); w[32] = hc_byte_perm_S (w[19], w[20], selector); w[31] = hc_byte_perm_S (w[18], w[19], selector); w[30] = hc_byte_perm_S (w[17], w[18], selector); w[29] = hc_byte_perm_S (w[16], w[17], selector); w[28] = hc_byte_perm_S (w[15], w[16], selector); w[27] = hc_byte_perm_S (w[14], w[15], selector); w[26] = hc_byte_perm_S (w[13], w[14], selector); w[25] = hc_byte_perm_S (w[12], w[13], selector); w[24] = hc_byte_perm_S (w[11], w[12], selector); w[23] = hc_byte_perm_S (w[10], w[11], selector); w[22] = hc_byte_perm_S (w[ 9], w[10], selector); w[21] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[20] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[19] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[18] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[17] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[16] = hc_byte_perm_S (w[ 
3], w[ 4], selector); w[15] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[14] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[13] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[12] = hc_byte_perm_S ( 0, w[ 0], selector); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_byte_perm_S (w[49], w[50], selector); w[62] = hc_byte_perm_S (w[48], w[49], selector); w[61] = hc_byte_perm_S (w[47], w[48], selector); w[60] = hc_byte_perm_S (w[46], w[47], selector); w[59] = hc_byte_perm_S (w[45], w[46], selector); w[58] = hc_byte_perm_S (w[44], w[45], selector); w[57] = hc_byte_perm_S (w[43], w[44], selector); w[56] = hc_byte_perm_S (w[42], w[43], selector); w[55] = hc_byte_perm_S (w[41], w[42], selector); w[54] = hc_byte_perm_S (w[40], w[41], selector); w[53] = hc_byte_perm_S (w[39], w[40], selector); w[52] = hc_byte_perm_S (w[38], w[39], selector); w[51] = hc_byte_perm_S (w[37], w[38], selector); w[50] = hc_byte_perm_S (w[36], w[37], selector); w[49] = hc_byte_perm_S (w[35], w[36], selector); w[48] = hc_byte_perm_S (w[34], w[35], selector); w[47] = hc_byte_perm_S (w[33], w[34], selector); w[46] = hc_byte_perm_S (w[32], w[33], selector); w[45] = hc_byte_perm_S (w[31], w[32], selector); w[44] = hc_byte_perm_S (w[30], w[31], selector); w[43] = hc_byte_perm_S (w[29], w[30], selector); w[42] = hc_byte_perm_S (w[28], w[29], selector); w[41] = hc_byte_perm_S (w[27], w[28], selector); w[40] = hc_byte_perm_S (w[26], w[27], selector); w[39] = hc_byte_perm_S (w[25], w[26], selector); w[38] = hc_byte_perm_S (w[24], w[25], selector); w[37] = hc_byte_perm_S (w[23], w[24], selector); w[36] = hc_byte_perm_S (w[22], w[23], selector); w[35] = hc_byte_perm_S (w[21], w[22], selector); w[34] = hc_byte_perm_S (w[20], w[21], selector); w[33] = hc_byte_perm_S (w[19], w[20], selector); w[32] = hc_byte_perm_S (w[18], w[19], selector); w[31] = hc_byte_perm_S (w[17], w[18], selector); w[30] = 
hc_byte_perm_S (w[16], w[17], selector); w[29] = hc_byte_perm_S (w[15], w[16], selector); w[28] = hc_byte_perm_S (w[14], w[15], selector); w[27] = hc_byte_perm_S (w[13], w[14], selector); w[26] = hc_byte_perm_S (w[12], w[13], selector); w[25] = hc_byte_perm_S (w[11], w[12], selector); w[24] = hc_byte_perm_S (w[10], w[11], selector); w[23] = hc_byte_perm_S (w[ 9], w[10], selector); w[22] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[21] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[20] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[19] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[18] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[17] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[16] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[15] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[14] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[13] = hc_byte_perm_S ( 0, w[ 0], selector); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = hc_byte_perm_S (w[48], w[49], selector); w[62] = hc_byte_perm_S (w[47], w[48], selector); w[61] = hc_byte_perm_S (w[46], w[47], selector); w[60] = hc_byte_perm_S (w[45], w[46], selector); w[59] = hc_byte_perm_S (w[44], w[45], selector); w[58] = hc_byte_perm_S (w[43], w[44], selector); w[57] = hc_byte_perm_S (w[42], w[43], selector); w[56] = hc_byte_perm_S (w[41], w[42], selector); w[55] = hc_byte_perm_S (w[40], w[41], selector); w[54] = hc_byte_perm_S (w[39], w[40], selector); w[53] = hc_byte_perm_S (w[38], w[39], selector); w[52] = hc_byte_perm_S (w[37], w[38], selector); w[51] = hc_byte_perm_S (w[36], w[37], selector); w[50] = hc_byte_perm_S (w[35], w[36], selector); w[49] = hc_byte_perm_S (w[34], w[35], selector); w[48] = hc_byte_perm_S (w[33], w[34], selector); w[47] = hc_byte_perm_S (w[32], w[33], selector); w[46] = hc_byte_perm_S (w[31], w[32], selector); w[45] = hc_byte_perm_S (w[30], w[31], selector); w[44] = hc_byte_perm_S (w[29], w[30], 
selector); w[43] = hc_byte_perm_S (w[28], w[29], selector); w[42] = hc_byte_perm_S (w[27], w[28], selector); w[41] = hc_byte_perm_S (w[26], w[27], selector); w[40] = hc_byte_perm_S (w[25], w[26], selector); w[39] = hc_byte_perm_S (w[24], w[25], selector); w[38] = hc_byte_perm_S (w[23], w[24], selector); w[37] = hc_byte_perm_S (w[22], w[23], selector); w[36] = hc_byte_perm_S (w[21], w[22], selector); w[35] = hc_byte_perm_S (w[20], w[21], selector); w[34] = hc_byte_perm_S (w[19], w[20], selector); w[33] = hc_byte_perm_S (w[18], w[19], selector); w[32] = hc_byte_perm_S (w[17], w[18], selector); w[31] = hc_byte_perm_S (w[16], w[17], selector); w[30] = hc_byte_perm_S (w[15], w[16], selector); w[29] = hc_byte_perm_S (w[14], w[15], selector); w[28] = hc_byte_perm_S (w[13], w[14], selector); w[27] = hc_byte_perm_S (w[12], w[13], selector); w[26] = hc_byte_perm_S (w[11], w[12], selector); w[25] = hc_byte_perm_S (w[10], w[11], selector); w[24] = hc_byte_perm_S (w[ 9], w[10], selector); w[23] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[22] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[21] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[20] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[19] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[18] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[17] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[16] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[15] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[14] = hc_byte_perm_S ( 0, w[ 0], selector); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_byte_perm_S (w[47], w[48], selector); w[62] = hc_byte_perm_S (w[46], w[47], selector); w[61] = hc_byte_perm_S (w[45], w[46], selector); w[60] = hc_byte_perm_S (w[44], w[45], selector); w[59] = hc_byte_perm_S (w[43], w[44], selector); w[58] = hc_byte_perm_S (w[42], w[43], selector); w[57] = hc_byte_perm_S (w[41], w[42], selector); w[56] = 
hc_byte_perm_S (w[40], w[41], selector); w[55] = hc_byte_perm_S (w[39], w[40], selector); w[54] = hc_byte_perm_S (w[38], w[39], selector); w[53] = hc_byte_perm_S (w[37], w[38], selector); w[52] = hc_byte_perm_S (w[36], w[37], selector); w[51] = hc_byte_perm_S (w[35], w[36], selector); w[50] = hc_byte_perm_S (w[34], w[35], selector); w[49] = hc_byte_perm_S (w[33], w[34], selector); w[48] = hc_byte_perm_S (w[32], w[33], selector); w[47] = hc_byte_perm_S (w[31], w[32], selector); w[46] = hc_byte_perm_S (w[30], w[31], selector); w[45] = hc_byte_perm_S (w[29], w[30], selector); w[44] = hc_byte_perm_S (w[28], w[29], selector); w[43] = hc_byte_perm_S (w[27], w[28], selector); w[42] = hc_byte_perm_S (w[26], w[27], selector); w[41] = hc_byte_perm_S (w[25], w[26], selector); w[40] = hc_byte_perm_S (w[24], w[25], selector); w[39] = hc_byte_perm_S (w[23], w[24], selector); w[38] = hc_byte_perm_S (w[22], w[23], selector); w[37] = hc_byte_perm_S (w[21], w[22], selector); w[36] = hc_byte_perm_S (w[20], w[21], selector); w[35] = hc_byte_perm_S (w[19], w[20], selector); w[34] = hc_byte_perm_S (w[18], w[19], selector); w[33] = hc_byte_perm_S (w[17], w[18], selector); w[32] = hc_byte_perm_S (w[16], w[17], selector); w[31] = hc_byte_perm_S (w[15], w[16], selector); w[30] = hc_byte_perm_S (w[14], w[15], selector); w[29] = hc_byte_perm_S (w[13], w[14], selector); w[28] = hc_byte_perm_S (w[12], w[13], selector); w[27] = hc_byte_perm_S (w[11], w[12], selector); w[26] = hc_byte_perm_S (w[10], w[11], selector); w[25] = hc_byte_perm_S (w[ 9], w[10], selector); w[24] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[23] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[22] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[21] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[20] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[19] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[18] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[17] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[16] = hc_byte_perm_S (w[ 0], w[ 1], 
selector); w[15] = hc_byte_perm_S ( 0, w[ 0], selector); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_byte_perm_S (w[46], w[47], selector); w[62] = hc_byte_perm_S (w[45], w[46], selector); w[61] = hc_byte_perm_S (w[44], w[45], selector); w[60] = hc_byte_perm_S (w[43], w[44], selector); w[59] = hc_byte_perm_S (w[42], w[43], selector); w[58] = hc_byte_perm_S (w[41], w[42], selector); w[57] = hc_byte_perm_S (w[40], w[41], selector); w[56] = hc_byte_perm_S (w[39], w[40], selector); w[55] = hc_byte_perm_S (w[38], w[39], selector); w[54] = hc_byte_perm_S (w[37], w[38], selector); w[53] = hc_byte_perm_S (w[36], w[37], selector); w[52] = hc_byte_perm_S (w[35], w[36], selector); w[51] = hc_byte_perm_S (w[34], w[35], selector); w[50] = hc_byte_perm_S (w[33], w[34], selector); w[49] = hc_byte_perm_S (w[32], w[33], selector); w[48] = hc_byte_perm_S (w[31], w[32], selector); w[47] = hc_byte_perm_S (w[30], w[31], selector); w[46] = hc_byte_perm_S (w[29], w[30], selector); w[45] = hc_byte_perm_S (w[28], w[29], selector); w[44] = hc_byte_perm_S (w[27], w[28], selector); w[43] = hc_byte_perm_S (w[26], w[27], selector); w[42] = hc_byte_perm_S (w[25], w[26], selector); w[41] = hc_byte_perm_S (w[24], w[25], selector); w[40] = hc_byte_perm_S (w[23], w[24], selector); w[39] = hc_byte_perm_S (w[22], w[23], selector); w[38] = hc_byte_perm_S (w[21], w[22], selector); w[37] = hc_byte_perm_S (w[20], w[21], selector); w[36] = hc_byte_perm_S (w[19], w[20], selector); w[35] = hc_byte_perm_S (w[18], w[19], selector); w[34] = hc_byte_perm_S (w[17], w[18], selector); w[33] = hc_byte_perm_S (w[16], w[17], selector); w[32] = hc_byte_perm_S (w[15], w[16], selector); w[31] = hc_byte_perm_S (w[14], w[15], selector); w[30] = hc_byte_perm_S (w[13], w[14], selector); w[29] = hc_byte_perm_S (w[12], w[13], selector); w[28] = hc_byte_perm_S (w[11], w[12], 
selector); w[27] = hc_byte_perm_S (w[10], w[11], selector); w[26] = hc_byte_perm_S (w[ 9], w[10], selector); w[25] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[24] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[23] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[22] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[21] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[20] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[19] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[18] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[17] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[16] = hc_byte_perm_S ( 0, w[ 0], selector); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_byte_perm_S (w[45], w[46], selector); w[62] = hc_byte_perm_S (w[44], w[45], selector); w[61] = hc_byte_perm_S (w[43], w[44], selector); w[60] = hc_byte_perm_S (w[42], w[43], selector); w[59] = hc_byte_perm_S (w[41], w[42], selector); w[58] = hc_byte_perm_S (w[40], w[41], selector); w[57] = hc_byte_perm_S (w[39], w[40], selector); w[56] = hc_byte_perm_S (w[38], w[39], selector); w[55] = hc_byte_perm_S (w[37], w[38], selector); w[54] = hc_byte_perm_S (w[36], w[37], selector); w[53] = hc_byte_perm_S (w[35], w[36], selector); w[52] = hc_byte_perm_S (w[34], w[35], selector); w[51] = hc_byte_perm_S (w[33], w[34], selector); w[50] = hc_byte_perm_S (w[32], w[33], selector); w[49] = hc_byte_perm_S (w[31], w[32], selector); w[48] = hc_byte_perm_S (w[30], w[31], selector); w[47] = hc_byte_perm_S (w[29], w[30], selector); w[46] = hc_byte_perm_S (w[28], w[29], selector); w[45] = hc_byte_perm_S (w[27], w[28], selector); w[44] = hc_byte_perm_S (w[26], w[27], selector); w[43] = hc_byte_perm_S (w[25], w[26], selector); w[42] = hc_byte_perm_S (w[24], w[25], selector); w[41] = hc_byte_perm_S (w[23], w[24], selector); w[40] = hc_byte_perm_S (w[22], w[23], selector); w[39] = hc_byte_perm_S (w[21], 
w[22], selector); w[38] = hc_byte_perm_S (w[20], w[21], selector); w[37] = hc_byte_perm_S (w[19], w[20], selector); w[36] = hc_byte_perm_S (w[18], w[19], selector); w[35] = hc_byte_perm_S (w[17], w[18], selector); w[34] = hc_byte_perm_S (w[16], w[17], selector); w[33] = hc_byte_perm_S (w[15], w[16], selector); w[32] = hc_byte_perm_S (w[14], w[15], selector); w[31] = hc_byte_perm_S (w[13], w[14], selector); w[30] = hc_byte_perm_S (w[12], w[13], selector); w[29] = hc_byte_perm_S (w[11], w[12], selector); w[28] = hc_byte_perm_S (w[10], w[11], selector); w[27] = hc_byte_perm_S (w[ 9], w[10], selector); w[26] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[25] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[24] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[23] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[22] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[21] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[20] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[19] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[18] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[17] = hc_byte_perm_S ( 0, w[ 0], selector); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_byte_perm_S (w[44], w[45], selector); w[62] = hc_byte_perm_S (w[43], w[44], selector); w[61] = hc_byte_perm_S (w[42], w[43], selector); w[60] = hc_byte_perm_S (w[41], w[42], selector); w[59] = hc_byte_perm_S (w[40], w[41], selector); w[58] = hc_byte_perm_S (w[39], w[40], selector); w[57] = hc_byte_perm_S (w[38], w[39], selector); w[56] = hc_byte_perm_S (w[37], w[38], selector); w[55] = hc_byte_perm_S (w[36], w[37], selector); w[54] = hc_byte_perm_S (w[35], w[36], selector); w[53] = hc_byte_perm_S (w[34], w[35], selector); w[52] = hc_byte_perm_S (w[33], w[34], selector); w[51] = hc_byte_perm_S (w[32], w[33], selector); w[50] = hc_byte_perm_S (w[31], w[32], selector); w[49] = 
hc_byte_perm_S (w[30], w[31], selector); w[48] = hc_byte_perm_S (w[29], w[30], selector); w[47] = hc_byte_perm_S (w[28], w[29], selector); w[46] = hc_byte_perm_S (w[27], w[28], selector); w[45] = hc_byte_perm_S (w[26], w[27], selector); w[44] = hc_byte_perm_S (w[25], w[26], selector); w[43] = hc_byte_perm_S (w[24], w[25], selector); w[42] = hc_byte_perm_S (w[23], w[24], selector); w[41] = hc_byte_perm_S (w[22], w[23], selector); w[40] = hc_byte_perm_S (w[21], w[22], selector); w[39] = hc_byte_perm_S (w[20], w[21], selector); w[38] = hc_byte_perm_S (w[19], w[20], selector); w[37] = hc_byte_perm_S (w[18], w[19], selector); w[36] = hc_byte_perm_S (w[17], w[18], selector); w[35] = hc_byte_perm_S (w[16], w[17], selector); w[34] = hc_byte_perm_S (w[15], w[16], selector); w[33] = hc_byte_perm_S (w[14], w[15], selector); w[32] = hc_byte_perm_S (w[13], w[14], selector); w[31] = hc_byte_perm_S (w[12], w[13], selector); w[30] = hc_byte_perm_S (w[11], w[12], selector); w[29] = hc_byte_perm_S (w[10], w[11], selector); w[28] = hc_byte_perm_S (w[ 9], w[10], selector); w[27] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[26] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[25] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[24] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[23] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[22] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[21] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[20] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[19] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[18] = hc_byte_perm_S ( 0, w[ 0], selector); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_byte_perm_S (w[43], w[44], selector); w[62] = hc_byte_perm_S (w[42], w[43], selector); w[61] = hc_byte_perm_S (w[41], w[42], selector); w[60] = hc_byte_perm_S (w[40], w[41], selector); w[59] = hc_byte_perm_S (w[39], 
w[40], selector); w[58] = hc_byte_perm_S (w[38], w[39], selector); w[57] = hc_byte_perm_S (w[37], w[38], selector); w[56] = hc_byte_perm_S (w[36], w[37], selector); w[55] = hc_byte_perm_S (w[35], w[36], selector); w[54] = hc_byte_perm_S (w[34], w[35], selector); w[53] = hc_byte_perm_S (w[33], w[34], selector); w[52] = hc_byte_perm_S (w[32], w[33], selector); w[51] = hc_byte_perm_S (w[31], w[32], selector); w[50] = hc_byte_perm_S (w[30], w[31], selector); w[49] = hc_byte_perm_S (w[29], w[30], selector); w[48] = hc_byte_perm_S (w[28], w[29], selector); w[47] = hc_byte_perm_S (w[27], w[28], selector); w[46] = hc_byte_perm_S (w[26], w[27], selector); w[45] = hc_byte_perm_S (w[25], w[26], selector); w[44] = hc_byte_perm_S (w[24], w[25], selector); w[43] = hc_byte_perm_S (w[23], w[24], selector); w[42] = hc_byte_perm_S (w[22], w[23], selector); w[41] = hc_byte_perm_S (w[21], w[22], selector); w[40] = hc_byte_perm_S (w[20], w[21], selector); w[39] = hc_byte_perm_S (w[19], w[20], selector); w[38] = hc_byte_perm_S (w[18], w[19], selector); w[37] = hc_byte_perm_S (w[17], w[18], selector); w[36] = hc_byte_perm_S (w[16], w[17], selector); w[35] = hc_byte_perm_S (w[15], w[16], selector); w[34] = hc_byte_perm_S (w[14], w[15], selector); w[33] = hc_byte_perm_S (w[13], w[14], selector); w[32] = hc_byte_perm_S (w[12], w[13], selector); w[31] = hc_byte_perm_S (w[11], w[12], selector); w[30] = hc_byte_perm_S (w[10], w[11], selector); w[29] = hc_byte_perm_S (w[ 9], w[10], selector); w[28] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[27] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[26] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[25] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[24] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[23] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[22] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[21] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[20] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[19] = hc_byte_perm_S ( 0, w[ 0], selector); w[18] = 0; w[17] = 0; 
w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_byte_perm_S (w[42], w[43], selector); w[62] = hc_byte_perm_S (w[41], w[42], selector); w[61] = hc_byte_perm_S (w[40], w[41], selector); w[60] = hc_byte_perm_S (w[39], w[40], selector); w[59] = hc_byte_perm_S (w[38], w[39], selector); w[58] = hc_byte_perm_S (w[37], w[38], selector); w[57] = hc_byte_perm_S (w[36], w[37], selector); w[56] = hc_byte_perm_S (w[35], w[36], selector); w[55] = hc_byte_perm_S (w[34], w[35], selector); w[54] = hc_byte_perm_S (w[33], w[34], selector); w[53] = hc_byte_perm_S (w[32], w[33], selector); w[52] = hc_byte_perm_S (w[31], w[32], selector); w[51] = hc_byte_perm_S (w[30], w[31], selector); w[50] = hc_byte_perm_S (w[29], w[30], selector); w[49] = hc_byte_perm_S (w[28], w[29], selector); w[48] = hc_byte_perm_S (w[27], w[28], selector); w[47] = hc_byte_perm_S (w[26], w[27], selector); w[46] = hc_byte_perm_S (w[25], w[26], selector); w[45] = hc_byte_perm_S (w[24], w[25], selector); w[44] = hc_byte_perm_S (w[23], w[24], selector); w[43] = hc_byte_perm_S (w[22], w[23], selector); w[42] = hc_byte_perm_S (w[21], w[22], selector); w[41] = hc_byte_perm_S (w[20], w[21], selector); w[40] = hc_byte_perm_S (w[19], w[20], selector); w[39] = hc_byte_perm_S (w[18], w[19], selector); w[38] = hc_byte_perm_S (w[17], w[18], selector); w[37] = hc_byte_perm_S (w[16], w[17], selector); w[36] = hc_byte_perm_S (w[15], w[16], selector); w[35] = hc_byte_perm_S (w[14], w[15], selector); w[34] = hc_byte_perm_S (w[13], w[14], selector); w[33] = hc_byte_perm_S (w[12], w[13], selector); w[32] = hc_byte_perm_S (w[11], w[12], selector); w[31] = hc_byte_perm_S (w[10], w[11], selector); w[30] = hc_byte_perm_S (w[ 9], w[10], selector); w[29] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[28] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[27] = hc_byte_perm_S (w[ 6], 
w[ 7], selector); w[26] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[25] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[24] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[23] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[22] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[21] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[20] = hc_byte_perm_S ( 0, w[ 0], selector); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_byte_perm_S (w[41], w[42], selector); w[62] = hc_byte_perm_S (w[40], w[41], selector); w[61] = hc_byte_perm_S (w[39], w[40], selector); w[60] = hc_byte_perm_S (w[38], w[39], selector); w[59] = hc_byte_perm_S (w[37], w[38], selector); w[58] = hc_byte_perm_S (w[36], w[37], selector); w[57] = hc_byte_perm_S (w[35], w[36], selector); w[56] = hc_byte_perm_S (w[34], w[35], selector); w[55] = hc_byte_perm_S (w[33], w[34], selector); w[54] = hc_byte_perm_S (w[32], w[33], selector); w[53] = hc_byte_perm_S (w[31], w[32], selector); w[52] = hc_byte_perm_S (w[30], w[31], selector); w[51] = hc_byte_perm_S (w[29], w[30], selector); w[50] = hc_byte_perm_S (w[28], w[29], selector); w[49] = hc_byte_perm_S (w[27], w[28], selector); w[48] = hc_byte_perm_S (w[26], w[27], selector); w[47] = hc_byte_perm_S (w[25], w[26], selector); w[46] = hc_byte_perm_S (w[24], w[25], selector); w[45] = hc_byte_perm_S (w[23], w[24], selector); w[44] = hc_byte_perm_S (w[22], w[23], selector); w[43] = hc_byte_perm_S (w[21], w[22], selector); w[42] = hc_byte_perm_S (w[20], w[21], selector); w[41] = hc_byte_perm_S (w[19], w[20], selector); w[40] = hc_byte_perm_S (w[18], w[19], selector); w[39] = hc_byte_perm_S (w[17], w[18], selector); w[38] = hc_byte_perm_S (w[16], w[17], selector); w[37] = hc_byte_perm_S (w[15], w[16], selector); w[36] = hc_byte_perm_S (w[14], w[15], selector); w[35] = hc_byte_perm_S (w[13], 
w[14], selector); w[34] = hc_byte_perm_S (w[12], w[13], selector); w[33] = hc_byte_perm_S (w[11], w[12], selector); w[32] = hc_byte_perm_S (w[10], w[11], selector); w[31] = hc_byte_perm_S (w[ 9], w[10], selector); w[30] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[29] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[28] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[27] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[26] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[25] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[24] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[23] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[22] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[21] = hc_byte_perm_S ( 0, w[ 0], selector); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_byte_perm_S (w[40], w[41], selector); w[62] = hc_byte_perm_S (w[39], w[40], selector); w[61] = hc_byte_perm_S (w[38], w[39], selector); w[60] = hc_byte_perm_S (w[37], w[38], selector); w[59] = hc_byte_perm_S (w[36], w[37], selector); w[58] = hc_byte_perm_S (w[35], w[36], selector); w[57] = hc_byte_perm_S (w[34], w[35], selector); w[56] = hc_byte_perm_S (w[33], w[34], selector); w[55] = hc_byte_perm_S (w[32], w[33], selector); w[54] = hc_byte_perm_S (w[31], w[32], selector); w[53] = hc_byte_perm_S (w[30], w[31], selector); w[52] = hc_byte_perm_S (w[29], w[30], selector); w[51] = hc_byte_perm_S (w[28], w[29], selector); w[50] = hc_byte_perm_S (w[27], w[28], selector); w[49] = hc_byte_perm_S (w[26], w[27], selector); w[48] = hc_byte_perm_S (w[25], w[26], selector); w[47] = hc_byte_perm_S (w[24], w[25], selector); w[46] = hc_byte_perm_S (w[23], w[24], selector); w[45] = hc_byte_perm_S (w[22], w[23], selector); w[44] = hc_byte_perm_S (w[21], w[22], selector); w[43] = hc_byte_perm_S (w[20], w[21], selector); w[42] = hc_byte_perm_S 
(w[19], w[20], selector); w[41] = hc_byte_perm_S (w[18], w[19], selector); w[40] = hc_byte_perm_S (w[17], w[18], selector); w[39] = hc_byte_perm_S (w[16], w[17], selector); w[38] = hc_byte_perm_S (w[15], w[16], selector); w[37] = hc_byte_perm_S (w[14], w[15], selector); w[36] = hc_byte_perm_S (w[13], w[14], selector); w[35] = hc_byte_perm_S (w[12], w[13], selector); w[34] = hc_byte_perm_S (w[11], w[12], selector); w[33] = hc_byte_perm_S (w[10], w[11], selector); w[32] = hc_byte_perm_S (w[ 9], w[10], selector); w[31] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[30] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[29] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[28] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[27] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[26] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[25] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[24] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[23] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[22] = hc_byte_perm_S ( 0, w[ 0], selector); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_byte_perm_S (w[39], w[40], selector); w[62] = hc_byte_perm_S (w[38], w[39], selector); w[61] = hc_byte_perm_S (w[37], w[38], selector); w[60] = hc_byte_perm_S (w[36], w[37], selector); w[59] = hc_byte_perm_S (w[35], w[36], selector); w[58] = hc_byte_perm_S (w[34], w[35], selector); w[57] = hc_byte_perm_S (w[33], w[34], selector); w[56] = hc_byte_perm_S (w[32], w[33], selector); w[55] = hc_byte_perm_S (w[31], w[32], selector); w[54] = hc_byte_perm_S (w[30], w[31], selector); w[53] = hc_byte_perm_S (w[29], w[30], selector); w[52] = hc_byte_perm_S (w[28], w[29], selector); w[51] = hc_byte_perm_S (w[27], w[28], selector); w[50] = hc_byte_perm_S (w[26], w[27], selector); w[49] = hc_byte_perm_S (w[25], w[26], selector); 
w[48] = hc_byte_perm_S (w[24], w[25], selector); w[47] = hc_byte_perm_S (w[23], w[24], selector); w[46] = hc_byte_perm_S (w[22], w[23], selector); w[45] = hc_byte_perm_S (w[21], w[22], selector); w[44] = hc_byte_perm_S (w[20], w[21], selector); w[43] = hc_byte_perm_S (w[19], w[20], selector); w[42] = hc_byte_perm_S (w[18], w[19], selector); w[41] = hc_byte_perm_S (w[17], w[18], selector); w[40] = hc_byte_perm_S (w[16], w[17], selector); w[39] = hc_byte_perm_S (w[15], w[16], selector); w[38] = hc_byte_perm_S (w[14], w[15], selector); w[37] = hc_byte_perm_S (w[13], w[14], selector); w[36] = hc_byte_perm_S (w[12], w[13], selector); w[35] = hc_byte_perm_S (w[11], w[12], selector); w[34] = hc_byte_perm_S (w[10], w[11], selector); w[33] = hc_byte_perm_S (w[ 9], w[10], selector); w[32] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[31] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[30] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[29] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[28] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[27] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[26] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[25] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[24] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[23] = hc_byte_perm_S ( 0, w[ 0], selector); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_byte_perm_S (w[38], w[39], selector); w[62] = hc_byte_perm_S (w[37], w[38], selector); w[61] = hc_byte_perm_S (w[36], w[37], selector); w[60] = hc_byte_perm_S (w[35], w[36], selector); w[59] = hc_byte_perm_S (w[34], w[35], selector); w[58] = hc_byte_perm_S (w[33], w[34], selector); w[57] = hc_byte_perm_S (w[32], w[33], selector); w[56] = hc_byte_perm_S (w[31], w[32], selector); w[55] = hc_byte_perm_S (w[30], w[31], selector); w[54] = 
hc_byte_perm_S (w[29], w[30], selector); w[53] = hc_byte_perm_S (w[28], w[29], selector); w[52] = hc_byte_perm_S (w[27], w[28], selector); w[51] = hc_byte_perm_S (w[26], w[27], selector); w[50] = hc_byte_perm_S (w[25], w[26], selector); w[49] = hc_byte_perm_S (w[24], w[25], selector); w[48] = hc_byte_perm_S (w[23], w[24], selector); w[47] = hc_byte_perm_S (w[22], w[23], selector); w[46] = hc_byte_perm_S (w[21], w[22], selector); w[45] = hc_byte_perm_S (w[20], w[21], selector); w[44] = hc_byte_perm_S (w[19], w[20], selector); w[43] = hc_byte_perm_S (w[18], w[19], selector); w[42] = hc_byte_perm_S (w[17], w[18], selector); w[41] = hc_byte_perm_S (w[16], w[17], selector); w[40] = hc_byte_perm_S (w[15], w[16], selector); w[39] = hc_byte_perm_S (w[14], w[15], selector); w[38] = hc_byte_perm_S (w[13], w[14], selector); w[37] = hc_byte_perm_S (w[12], w[13], selector); w[36] = hc_byte_perm_S (w[11], w[12], selector); w[35] = hc_byte_perm_S (w[10], w[11], selector); w[34] = hc_byte_perm_S (w[ 9], w[10], selector); w[33] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[32] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[31] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[30] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[29] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[28] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[27] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[26] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[25] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[24] = hc_byte_perm_S ( 0, w[ 0], selector); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_byte_perm_S (w[37], w[38], selector); w[62] = hc_byte_perm_S (w[36], w[37], selector); w[61] = hc_byte_perm_S (w[35], w[36], selector); w[60] = hc_byte_perm_S (w[34], w[35], selector); w[59] = 
hc_byte_perm_S (w[33], w[34], selector); w[58] = hc_byte_perm_S (w[32], w[33], selector); w[57] = hc_byte_perm_S (w[31], w[32], selector); w[56] = hc_byte_perm_S (w[30], w[31], selector); w[55] = hc_byte_perm_S (w[29], w[30], selector); w[54] = hc_byte_perm_S (w[28], w[29], selector); w[53] = hc_byte_perm_S (w[27], w[28], selector); w[52] = hc_byte_perm_S (w[26], w[27], selector); w[51] = hc_byte_perm_S (w[25], w[26], selector); w[50] = hc_byte_perm_S (w[24], w[25], selector); w[49] = hc_byte_perm_S (w[23], w[24], selector); w[48] = hc_byte_perm_S (w[22], w[23], selector); w[47] = hc_byte_perm_S (w[21], w[22], selector); w[46] = hc_byte_perm_S (w[20], w[21], selector); w[45] = hc_byte_perm_S (w[19], w[20], selector); w[44] = hc_byte_perm_S (w[18], w[19], selector); w[43] = hc_byte_perm_S (w[17], w[18], selector); w[42] = hc_byte_perm_S (w[16], w[17], selector); w[41] = hc_byte_perm_S (w[15], w[16], selector); w[40] = hc_byte_perm_S (w[14], w[15], selector); w[39] = hc_byte_perm_S (w[13], w[14], selector); w[38] = hc_byte_perm_S (w[12], w[13], selector); w[37] = hc_byte_perm_S (w[11], w[12], selector); w[36] = hc_byte_perm_S (w[10], w[11], selector); w[35] = hc_byte_perm_S (w[ 9], w[10], selector); w[34] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[33] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[32] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[31] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[30] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[29] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[28] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[27] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[26] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[25] = hc_byte_perm_S ( 0, w[ 0], selector); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: 
w[63] = hc_byte_perm_S (w[36], w[37], selector); w[62] = hc_byte_perm_S (w[35], w[36], selector); w[61] = hc_byte_perm_S (w[34], w[35], selector); w[60] = hc_byte_perm_S (w[33], w[34], selector); w[59] = hc_byte_perm_S (w[32], w[33], selector); w[58] = hc_byte_perm_S (w[31], w[32], selector); w[57] = hc_byte_perm_S (w[30], w[31], selector); w[56] = hc_byte_perm_S (w[29], w[30], selector); w[55] = hc_byte_perm_S (w[28], w[29], selector); w[54] = hc_byte_perm_S (w[27], w[28], selector); w[53] = hc_byte_perm_S (w[26], w[27], selector); w[52] = hc_byte_perm_S (w[25], w[26], selector); w[51] = hc_byte_perm_S (w[24], w[25], selector); w[50] = hc_byte_perm_S (w[23], w[24], selector); w[49] = hc_byte_perm_S (w[22], w[23], selector); w[48] = hc_byte_perm_S (w[21], w[22], selector); w[47] = hc_byte_perm_S (w[20], w[21], selector); w[46] = hc_byte_perm_S (w[19], w[20], selector); w[45] = hc_byte_perm_S (w[18], w[19], selector); w[44] = hc_byte_perm_S (w[17], w[18], selector); w[43] = hc_byte_perm_S (w[16], w[17], selector); w[42] = hc_byte_perm_S (w[15], w[16], selector); w[41] = hc_byte_perm_S (w[14], w[15], selector); w[40] = hc_byte_perm_S (w[13], w[14], selector); w[39] = hc_byte_perm_S (w[12], w[13], selector); w[38] = hc_byte_perm_S (w[11], w[12], selector); w[37] = hc_byte_perm_S (w[10], w[11], selector); w[36] = hc_byte_perm_S (w[ 9], w[10], selector); w[35] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[34] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[33] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[32] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[31] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[30] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[29] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[28] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[27] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[26] = hc_byte_perm_S ( 0, w[ 0], selector); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 
0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_byte_perm_S (w[35], w[36], selector); w[62] = hc_byte_perm_S (w[34], w[35], selector); w[61] = hc_byte_perm_S (w[33], w[34], selector); w[60] = hc_byte_perm_S (w[32], w[33], selector); w[59] = hc_byte_perm_S (w[31], w[32], selector); w[58] = hc_byte_perm_S (w[30], w[31], selector); w[57] = hc_byte_perm_S (w[29], w[30], selector); w[56] = hc_byte_perm_S (w[28], w[29], selector); w[55] = hc_byte_perm_S (w[27], w[28], selector); w[54] = hc_byte_perm_S (w[26], w[27], selector); w[53] = hc_byte_perm_S (w[25], w[26], selector); w[52] = hc_byte_perm_S (w[24], w[25], selector); w[51] = hc_byte_perm_S (w[23], w[24], selector); w[50] = hc_byte_perm_S (w[22], w[23], selector); w[49] = hc_byte_perm_S (w[21], w[22], selector); w[48] = hc_byte_perm_S (w[20], w[21], selector); w[47] = hc_byte_perm_S (w[19], w[20], selector); w[46] = hc_byte_perm_S (w[18], w[19], selector); w[45] = hc_byte_perm_S (w[17], w[18], selector); w[44] = hc_byte_perm_S (w[16], w[17], selector); w[43] = hc_byte_perm_S (w[15], w[16], selector); w[42] = hc_byte_perm_S (w[14], w[15], selector); w[41] = hc_byte_perm_S (w[13], w[14], selector); w[40] = hc_byte_perm_S (w[12], w[13], selector); w[39] = hc_byte_perm_S (w[11], w[12], selector); w[38] = hc_byte_perm_S (w[10], w[11], selector); w[37] = hc_byte_perm_S (w[ 9], w[10], selector); w[36] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[35] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[34] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[33] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[32] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[31] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[30] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[29] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[28] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[27] = hc_byte_perm_S ( 0, w[ 0], selector); w[26] = 0; w[25] = 0; w[24] 
= 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_byte_perm_S (w[34], w[35], selector); w[62] = hc_byte_perm_S (w[33], w[34], selector); w[61] = hc_byte_perm_S (w[32], w[33], selector); w[60] = hc_byte_perm_S (w[31], w[32], selector); w[59] = hc_byte_perm_S (w[30], w[31], selector); w[58] = hc_byte_perm_S (w[29], w[30], selector); w[57] = hc_byte_perm_S (w[28], w[29], selector); w[56] = hc_byte_perm_S (w[27], w[28], selector); w[55] = hc_byte_perm_S (w[26], w[27], selector); w[54] = hc_byte_perm_S (w[25], w[26], selector); w[53] = hc_byte_perm_S (w[24], w[25], selector); w[52] = hc_byte_perm_S (w[23], w[24], selector); w[51] = hc_byte_perm_S (w[22], w[23], selector); w[50] = hc_byte_perm_S (w[21], w[22], selector); w[49] = hc_byte_perm_S (w[20], w[21], selector); w[48] = hc_byte_perm_S (w[19], w[20], selector); w[47] = hc_byte_perm_S (w[18], w[19], selector); w[46] = hc_byte_perm_S (w[17], w[18], selector); w[45] = hc_byte_perm_S (w[16], w[17], selector); w[44] = hc_byte_perm_S (w[15], w[16], selector); w[43] = hc_byte_perm_S (w[14], w[15], selector); w[42] = hc_byte_perm_S (w[13], w[14], selector); w[41] = hc_byte_perm_S (w[12], w[13], selector); w[40] = hc_byte_perm_S (w[11], w[12], selector); w[39] = hc_byte_perm_S (w[10], w[11], selector); w[38] = hc_byte_perm_S (w[ 9], w[10], selector); w[37] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[36] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[35] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[34] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[33] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[32] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[31] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[30] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[29] = hc_byte_perm_S (w[ 0], w[ 1], selector); 
w[28] = hc_byte_perm_S ( 0, w[ 0], selector); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_byte_perm_S (w[33], w[34], selector); w[62] = hc_byte_perm_S (w[32], w[33], selector); w[61] = hc_byte_perm_S (w[31], w[32], selector); w[60] = hc_byte_perm_S (w[30], w[31], selector); w[59] = hc_byte_perm_S (w[29], w[30], selector); w[58] = hc_byte_perm_S (w[28], w[29], selector); w[57] = hc_byte_perm_S (w[27], w[28], selector); w[56] = hc_byte_perm_S (w[26], w[27], selector); w[55] = hc_byte_perm_S (w[25], w[26], selector); w[54] = hc_byte_perm_S (w[24], w[25], selector); w[53] = hc_byte_perm_S (w[23], w[24], selector); w[52] = hc_byte_perm_S (w[22], w[23], selector); w[51] = hc_byte_perm_S (w[21], w[22], selector); w[50] = hc_byte_perm_S (w[20], w[21], selector); w[49] = hc_byte_perm_S (w[19], w[20], selector); w[48] = hc_byte_perm_S (w[18], w[19], selector); w[47] = hc_byte_perm_S (w[17], w[18], selector); w[46] = hc_byte_perm_S (w[16], w[17], selector); w[45] = hc_byte_perm_S (w[15], w[16], selector); w[44] = hc_byte_perm_S (w[14], w[15], selector); w[43] = hc_byte_perm_S (w[13], w[14], selector); w[42] = hc_byte_perm_S (w[12], w[13], selector); w[41] = hc_byte_perm_S (w[11], w[12], selector); w[40] = hc_byte_perm_S (w[10], w[11], selector); w[39] = hc_byte_perm_S (w[ 9], w[10], selector); w[38] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[37] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[36] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[35] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[34] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[33] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[32] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[31] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[30] = 
hc_byte_perm_S (w[ 0], w[ 1], selector); w[29] = hc_byte_perm_S ( 0, w[ 0], selector); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_byte_perm_S (w[32], w[33], selector); w[62] = hc_byte_perm_S (w[31], w[32], selector); w[61] = hc_byte_perm_S (w[30], w[31], selector); w[60] = hc_byte_perm_S (w[29], w[30], selector); w[59] = hc_byte_perm_S (w[28], w[29], selector); w[58] = hc_byte_perm_S (w[27], w[28], selector); w[57] = hc_byte_perm_S (w[26], w[27], selector); w[56] = hc_byte_perm_S (w[25], w[26], selector); w[55] = hc_byte_perm_S (w[24], w[25], selector); w[54] = hc_byte_perm_S (w[23], w[24], selector); w[53] = hc_byte_perm_S (w[22], w[23], selector); w[52] = hc_byte_perm_S (w[21], w[22], selector); w[51] = hc_byte_perm_S (w[20], w[21], selector); w[50] = hc_byte_perm_S (w[19], w[20], selector); w[49] = hc_byte_perm_S (w[18], w[19], selector); w[48] = hc_byte_perm_S (w[17], w[18], selector); w[47] = hc_byte_perm_S (w[16], w[17], selector); w[46] = hc_byte_perm_S (w[15], w[16], selector); w[45] = hc_byte_perm_S (w[14], w[15], selector); w[44] = hc_byte_perm_S (w[13], w[14], selector); w[43] = hc_byte_perm_S (w[12], w[13], selector); w[42] = hc_byte_perm_S (w[11], w[12], selector); w[41] = hc_byte_perm_S (w[10], w[11], selector); w[40] = hc_byte_perm_S (w[ 9], w[10], selector); w[39] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[38] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[37] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[36] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[35] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[34] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[33] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[32] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[31] = 
hc_byte_perm_S (w[ 0], w[ 1], selector); w[30] = hc_byte_perm_S ( 0, w[ 0], selector); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_byte_perm_S (w[31], w[32], selector); w[62] = hc_byte_perm_S (w[30], w[31], selector); w[61] = hc_byte_perm_S (w[29], w[30], selector); w[60] = hc_byte_perm_S (w[28], w[29], selector); w[59] = hc_byte_perm_S (w[27], w[28], selector); w[58] = hc_byte_perm_S (w[26], w[27], selector); w[57] = hc_byte_perm_S (w[25], w[26], selector); w[56] = hc_byte_perm_S (w[24], w[25], selector); w[55] = hc_byte_perm_S (w[23], w[24], selector); w[54] = hc_byte_perm_S (w[22], w[23], selector); w[53] = hc_byte_perm_S (w[21], w[22], selector); w[52] = hc_byte_perm_S (w[20], w[21], selector); w[51] = hc_byte_perm_S (w[19], w[20], selector); w[50] = hc_byte_perm_S (w[18], w[19], selector); w[49] = hc_byte_perm_S (w[17], w[18], selector); w[48] = hc_byte_perm_S (w[16], w[17], selector); w[47] = hc_byte_perm_S (w[15], w[16], selector); w[46] = hc_byte_perm_S (w[14], w[15], selector); w[45] = hc_byte_perm_S (w[13], w[14], selector); w[44] = hc_byte_perm_S (w[12], w[13], selector); w[43] = hc_byte_perm_S (w[11], w[12], selector); w[42] = hc_byte_perm_S (w[10], w[11], selector); w[41] = hc_byte_perm_S (w[ 9], w[10], selector); w[40] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[39] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[38] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[37] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[36] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[35] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[34] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[33] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[32] = hc_byte_perm_S (w[ 0], w[ 1], 
selector); w[31] = hc_byte_perm_S ( 0, w[ 0], selector); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_byte_perm_S (w[30], w[31], selector); w[62] = hc_byte_perm_S (w[29], w[30], selector); w[61] = hc_byte_perm_S (w[28], w[29], selector); w[60] = hc_byte_perm_S (w[27], w[28], selector); w[59] = hc_byte_perm_S (w[26], w[27], selector); w[58] = hc_byte_perm_S (w[25], w[26], selector); w[57] = hc_byte_perm_S (w[24], w[25], selector); w[56] = hc_byte_perm_S (w[23], w[24], selector); w[55] = hc_byte_perm_S (w[22], w[23], selector); w[54] = hc_byte_perm_S (w[21], w[22], selector); w[53] = hc_byte_perm_S (w[20], w[21], selector); w[52] = hc_byte_perm_S (w[19], w[20], selector); w[51] = hc_byte_perm_S (w[18], w[19], selector); w[50] = hc_byte_perm_S (w[17], w[18], selector); w[49] = hc_byte_perm_S (w[16], w[17], selector); w[48] = hc_byte_perm_S (w[15], w[16], selector); w[47] = hc_byte_perm_S (w[14], w[15], selector); w[46] = hc_byte_perm_S (w[13], w[14], selector); w[45] = hc_byte_perm_S (w[12], w[13], selector); w[44] = hc_byte_perm_S (w[11], w[12], selector); w[43] = hc_byte_perm_S (w[10], w[11], selector); w[42] = hc_byte_perm_S (w[ 9], w[10], selector); w[41] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[40] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[39] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[38] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[37] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[36] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[35] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[34] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[33] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[32] = hc_byte_perm_S ( 0, w[ 0], selector); w[31] = 0; w[30] = 
0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_byte_perm_S (w[29], w[30], selector); w[62] = hc_byte_perm_S (w[28], w[29], selector); w[61] = hc_byte_perm_S (w[27], w[28], selector); w[60] = hc_byte_perm_S (w[26], w[27], selector); w[59] = hc_byte_perm_S (w[25], w[26], selector); w[58] = hc_byte_perm_S (w[24], w[25], selector); w[57] = hc_byte_perm_S (w[23], w[24], selector); w[56] = hc_byte_perm_S (w[22], w[23], selector); w[55] = hc_byte_perm_S (w[21], w[22], selector); w[54] = hc_byte_perm_S (w[20], w[21], selector); w[53] = hc_byte_perm_S (w[19], w[20], selector); w[52] = hc_byte_perm_S (w[18], w[19], selector); w[51] = hc_byte_perm_S (w[17], w[18], selector); w[50] = hc_byte_perm_S (w[16], w[17], selector); w[49] = hc_byte_perm_S (w[15], w[16], selector); w[48] = hc_byte_perm_S (w[14], w[15], selector); w[47] = hc_byte_perm_S (w[13], w[14], selector); w[46] = hc_byte_perm_S (w[12], w[13], selector); w[45] = hc_byte_perm_S (w[11], w[12], selector); w[44] = hc_byte_perm_S (w[10], w[11], selector); w[43] = hc_byte_perm_S (w[ 9], w[10], selector); w[42] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[41] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[40] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[39] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[38] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[37] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[36] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[35] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[34] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[33] = hc_byte_perm_S ( 0, w[ 0], selector); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; 
w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_byte_perm_S (w[28], w[29], selector); w[62] = hc_byte_perm_S (w[27], w[28], selector); w[61] = hc_byte_perm_S (w[26], w[27], selector); w[60] = hc_byte_perm_S (w[25], w[26], selector); w[59] = hc_byte_perm_S (w[24], w[25], selector); w[58] = hc_byte_perm_S (w[23], w[24], selector); w[57] = hc_byte_perm_S (w[22], w[23], selector); w[56] = hc_byte_perm_S (w[21], w[22], selector); w[55] = hc_byte_perm_S (w[20], w[21], selector); w[54] = hc_byte_perm_S (w[19], w[20], selector); w[53] = hc_byte_perm_S (w[18], w[19], selector); w[52] = hc_byte_perm_S (w[17], w[18], selector); w[51] = hc_byte_perm_S (w[16], w[17], selector); w[50] = hc_byte_perm_S (w[15], w[16], selector); w[49] = hc_byte_perm_S (w[14], w[15], selector); w[48] = hc_byte_perm_S (w[13], w[14], selector); w[47] = hc_byte_perm_S (w[12], w[13], selector); w[46] = hc_byte_perm_S (w[11], w[12], selector); w[45] = hc_byte_perm_S (w[10], w[11], selector); w[44] = hc_byte_perm_S (w[ 9], w[10], selector); w[43] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[42] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[41] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[40] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[39] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[38] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[37] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[36] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[35] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[34] = hc_byte_perm_S ( 0, w[ 0], selector); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; 
w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_byte_perm_S (w[27], w[28], selector); w[62] = hc_byte_perm_S (w[26], w[27], selector); w[61] = hc_byte_perm_S (w[25], w[26], selector); w[60] = hc_byte_perm_S (w[24], w[25], selector); w[59] = hc_byte_perm_S (w[23], w[24], selector); w[58] = hc_byte_perm_S (w[22], w[23], selector); w[57] = hc_byte_perm_S (w[21], w[22], selector); w[56] = hc_byte_perm_S (w[20], w[21], selector); w[55] = hc_byte_perm_S (w[19], w[20], selector); w[54] = hc_byte_perm_S (w[18], w[19], selector); w[53] = hc_byte_perm_S (w[17], w[18], selector); w[52] = hc_byte_perm_S (w[16], w[17], selector); w[51] = hc_byte_perm_S (w[15], w[16], selector); w[50] = hc_byte_perm_S (w[14], w[15], selector); w[49] = hc_byte_perm_S (w[13], w[14], selector); w[48] = hc_byte_perm_S (w[12], w[13], selector); w[47] = hc_byte_perm_S (w[11], w[12], selector); w[46] = hc_byte_perm_S (w[10], w[11], selector); w[45] = hc_byte_perm_S (w[ 9], w[10], selector); w[44] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[43] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[42] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[41] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[40] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[39] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[38] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[37] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[36] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[35] = hc_byte_perm_S ( 0, w[ 0], selector); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_byte_perm_S (w[26], w[27], selector); w[62] = hc_byte_perm_S 
(w[25], w[26], selector); w[61] = hc_byte_perm_S (w[24], w[25], selector); w[60] = hc_byte_perm_S (w[23], w[24], selector); w[59] = hc_byte_perm_S (w[22], w[23], selector); w[58] = hc_byte_perm_S (w[21], w[22], selector); w[57] = hc_byte_perm_S (w[20], w[21], selector); w[56] = hc_byte_perm_S (w[19], w[20], selector); w[55] = hc_byte_perm_S (w[18], w[19], selector); w[54] = hc_byte_perm_S (w[17], w[18], selector); w[53] = hc_byte_perm_S (w[16], w[17], selector); w[52] = hc_byte_perm_S (w[15], w[16], selector); w[51] = hc_byte_perm_S (w[14], w[15], selector); w[50] = hc_byte_perm_S (w[13], w[14], selector); w[49] = hc_byte_perm_S (w[12], w[13], selector); w[48] = hc_byte_perm_S (w[11], w[12], selector); w[47] = hc_byte_perm_S (w[10], w[11], selector); w[46] = hc_byte_perm_S (w[ 9], w[10], selector); w[45] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[44] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[43] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[42] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[41] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[40] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[39] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[38] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[37] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[36] = hc_byte_perm_S ( 0, w[ 0], selector); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_byte_perm_S (w[25], w[26], selector); w[62] = hc_byte_perm_S (w[24], w[25], selector); w[61] = hc_byte_perm_S (w[23], w[24], selector); w[60] = hc_byte_perm_S (w[22], w[23], selector); w[59] = hc_byte_perm_S (w[21], w[22], selector); w[58] = hc_byte_perm_S (w[20], w[21], 
selector); w[57] = hc_byte_perm_S (w[19], w[20], selector); w[56] = hc_byte_perm_S (w[18], w[19], selector); w[55] = hc_byte_perm_S (w[17], w[18], selector); w[54] = hc_byte_perm_S (w[16], w[17], selector); w[53] = hc_byte_perm_S (w[15], w[16], selector); w[52] = hc_byte_perm_S (w[14], w[15], selector); w[51] = hc_byte_perm_S (w[13], w[14], selector); w[50] = hc_byte_perm_S (w[12], w[13], selector); w[49] = hc_byte_perm_S (w[11], w[12], selector); w[48] = hc_byte_perm_S (w[10], w[11], selector); w[47] = hc_byte_perm_S (w[ 9], w[10], selector); w[46] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[45] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[44] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[43] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[42] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[41] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[40] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[39] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[38] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[37] = hc_byte_perm_S ( 0, w[ 0], selector); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_byte_perm_S (w[24], w[25], selector); w[62] = hc_byte_perm_S (w[23], w[24], selector); w[61] = hc_byte_perm_S (w[22], w[23], selector); w[60] = hc_byte_perm_S (w[21], w[22], selector); w[59] = hc_byte_perm_S (w[20], w[21], selector); w[58] = hc_byte_perm_S (w[19], w[20], selector); w[57] = hc_byte_perm_S (w[18], w[19], selector); w[56] = hc_byte_perm_S (w[17], w[18], selector); w[55] = hc_byte_perm_S (w[16], w[17], selector); w[54] = hc_byte_perm_S (w[15], w[16], selector); w[53] = hc_byte_perm_S (w[14], w[15], selector); 
w[52] = hc_byte_perm_S (w[13], w[14], selector); w[51] = hc_byte_perm_S (w[12], w[13], selector); w[50] = hc_byte_perm_S (w[11], w[12], selector); w[49] = hc_byte_perm_S (w[10], w[11], selector); w[48] = hc_byte_perm_S (w[ 9], w[10], selector); w[47] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[46] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[45] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[44] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[43] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[42] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[41] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[40] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[39] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[38] = hc_byte_perm_S ( 0, w[ 0], selector); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_byte_perm_S (w[23], w[24], selector); w[62] = hc_byte_perm_S (w[22], w[23], selector); w[61] = hc_byte_perm_S (w[21], w[22], selector); w[60] = hc_byte_perm_S (w[20], w[21], selector); w[59] = hc_byte_perm_S (w[19], w[20], selector); w[58] = hc_byte_perm_S (w[18], w[19], selector); w[57] = hc_byte_perm_S (w[17], w[18], selector); w[56] = hc_byte_perm_S (w[16], w[17], selector); w[55] = hc_byte_perm_S (w[15], w[16], selector); w[54] = hc_byte_perm_S (w[14], w[15], selector); w[53] = hc_byte_perm_S (w[13], w[14], selector); w[52] = hc_byte_perm_S (w[12], w[13], selector); w[51] = hc_byte_perm_S (w[11], w[12], selector); w[50] = hc_byte_perm_S (w[10], w[11], selector); w[49] = hc_byte_perm_S (w[ 9], w[10], selector); w[48] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[47] = hc_byte_perm_S (w[ 7], w[ 8], selector); 
w[46] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[45] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[44] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[43] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[42] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[41] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[40] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[39] = hc_byte_perm_S ( 0, w[ 0], selector); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_byte_perm_S (w[22], w[23], selector); w[62] = hc_byte_perm_S (w[21], w[22], selector); w[61] = hc_byte_perm_S (w[20], w[21], selector); w[60] = hc_byte_perm_S (w[19], w[20], selector); w[59] = hc_byte_perm_S (w[18], w[19], selector); w[58] = hc_byte_perm_S (w[17], w[18], selector); w[57] = hc_byte_perm_S (w[16], w[17], selector); w[56] = hc_byte_perm_S (w[15], w[16], selector); w[55] = hc_byte_perm_S (w[14], w[15], selector); w[54] = hc_byte_perm_S (w[13], w[14], selector); w[53] = hc_byte_perm_S (w[12], w[13], selector); w[52] = hc_byte_perm_S (w[11], w[12], selector); w[51] = hc_byte_perm_S (w[10], w[11], selector); w[50] = hc_byte_perm_S (w[ 9], w[10], selector); w[49] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[48] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[47] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[46] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[45] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[44] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[43] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[42] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[41] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[40] = hc_byte_perm_S ( 0, w[ 0], 
selector); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_byte_perm_S (w[21], w[22], selector); w[62] = hc_byte_perm_S (w[20], w[21], selector); w[61] = hc_byte_perm_S (w[19], w[20], selector); w[60] = hc_byte_perm_S (w[18], w[19], selector); w[59] = hc_byte_perm_S (w[17], w[18], selector); w[58] = hc_byte_perm_S (w[16], w[17], selector); w[57] = hc_byte_perm_S (w[15], w[16], selector); w[56] = hc_byte_perm_S (w[14], w[15], selector); w[55] = hc_byte_perm_S (w[13], w[14], selector); w[54] = hc_byte_perm_S (w[12], w[13], selector); w[53] = hc_byte_perm_S (w[11], w[12], selector); w[52] = hc_byte_perm_S (w[10], w[11], selector); w[51] = hc_byte_perm_S (w[ 9], w[10], selector); w[50] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[49] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[48] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[47] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[46] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[45] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[44] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[43] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[42] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[41] = hc_byte_perm_S ( 0, w[ 0], selector); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; 
w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_byte_perm_S (w[20], w[21], selector); w[62] = hc_byte_perm_S (w[19], w[20], selector); w[61] = hc_byte_perm_S (w[18], w[19], selector); w[60] = hc_byte_perm_S (w[17], w[18], selector); w[59] = hc_byte_perm_S (w[16], w[17], selector); w[58] = hc_byte_perm_S (w[15], w[16], selector); w[57] = hc_byte_perm_S (w[14], w[15], selector); w[56] = hc_byte_perm_S (w[13], w[14], selector); w[55] = hc_byte_perm_S (w[12], w[13], selector); w[54] = hc_byte_perm_S (w[11], w[12], selector); w[53] = hc_byte_perm_S (w[10], w[11], selector); w[52] = hc_byte_perm_S (w[ 9], w[10], selector); w[51] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[50] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[49] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[48] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[47] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[46] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[45] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[44] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[43] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[42] = hc_byte_perm_S ( 0, w[ 0], selector); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_byte_perm_S (w[19], w[20], selector); w[62] = hc_byte_perm_S (w[18], w[19], selector); w[61] = hc_byte_perm_S (w[17], w[18], selector); w[60] = hc_byte_perm_S (w[16], w[17], selector); w[59] = hc_byte_perm_S (w[15], w[16], selector); w[58] = hc_byte_perm_S (w[14], w[15], selector); w[57] = hc_byte_perm_S (w[13], w[14], selector); w[56] = hc_byte_perm_S (w[12], w[13], 
selector); w[55] = hc_byte_perm_S (w[11], w[12], selector); w[54] = hc_byte_perm_S (w[10], w[11], selector); w[53] = hc_byte_perm_S (w[ 9], w[10], selector); w[52] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[51] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[50] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[49] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[48] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[47] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[46] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[45] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[44] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[43] = hc_byte_perm_S ( 0, w[ 0], selector); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_byte_perm_S (w[18], w[19], selector); w[62] = hc_byte_perm_S (w[17], w[18], selector); w[61] = hc_byte_perm_S (w[16], w[17], selector); w[60] = hc_byte_perm_S (w[15], w[16], selector); w[59] = hc_byte_perm_S (w[14], w[15], selector); w[58] = hc_byte_perm_S (w[13], w[14], selector); w[57] = hc_byte_perm_S (w[12], w[13], selector); w[56] = hc_byte_perm_S (w[11], w[12], selector); w[55] = hc_byte_perm_S (w[10], w[11], selector); w[54] = hc_byte_perm_S (w[ 9], w[10], selector); w[53] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[52] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[51] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[50] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[49] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[48] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[47] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[46] = hc_byte_perm_S (w[ 1], 
w[ 2], selector); w[45] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[44] = hc_byte_perm_S ( 0, w[ 0], selector); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_byte_perm_S (w[17], w[18], selector); w[62] = hc_byte_perm_S (w[16], w[17], selector); w[61] = hc_byte_perm_S (w[15], w[16], selector); w[60] = hc_byte_perm_S (w[14], w[15], selector); w[59] = hc_byte_perm_S (w[13], w[14], selector); w[58] = hc_byte_perm_S (w[12], w[13], selector); w[57] = hc_byte_perm_S (w[11], w[12], selector); w[56] = hc_byte_perm_S (w[10], w[11], selector); w[55] = hc_byte_perm_S (w[ 9], w[10], selector); w[54] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[53] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[52] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[51] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[50] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[49] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[48] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[47] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[46] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[45] = hc_byte_perm_S ( 0, w[ 0], selector); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 
0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_byte_perm_S (w[16], w[17], selector); w[62] = hc_byte_perm_S (w[15], w[16], selector); w[61] = hc_byte_perm_S (w[14], w[15], selector); w[60] = hc_byte_perm_S (w[13], w[14], selector); w[59] = hc_byte_perm_S (w[12], w[13], selector); w[58] = hc_byte_perm_S (w[11], w[12], selector); w[57] = hc_byte_perm_S (w[10], w[11], selector); w[56] = hc_byte_perm_S (w[ 9], w[10], selector); w[55] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[54] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[53] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[52] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[51] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[50] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[49] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[48] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[47] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[46] = hc_byte_perm_S ( 0, w[ 0], selector); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_byte_perm_S (w[15], w[16], selector); w[62] = hc_byte_perm_S (w[14], w[15], selector); w[61] = hc_byte_perm_S (w[13], w[14], selector); w[60] = hc_byte_perm_S (w[12], w[13], selector); w[59] = hc_byte_perm_S (w[11], w[12], selector); w[58] = hc_byte_perm_S (w[10], w[11], selector); w[57] = hc_byte_perm_S (w[ 9], w[10], selector); w[56] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[55] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[54] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[53] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[52] = 
hc_byte_perm_S (w[ 4], w[ 5], selector); w[51] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[50] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[49] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[48] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[47] = hc_byte_perm_S ( 0, w[ 0], selector); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_byte_perm_S (w[14], w[15], selector); w[62] = hc_byte_perm_S (w[13], w[14], selector); w[61] = hc_byte_perm_S (w[12], w[13], selector); w[60] = hc_byte_perm_S (w[11], w[12], selector); w[59] = hc_byte_perm_S (w[10], w[11], selector); w[58] = hc_byte_perm_S (w[ 9], w[10], selector); w[57] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[56] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[55] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[54] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[53] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[52] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[51] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[50] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[49] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[48] = hc_byte_perm_S ( 0, w[ 0], selector); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] 
= 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_byte_perm_S (w[13], w[14], selector); w[62] = hc_byte_perm_S (w[12], w[13], selector); w[61] = hc_byte_perm_S (w[11], w[12], selector); w[60] = hc_byte_perm_S (w[10], w[11], selector); w[59] = hc_byte_perm_S (w[ 9], w[10], selector); w[58] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[57] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[56] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[55] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[54] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[53] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[52] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[51] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[50] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[49] = hc_byte_perm_S ( 0, w[ 0], selector); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_byte_perm_S (w[12], w[13], selector); w[62] = hc_byte_perm_S (w[11], w[12], selector); w[61] = hc_byte_perm_S (w[10], w[11], selector); w[60] = hc_byte_perm_S (w[ 9], w[10], selector); w[59] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[58] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[57] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[56] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[55] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[54] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[53] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[52] = hc_byte_perm_S (w[ 1], 
w[ 2], selector); w[51] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[50] = hc_byte_perm_S ( 0, w[ 0], selector); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_byte_perm_S (w[11], w[12], selector); w[62] = hc_byte_perm_S (w[10], w[11], selector); w[61] = hc_byte_perm_S (w[ 9], w[10], selector); w[60] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[59] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[58] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[57] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[56] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[55] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[54] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[53] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[52] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[51] = hc_byte_perm_S ( 0, w[ 0], selector); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_byte_perm_S (w[10], w[11], selector); w[62] = hc_byte_perm_S (w[ 9], w[10], selector); w[61] = 
hc_byte_perm_S (w[ 8], w[ 9], selector); w[60] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[59] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[58] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[57] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[56] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[55] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[54] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[53] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[52] = hc_byte_perm_S ( 0, w[ 0], selector); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_byte_perm_S (w[ 9], w[10], selector); w[62] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[61] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[60] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[59] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[58] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[57] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[56] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[55] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[54] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[53] = hc_byte_perm_S ( 0, w[ 0], selector); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; 
w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_byte_perm_S (w[ 8], w[ 9], selector); w[62] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[61] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[60] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[59] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[58] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[57] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[56] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[55] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[54] = hc_byte_perm_S ( 0, w[ 0], selector); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_byte_perm_S (w[ 7], w[ 8], selector); w[62] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[61] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[60] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[59] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[58] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[57] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[56] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[55] = hc_byte_perm_S ( 0, w[ 0], selector); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 
0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_byte_perm_S (w[ 6], w[ 7], selector); w[62] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[61] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[60] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[59] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[58] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[57] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[56] = hc_byte_perm_S ( 0, w[ 0], selector); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_byte_perm_S (w[ 5], w[ 6], selector); w[62] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[61] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[60] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[59] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[58] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[57] = hc_byte_perm_S ( 0, w[ 0], selector); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; 
w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_byte_perm_S (w[ 4], w[ 5], selector); w[62] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[61] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[60] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[59] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[58] = hc_byte_perm_S ( 0, w[ 0], selector); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_byte_perm_S (w[ 3], w[ 4], selector); w[62] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[61] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[60] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[59] = hc_byte_perm_S ( 0, w[ 0], selector); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; 
w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_byte_perm_S (w[ 2], w[ 3], selector); w[62] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[61] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[60] = hc_byte_perm_S ( 0, w[ 0], selector); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_byte_perm_S (w[ 1], w[ 2], selector); w[62] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[61] = hc_byte_perm_S ( 0, w[ 0], selector); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_byte_perm_S (w[ 0], w[ 1], selector); w[62] = hc_byte_perm_S ( 
0, w[ 0], selector); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_byte_perm_S ( 0, w[ 0], selector); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif } DECLSPEC void switch_buffer_by_offset_1x64_be_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: w[63] = hc_bytealign_be_S (w[62], w[63], offset); w[62] = hc_bytealign_be_S (w[61], w[62], offset); w[61] = hc_bytealign_be_S (w[60], w[61], offset); w[60] = hc_bytealign_be_S (w[59], w[60], offset); w[59] = hc_bytealign_be_S (w[58], w[59], offset); w[58] = hc_bytealign_be_S (w[57], 
w[58], offset); w[57] = hc_bytealign_be_S (w[56], w[57], offset); w[56] = hc_bytealign_be_S (w[55], w[56], offset); w[55] = hc_bytealign_be_S (w[54], w[55], offset); w[54] = hc_bytealign_be_S (w[53], w[54], offset); w[53] = hc_bytealign_be_S (w[52], w[53], offset); w[52] = hc_bytealign_be_S (w[51], w[52], offset); w[51] = hc_bytealign_be_S (w[50], w[51], offset); w[50] = hc_bytealign_be_S (w[49], w[50], offset); w[49] = hc_bytealign_be_S (w[48], w[49], offset); w[48] = hc_bytealign_be_S (w[47], w[48], offset); w[47] = hc_bytealign_be_S (w[46], w[47], offset); w[46] = hc_bytealign_be_S (w[45], w[46], offset); w[45] = hc_bytealign_be_S (w[44], w[45], offset); w[44] = hc_bytealign_be_S (w[43], w[44], offset); w[43] = hc_bytealign_be_S (w[42], w[43], offset); w[42] = hc_bytealign_be_S (w[41], w[42], offset); w[41] = hc_bytealign_be_S (w[40], w[41], offset); w[40] = hc_bytealign_be_S (w[39], w[40], offset); w[39] = hc_bytealign_be_S (w[38], w[39], offset); w[38] = hc_bytealign_be_S (w[37], w[38], offset); w[37] = hc_bytealign_be_S (w[36], w[37], offset); w[36] = hc_bytealign_be_S (w[35], w[36], offset); w[35] = hc_bytealign_be_S (w[34], w[35], offset); w[34] = hc_bytealign_be_S (w[33], w[34], offset); w[33] = hc_bytealign_be_S (w[32], w[33], offset); w[32] = hc_bytealign_be_S (w[31], w[32], offset); w[31] = hc_bytealign_be_S (w[30], w[31], offset); w[30] = hc_bytealign_be_S (w[29], w[30], offset); w[29] = hc_bytealign_be_S (w[28], w[29], offset); w[28] = hc_bytealign_be_S (w[27], w[28], offset); w[27] = hc_bytealign_be_S (w[26], w[27], offset); w[26] = hc_bytealign_be_S (w[25], w[26], offset); w[25] = hc_bytealign_be_S (w[24], w[25], offset); w[24] = hc_bytealign_be_S (w[23], w[24], offset); w[23] = hc_bytealign_be_S (w[22], w[23], offset); w[22] = hc_bytealign_be_S (w[21], w[22], offset); w[21] = hc_bytealign_be_S (w[20], w[21], offset); w[20] = hc_bytealign_be_S (w[19], w[20], offset); w[19] = hc_bytealign_be_S (w[18], w[19], offset); w[18] = hc_bytealign_be_S (w[17], 
w[18], offset); w[17] = hc_bytealign_be_S (w[16], w[17], offset); w[16] = hc_bytealign_be_S (w[15], w[16], offset); w[15] = hc_bytealign_be_S (w[14], w[15], offset); w[14] = hc_bytealign_be_S (w[13], w[14], offset); w[13] = hc_bytealign_be_S (w[12], w[13], offset); w[12] = hc_bytealign_be_S (w[11], w[12], offset); w[11] = hc_bytealign_be_S (w[10], w[11], offset); w[10] = hc_bytealign_be_S (w[ 9], w[10], offset); w[ 9] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[ 8] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[ 7] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[ 6] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[ 5] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 4] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 3] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 2] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 1] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 0] = hc_bytealign_be_S ( 0, w[ 0], offset); break; case 1: w[63] = hc_bytealign_be_S (w[61], w[62], offset); w[62] = hc_bytealign_be_S (w[60], w[61], offset); w[61] = hc_bytealign_be_S (w[59], w[60], offset); w[60] = hc_bytealign_be_S (w[58], w[59], offset); w[59] = hc_bytealign_be_S (w[57], w[58], offset); w[58] = hc_bytealign_be_S (w[56], w[57], offset); w[57] = hc_bytealign_be_S (w[55], w[56], offset); w[56] = hc_bytealign_be_S (w[54], w[55], offset); w[55] = hc_bytealign_be_S (w[53], w[54], offset); w[54] = hc_bytealign_be_S (w[52], w[53], offset); w[53] = hc_bytealign_be_S (w[51], w[52], offset); w[52] = hc_bytealign_be_S (w[50], w[51], offset); w[51] = hc_bytealign_be_S (w[49], w[50], offset); w[50] = hc_bytealign_be_S (w[48], w[49], offset); w[49] = hc_bytealign_be_S (w[47], w[48], offset); w[48] = hc_bytealign_be_S (w[46], w[47], offset); w[47] = hc_bytealign_be_S (w[45], w[46], offset); w[46] = hc_bytealign_be_S (w[44], w[45], offset); w[45] = hc_bytealign_be_S (w[43], w[44], offset); w[44] = hc_bytealign_be_S (w[42], w[43], offset); w[43] = hc_bytealign_be_S (w[41], w[42], offset); w[42] = 
hc_bytealign_be_S (w[40], w[41], offset); w[41] = hc_bytealign_be_S (w[39], w[40], offset); w[40] = hc_bytealign_be_S (w[38], w[39], offset); w[39] = hc_bytealign_be_S (w[37], w[38], offset); w[38] = hc_bytealign_be_S (w[36], w[37], offset); w[37] = hc_bytealign_be_S (w[35], w[36], offset); w[36] = hc_bytealign_be_S (w[34], w[35], offset); w[35] = hc_bytealign_be_S (w[33], w[34], offset); w[34] = hc_bytealign_be_S (w[32], w[33], offset); w[33] = hc_bytealign_be_S (w[31], w[32], offset); w[32] = hc_bytealign_be_S (w[30], w[31], offset); w[31] = hc_bytealign_be_S (w[29], w[30], offset); w[30] = hc_bytealign_be_S (w[28], w[29], offset); w[29] = hc_bytealign_be_S (w[27], w[28], offset); w[28] = hc_bytealign_be_S (w[26], w[27], offset); w[27] = hc_bytealign_be_S (w[25], w[26], offset); w[26] = hc_bytealign_be_S (w[24], w[25], offset); w[25] = hc_bytealign_be_S (w[23], w[24], offset); w[24] = hc_bytealign_be_S (w[22], w[23], offset); w[23] = hc_bytealign_be_S (w[21], w[22], offset); w[22] = hc_bytealign_be_S (w[20], w[21], offset); w[21] = hc_bytealign_be_S (w[19], w[20], offset); w[20] = hc_bytealign_be_S (w[18], w[19], offset); w[19] = hc_bytealign_be_S (w[17], w[18], offset); w[18] = hc_bytealign_be_S (w[16], w[17], offset); w[17] = hc_bytealign_be_S (w[15], w[16], offset); w[16] = hc_bytealign_be_S (w[14], w[15], offset); w[15] = hc_bytealign_be_S (w[13], w[14], offset); w[14] = hc_bytealign_be_S (w[12], w[13], offset); w[13] = hc_bytealign_be_S (w[11], w[12], offset); w[12] = hc_bytealign_be_S (w[10], w[11], offset); w[11] = hc_bytealign_be_S (w[ 9], w[10], offset); w[10] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[ 9] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[ 8] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[ 7] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[ 6] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 5] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 4] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 3] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 2] = 
hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 1] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 0] = 0; break; case 2: w[63] = hc_bytealign_be_S (w[60], w[61], offset); w[62] = hc_bytealign_be_S (w[59], w[60], offset); w[61] = hc_bytealign_be_S (w[58], w[59], offset); w[60] = hc_bytealign_be_S (w[57], w[58], offset); w[59] = hc_bytealign_be_S (w[56], w[57], offset); w[58] = hc_bytealign_be_S (w[55], w[56], offset); w[57] = hc_bytealign_be_S (w[54], w[55], offset); w[56] = hc_bytealign_be_S (w[53], w[54], offset); w[55] = hc_bytealign_be_S (w[52], w[53], offset); w[54] = hc_bytealign_be_S (w[51], w[52], offset); w[53] = hc_bytealign_be_S (w[50], w[51], offset); w[52] = hc_bytealign_be_S (w[49], w[50], offset); w[51] = hc_bytealign_be_S (w[48], w[49], offset); w[50] = hc_bytealign_be_S (w[47], w[48], offset); w[49] = hc_bytealign_be_S (w[46], w[47], offset); w[48] = hc_bytealign_be_S (w[45], w[46], offset); w[47] = hc_bytealign_be_S (w[44], w[45], offset); w[46] = hc_bytealign_be_S (w[43], w[44], offset); w[45] = hc_bytealign_be_S (w[42], w[43], offset); w[44] = hc_bytealign_be_S (w[41], w[42], offset); w[43] = hc_bytealign_be_S (w[40], w[41], offset); w[42] = hc_bytealign_be_S (w[39], w[40], offset); w[41] = hc_bytealign_be_S (w[38], w[39], offset); w[40] = hc_bytealign_be_S (w[37], w[38], offset); w[39] = hc_bytealign_be_S (w[36], w[37], offset); w[38] = hc_bytealign_be_S (w[35], w[36], offset); w[37] = hc_bytealign_be_S (w[34], w[35], offset); w[36] = hc_bytealign_be_S (w[33], w[34], offset); w[35] = hc_bytealign_be_S (w[32], w[33], offset); w[34] = hc_bytealign_be_S (w[31], w[32], offset); w[33] = hc_bytealign_be_S (w[30], w[31], offset); w[32] = hc_bytealign_be_S (w[29], w[30], offset); w[31] = hc_bytealign_be_S (w[28], w[29], offset); w[30] = hc_bytealign_be_S (w[27], w[28], offset); w[29] = hc_bytealign_be_S (w[26], w[27], offset); w[28] = hc_bytealign_be_S (w[25], w[26], offset); w[27] = hc_bytealign_be_S (w[24], w[25], offset); w[26] = hc_bytealign_be_S (w[23], 
w[24], offset); w[25] = hc_bytealign_be_S (w[22], w[23], offset); w[24] = hc_bytealign_be_S (w[21], w[22], offset); w[23] = hc_bytealign_be_S (w[20], w[21], offset); w[22] = hc_bytealign_be_S (w[19], w[20], offset); w[21] = hc_bytealign_be_S (w[18], w[19], offset); w[20] = hc_bytealign_be_S (w[17], w[18], offset); w[19] = hc_bytealign_be_S (w[16], w[17], offset); w[18] = hc_bytealign_be_S (w[15], w[16], offset); w[17] = hc_bytealign_be_S (w[14], w[15], offset); w[16] = hc_bytealign_be_S (w[13], w[14], offset); w[15] = hc_bytealign_be_S (w[12], w[13], offset); w[14] = hc_bytealign_be_S (w[11], w[12], offset); w[13] = hc_bytealign_be_S (w[10], w[11], offset); w[12] = hc_bytealign_be_S (w[ 9], w[10], offset); w[11] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[10] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[ 9] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[ 8] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[ 7] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 6] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 5] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 4] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 3] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 2] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_bytealign_be_S (w[59], w[60], offset); w[62] = hc_bytealign_be_S (w[58], w[59], offset); w[61] = hc_bytealign_be_S (w[57], w[58], offset); w[60] = hc_bytealign_be_S (w[56], w[57], offset); w[59] = hc_bytealign_be_S (w[55], w[56], offset); w[58] = hc_bytealign_be_S (w[54], w[55], offset); w[57] = hc_bytealign_be_S (w[53], w[54], offset); w[56] = hc_bytealign_be_S (w[52], w[53], offset); w[55] = hc_bytealign_be_S (w[51], w[52], offset); w[54] = hc_bytealign_be_S (w[50], w[51], offset); w[53] = hc_bytealign_be_S (w[49], w[50], offset); w[52] = hc_bytealign_be_S (w[48], w[49], offset); w[51] = hc_bytealign_be_S (w[47], w[48], offset); w[50] = hc_bytealign_be_S (w[46], w[47], offset); w[49] = hc_bytealign_be_S (w[45], w[46], offset); 
w[48] = hc_bytealign_be_S (w[44], w[45], offset); w[47] = hc_bytealign_be_S (w[43], w[44], offset); w[46] = hc_bytealign_be_S (w[42], w[43], offset); w[45] = hc_bytealign_be_S (w[41], w[42], offset); w[44] = hc_bytealign_be_S (w[40], w[41], offset); w[43] = hc_bytealign_be_S (w[39], w[40], offset); w[42] = hc_bytealign_be_S (w[38], w[39], offset); w[41] = hc_bytealign_be_S (w[37], w[38], offset); w[40] = hc_bytealign_be_S (w[36], w[37], offset); w[39] = hc_bytealign_be_S (w[35], w[36], offset); w[38] = hc_bytealign_be_S (w[34], w[35], offset); w[37] = hc_bytealign_be_S (w[33], w[34], offset); w[36] = hc_bytealign_be_S (w[32], w[33], offset); w[35] = hc_bytealign_be_S (w[31], w[32], offset); w[34] = hc_bytealign_be_S (w[30], w[31], offset); w[33] = hc_bytealign_be_S (w[29], w[30], offset); w[32] = hc_bytealign_be_S (w[28], w[29], offset); w[31] = hc_bytealign_be_S (w[27], w[28], offset); w[30] = hc_bytealign_be_S (w[26], w[27], offset); w[29] = hc_bytealign_be_S (w[25], w[26], offset); w[28] = hc_bytealign_be_S (w[24], w[25], offset); w[27] = hc_bytealign_be_S (w[23], w[24], offset); w[26] = hc_bytealign_be_S (w[22], w[23], offset); w[25] = hc_bytealign_be_S (w[21], w[22], offset); w[24] = hc_bytealign_be_S (w[20], w[21], offset); w[23] = hc_bytealign_be_S (w[19], w[20], offset); w[22] = hc_bytealign_be_S (w[18], w[19], offset); w[21] = hc_bytealign_be_S (w[17], w[18], offset); w[20] = hc_bytealign_be_S (w[16], w[17], offset); w[19] = hc_bytealign_be_S (w[15], w[16], offset); w[18] = hc_bytealign_be_S (w[14], w[15], offset); w[17] = hc_bytealign_be_S (w[13], w[14], offset); w[16] = hc_bytealign_be_S (w[12], w[13], offset); w[15] = hc_bytealign_be_S (w[11], w[12], offset); w[14] = hc_bytealign_be_S (w[10], w[11], offset); w[13] = hc_bytealign_be_S (w[ 9], w[10], offset); w[12] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[11] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[10] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[ 9] = hc_bytealign_be_S (w[ 5], w[ 6], offset); 
w[ 8] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 7] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 6] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 5] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 4] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 3] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_bytealign_be_S (w[58], w[59], offset); w[62] = hc_bytealign_be_S (w[57], w[58], offset); w[61] = hc_bytealign_be_S (w[56], w[57], offset); w[60] = hc_bytealign_be_S (w[55], w[56], offset); w[59] = hc_bytealign_be_S (w[54], w[55], offset); w[58] = hc_bytealign_be_S (w[53], w[54], offset); w[57] = hc_bytealign_be_S (w[52], w[53], offset); w[56] = hc_bytealign_be_S (w[51], w[52], offset); w[55] = hc_bytealign_be_S (w[50], w[51], offset); w[54] = hc_bytealign_be_S (w[49], w[50], offset); w[53] = hc_bytealign_be_S (w[48], w[49], offset); w[52] = hc_bytealign_be_S (w[47], w[48], offset); w[51] = hc_bytealign_be_S (w[46], w[47], offset); w[50] = hc_bytealign_be_S (w[45], w[46], offset); w[49] = hc_bytealign_be_S (w[44], w[45], offset); w[48] = hc_bytealign_be_S (w[43], w[44], offset); w[47] = hc_bytealign_be_S (w[42], w[43], offset); w[46] = hc_bytealign_be_S (w[41], w[42], offset); w[45] = hc_bytealign_be_S (w[40], w[41], offset); w[44] = hc_bytealign_be_S (w[39], w[40], offset); w[43] = hc_bytealign_be_S (w[38], w[39], offset); w[42] = hc_bytealign_be_S (w[37], w[38], offset); w[41] = hc_bytealign_be_S (w[36], w[37], offset); w[40] = hc_bytealign_be_S (w[35], w[36], offset); w[39] = hc_bytealign_be_S (w[34], w[35], offset); w[38] = hc_bytealign_be_S (w[33], w[34], offset); w[37] = hc_bytealign_be_S (w[32], w[33], offset); w[36] = hc_bytealign_be_S (w[31], w[32], offset); w[35] = hc_bytealign_be_S (w[30], w[31], offset); w[34] = hc_bytealign_be_S (w[29], w[30], offset); w[33] = hc_bytealign_be_S (w[28], w[29], offset); w[32] = hc_bytealign_be_S (w[27], w[28], offset); w[31] = hc_bytealign_be_S (w[26], w[27], offset); 
w[30] = hc_bytealign_be_S (w[25], w[26], offset); w[29] = hc_bytealign_be_S (w[24], w[25], offset); w[28] = hc_bytealign_be_S (w[23], w[24], offset); w[27] = hc_bytealign_be_S (w[22], w[23], offset); w[26] = hc_bytealign_be_S (w[21], w[22], offset); w[25] = hc_bytealign_be_S (w[20], w[21], offset); w[24] = hc_bytealign_be_S (w[19], w[20], offset); w[23] = hc_bytealign_be_S (w[18], w[19], offset); w[22] = hc_bytealign_be_S (w[17], w[18], offset); w[21] = hc_bytealign_be_S (w[16], w[17], offset); w[20] = hc_bytealign_be_S (w[15], w[16], offset); w[19] = hc_bytealign_be_S (w[14], w[15], offset); w[18] = hc_bytealign_be_S (w[13], w[14], offset); w[17] = hc_bytealign_be_S (w[12], w[13], offset); w[16] = hc_bytealign_be_S (w[11], w[12], offset); w[15] = hc_bytealign_be_S (w[10], w[11], offset); w[14] = hc_bytealign_be_S (w[ 9], w[10], offset); w[13] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[12] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[11] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[10] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[ 9] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 8] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 7] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 6] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 5] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 4] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_bytealign_be_S (w[57], w[58], offset); w[62] = hc_bytealign_be_S (w[56], w[57], offset); w[61] = hc_bytealign_be_S (w[55], w[56], offset); w[60] = hc_bytealign_be_S (w[54], w[55], offset); w[59] = hc_bytealign_be_S (w[53], w[54], offset); w[58] = hc_bytealign_be_S (w[52], w[53], offset); w[57] = hc_bytealign_be_S (w[51], w[52], offset); w[56] = hc_bytealign_be_S (w[50], w[51], offset); w[55] = hc_bytealign_be_S (w[49], w[50], offset); w[54] = hc_bytealign_be_S (w[48], w[49], offset); w[53] = hc_bytealign_be_S (w[47], w[48], offset); w[52] = hc_bytealign_be_S (w[46], w[47], 
offset); w[51] = hc_bytealign_be_S (w[45], w[46], offset); w[50] = hc_bytealign_be_S (w[44], w[45], offset); w[49] = hc_bytealign_be_S (w[43], w[44], offset); w[48] = hc_bytealign_be_S (w[42], w[43], offset); w[47] = hc_bytealign_be_S (w[41], w[42], offset); w[46] = hc_bytealign_be_S (w[40], w[41], offset); w[45] = hc_bytealign_be_S (w[39], w[40], offset); w[44] = hc_bytealign_be_S (w[38], w[39], offset); w[43] = hc_bytealign_be_S (w[37], w[38], offset); w[42] = hc_bytealign_be_S (w[36], w[37], offset); w[41] = hc_bytealign_be_S (w[35], w[36], offset); w[40] = hc_bytealign_be_S (w[34], w[35], offset); w[39] = hc_bytealign_be_S (w[33], w[34], offset); w[38] = hc_bytealign_be_S (w[32], w[33], offset); w[37] = hc_bytealign_be_S (w[31], w[32], offset); w[36] = hc_bytealign_be_S (w[30], w[31], offset); w[35] = hc_bytealign_be_S (w[29], w[30], offset); w[34] = hc_bytealign_be_S (w[28], w[29], offset); w[33] = hc_bytealign_be_S (w[27], w[28], offset); w[32] = hc_bytealign_be_S (w[26], w[27], offset); w[31] = hc_bytealign_be_S (w[25], w[26], offset); w[30] = hc_bytealign_be_S (w[24], w[25], offset); w[29] = hc_bytealign_be_S (w[23], w[24], offset); w[28] = hc_bytealign_be_S (w[22], w[23], offset); w[27] = hc_bytealign_be_S (w[21], w[22], offset); w[26] = hc_bytealign_be_S (w[20], w[21], offset); w[25] = hc_bytealign_be_S (w[19], w[20], offset); w[24] = hc_bytealign_be_S (w[18], w[19], offset); w[23] = hc_bytealign_be_S (w[17], w[18], offset); w[22] = hc_bytealign_be_S (w[16], w[17], offset); w[21] = hc_bytealign_be_S (w[15], w[16], offset); w[20] = hc_bytealign_be_S (w[14], w[15], offset); w[19] = hc_bytealign_be_S (w[13], w[14], offset); w[18] = hc_bytealign_be_S (w[12], w[13], offset); w[17] = hc_bytealign_be_S (w[11], w[12], offset); w[16] = hc_bytealign_be_S (w[10], w[11], offset); w[15] = hc_bytealign_be_S (w[ 9], w[10], offset); w[14] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[13] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[12] = hc_bytealign_be_S (w[ 6], w[ 7], 
offset); w[11] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[10] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[ 9] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 8] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 7] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 6] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 5] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_bytealign_be_S (w[56], w[57], offset); w[62] = hc_bytealign_be_S (w[55], w[56], offset); w[61] = hc_bytealign_be_S (w[54], w[55], offset); w[60] = hc_bytealign_be_S (w[53], w[54], offset); w[59] = hc_bytealign_be_S (w[52], w[53], offset); w[58] = hc_bytealign_be_S (w[51], w[52], offset); w[57] = hc_bytealign_be_S (w[50], w[51], offset); w[56] = hc_bytealign_be_S (w[49], w[50], offset); w[55] = hc_bytealign_be_S (w[48], w[49], offset); w[54] = hc_bytealign_be_S (w[47], w[48], offset); w[53] = hc_bytealign_be_S (w[46], w[47], offset); w[52] = hc_bytealign_be_S (w[45], w[46], offset); w[51] = hc_bytealign_be_S (w[44], w[45], offset); w[50] = hc_bytealign_be_S (w[43], w[44], offset); w[49] = hc_bytealign_be_S (w[42], w[43], offset); w[48] = hc_bytealign_be_S (w[41], w[42], offset); w[47] = hc_bytealign_be_S (w[40], w[41], offset); w[46] = hc_bytealign_be_S (w[39], w[40], offset); w[45] = hc_bytealign_be_S (w[38], w[39], offset); w[44] = hc_bytealign_be_S (w[37], w[38], offset); w[43] = hc_bytealign_be_S (w[36], w[37], offset); w[42] = hc_bytealign_be_S (w[35], w[36], offset); w[41] = hc_bytealign_be_S (w[34], w[35], offset); w[40] = hc_bytealign_be_S (w[33], w[34], offset); w[39] = hc_bytealign_be_S (w[32], w[33], offset); w[38] = hc_bytealign_be_S (w[31], w[32], offset); w[37] = hc_bytealign_be_S (w[30], w[31], offset); w[36] = hc_bytealign_be_S (w[29], w[30], offset); w[35] = hc_bytealign_be_S (w[28], w[29], offset); w[34] = hc_bytealign_be_S (w[27], w[28], offset); w[33] = hc_bytealign_be_S (w[26], w[27], offset); w[32] = 
hc_bytealign_be_S (w[25], w[26], offset); w[31] = hc_bytealign_be_S (w[24], w[25], offset); w[30] = hc_bytealign_be_S (w[23], w[24], offset); w[29] = hc_bytealign_be_S (w[22], w[23], offset); w[28] = hc_bytealign_be_S (w[21], w[22], offset); w[27] = hc_bytealign_be_S (w[20], w[21], offset); w[26] = hc_bytealign_be_S (w[19], w[20], offset); w[25] = hc_bytealign_be_S (w[18], w[19], offset); w[24] = hc_bytealign_be_S (w[17], w[18], offset); w[23] = hc_bytealign_be_S (w[16], w[17], offset); w[22] = hc_bytealign_be_S (w[15], w[16], offset); w[21] = hc_bytealign_be_S (w[14], w[15], offset); w[20] = hc_bytealign_be_S (w[13], w[14], offset); w[19] = hc_bytealign_be_S (w[12], w[13], offset); w[18] = hc_bytealign_be_S (w[11], w[12], offset); w[17] = hc_bytealign_be_S (w[10], w[11], offset); w[16] = hc_bytealign_be_S (w[ 9], w[10], offset); w[15] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[14] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[13] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[12] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[11] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[10] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[ 9] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 8] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 7] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 6] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_bytealign_be_S (w[55], w[56], offset); w[62] = hc_bytealign_be_S (w[54], w[55], offset); w[61] = hc_bytealign_be_S (w[53], w[54], offset); w[60] = hc_bytealign_be_S (w[52], w[53], offset); w[59] = hc_bytealign_be_S (w[51], w[52], offset); w[58] = hc_bytealign_be_S (w[50], w[51], offset); w[57] = hc_bytealign_be_S (w[49], w[50], offset); w[56] = hc_bytealign_be_S (w[48], w[49], offset); w[55] = hc_bytealign_be_S (w[47], w[48], offset); w[54] = hc_bytealign_be_S (w[46], w[47], offset); w[53] = hc_bytealign_be_S (w[45], w[46], offset); w[52] = hc_bytealign_be_S 
(w[44], w[45], offset); w[51] = hc_bytealign_be_S (w[43], w[44], offset); w[50] = hc_bytealign_be_S (w[42], w[43], offset); w[49] = hc_bytealign_be_S (w[41], w[42], offset); w[48] = hc_bytealign_be_S (w[40], w[41], offset); w[47] = hc_bytealign_be_S (w[39], w[40], offset); w[46] = hc_bytealign_be_S (w[38], w[39], offset); w[45] = hc_bytealign_be_S (w[37], w[38], offset); w[44] = hc_bytealign_be_S (w[36], w[37], offset); w[43] = hc_bytealign_be_S (w[35], w[36], offset); w[42] = hc_bytealign_be_S (w[34], w[35], offset); w[41] = hc_bytealign_be_S (w[33], w[34], offset); w[40] = hc_bytealign_be_S (w[32], w[33], offset); w[39] = hc_bytealign_be_S (w[31], w[32], offset); w[38] = hc_bytealign_be_S (w[30], w[31], offset); w[37] = hc_bytealign_be_S (w[29], w[30], offset); w[36] = hc_bytealign_be_S (w[28], w[29], offset); w[35] = hc_bytealign_be_S (w[27], w[28], offset); w[34] = hc_bytealign_be_S (w[26], w[27], offset); w[33] = hc_bytealign_be_S (w[25], w[26], offset); w[32] = hc_bytealign_be_S (w[24], w[25], offset); w[31] = hc_bytealign_be_S (w[23], w[24], offset); w[30] = hc_bytealign_be_S (w[22], w[23], offset); w[29] = hc_bytealign_be_S (w[21], w[22], offset); w[28] = hc_bytealign_be_S (w[20], w[21], offset); w[27] = hc_bytealign_be_S (w[19], w[20], offset); w[26] = hc_bytealign_be_S (w[18], w[19], offset); w[25] = hc_bytealign_be_S (w[17], w[18], offset); w[24] = hc_bytealign_be_S (w[16], w[17], offset); w[23] = hc_bytealign_be_S (w[15], w[16], offset); w[22] = hc_bytealign_be_S (w[14], w[15], offset); w[21] = hc_bytealign_be_S (w[13], w[14], offset); w[20] = hc_bytealign_be_S (w[12], w[13], offset); w[19] = hc_bytealign_be_S (w[11], w[12], offset); w[18] = hc_bytealign_be_S (w[10], w[11], offset); w[17] = hc_bytealign_be_S (w[ 9], w[10], offset); w[16] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[15] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[14] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[13] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[12] = hc_bytealign_be_S 
(w[ 4], w[ 5], offset); w[11] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[10] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[ 9] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 8] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 7] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_bytealign_be_S (w[54], w[55], offset); w[62] = hc_bytealign_be_S (w[53], w[54], offset); w[61] = hc_bytealign_be_S (w[52], w[53], offset); w[60] = hc_bytealign_be_S (w[51], w[52], offset); w[59] = hc_bytealign_be_S (w[50], w[51], offset); w[58] = hc_bytealign_be_S (w[49], w[50], offset); w[57] = hc_bytealign_be_S (w[48], w[49], offset); w[56] = hc_bytealign_be_S (w[47], w[48], offset); w[55] = hc_bytealign_be_S (w[46], w[47], offset); w[54] = hc_bytealign_be_S (w[45], w[46], offset); w[53] = hc_bytealign_be_S (w[44], w[45], offset); w[52] = hc_bytealign_be_S (w[43], w[44], offset); w[51] = hc_bytealign_be_S (w[42], w[43], offset); w[50] = hc_bytealign_be_S (w[41], w[42], offset); w[49] = hc_bytealign_be_S (w[40], w[41], offset); w[48] = hc_bytealign_be_S (w[39], w[40], offset); w[47] = hc_bytealign_be_S (w[38], w[39], offset); w[46] = hc_bytealign_be_S (w[37], w[38], offset); w[45] = hc_bytealign_be_S (w[36], w[37], offset); w[44] = hc_bytealign_be_S (w[35], w[36], offset); w[43] = hc_bytealign_be_S (w[34], w[35], offset); w[42] = hc_bytealign_be_S (w[33], w[34], offset); w[41] = hc_bytealign_be_S (w[32], w[33], offset); w[40] = hc_bytealign_be_S (w[31], w[32], offset); w[39] = hc_bytealign_be_S (w[30], w[31], offset); w[38] = hc_bytealign_be_S (w[29], w[30], offset); w[37] = hc_bytealign_be_S (w[28], w[29], offset); w[36] = hc_bytealign_be_S (w[27], w[28], offset); w[35] = hc_bytealign_be_S (w[26], w[27], offset); w[34] = hc_bytealign_be_S (w[25], w[26], offset); w[33] = hc_bytealign_be_S (w[24], w[25], offset); w[32] = hc_bytealign_be_S (w[23], w[24], offset); w[31] = hc_bytealign_be_S (w[22], 
w[23], offset); w[30] = hc_bytealign_be_S (w[21], w[22], offset); w[29] = hc_bytealign_be_S (w[20], w[21], offset); w[28] = hc_bytealign_be_S (w[19], w[20], offset); w[27] = hc_bytealign_be_S (w[18], w[19], offset); w[26] = hc_bytealign_be_S (w[17], w[18], offset); w[25] = hc_bytealign_be_S (w[16], w[17], offset); w[24] = hc_bytealign_be_S (w[15], w[16], offset); w[23] = hc_bytealign_be_S (w[14], w[15], offset); w[22] = hc_bytealign_be_S (w[13], w[14], offset); w[21] = hc_bytealign_be_S (w[12], w[13], offset); w[20] = hc_bytealign_be_S (w[11], w[12], offset); w[19] = hc_bytealign_be_S (w[10], w[11], offset); w[18] = hc_bytealign_be_S (w[ 9], w[10], offset); w[17] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[16] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[15] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[14] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[13] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[12] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[11] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[10] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[ 9] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 8] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_bytealign_be_S (w[53], w[54], offset); w[62] = hc_bytealign_be_S (w[52], w[53], offset); w[61] = hc_bytealign_be_S (w[51], w[52], offset); w[60] = hc_bytealign_be_S (w[50], w[51], offset); w[59] = hc_bytealign_be_S (w[49], w[50], offset); w[58] = hc_bytealign_be_S (w[48], w[49], offset); w[57] = hc_bytealign_be_S (w[47], w[48], offset); w[56] = hc_bytealign_be_S (w[46], w[47], offset); w[55] = hc_bytealign_be_S (w[45], w[46], offset); w[54] = hc_bytealign_be_S (w[44], w[45], offset); w[53] = hc_bytealign_be_S (w[43], w[44], offset); w[52] = hc_bytealign_be_S (w[42], w[43], offset); w[51] = hc_bytealign_be_S (w[41], w[42], offset); w[50] = hc_bytealign_be_S (w[40], w[41], offset); w[49] = hc_bytealign_be_S (w[39], 
w[40], offset); w[48] = hc_bytealign_be_S (w[38], w[39], offset); w[47] = hc_bytealign_be_S (w[37], w[38], offset); w[46] = hc_bytealign_be_S (w[36], w[37], offset); w[45] = hc_bytealign_be_S (w[35], w[36], offset); w[44] = hc_bytealign_be_S (w[34], w[35], offset); w[43] = hc_bytealign_be_S (w[33], w[34], offset); w[42] = hc_bytealign_be_S (w[32], w[33], offset); w[41] = hc_bytealign_be_S (w[31], w[32], offset); w[40] = hc_bytealign_be_S (w[30], w[31], offset); w[39] = hc_bytealign_be_S (w[29], w[30], offset); w[38] = hc_bytealign_be_S (w[28], w[29], offset); w[37] = hc_bytealign_be_S (w[27], w[28], offset); w[36] = hc_bytealign_be_S (w[26], w[27], offset); w[35] = hc_bytealign_be_S (w[25], w[26], offset); w[34] = hc_bytealign_be_S (w[24], w[25], offset); w[33] = hc_bytealign_be_S (w[23], w[24], offset); w[32] = hc_bytealign_be_S (w[22], w[23], offset); w[31] = hc_bytealign_be_S (w[21], w[22], offset); w[30] = hc_bytealign_be_S (w[20], w[21], offset); w[29] = hc_bytealign_be_S (w[19], w[20], offset); w[28] = hc_bytealign_be_S (w[18], w[19], offset); w[27] = hc_bytealign_be_S (w[17], w[18], offset); w[26] = hc_bytealign_be_S (w[16], w[17], offset); w[25] = hc_bytealign_be_S (w[15], w[16], offset); w[24] = hc_bytealign_be_S (w[14], w[15], offset); w[23] = hc_bytealign_be_S (w[13], w[14], offset); w[22] = hc_bytealign_be_S (w[12], w[13], offset); w[21] = hc_bytealign_be_S (w[11], w[12], offset); w[20] = hc_bytealign_be_S (w[10], w[11], offset); w[19] = hc_bytealign_be_S (w[ 9], w[10], offset); w[18] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[17] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[16] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[15] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[14] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[13] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[12] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[11] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[10] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[ 9] = hc_bytealign_be_S ( 0, w[ 
0], offset); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_bytealign_be_S (w[52], w[53], offset); w[62] = hc_bytealign_be_S (w[51], w[52], offset); w[61] = hc_bytealign_be_S (w[50], w[51], offset); w[60] = hc_bytealign_be_S (w[49], w[50], offset); w[59] = hc_bytealign_be_S (w[48], w[49], offset); w[58] = hc_bytealign_be_S (w[47], w[48], offset); w[57] = hc_bytealign_be_S (w[46], w[47], offset); w[56] = hc_bytealign_be_S (w[45], w[46], offset); w[55] = hc_bytealign_be_S (w[44], w[45], offset); w[54] = hc_bytealign_be_S (w[43], w[44], offset); w[53] = hc_bytealign_be_S (w[42], w[43], offset); w[52] = hc_bytealign_be_S (w[41], w[42], offset); w[51] = hc_bytealign_be_S (w[40], w[41], offset); w[50] = hc_bytealign_be_S (w[39], w[40], offset); w[49] = hc_bytealign_be_S (w[38], w[39], offset); w[48] = hc_bytealign_be_S (w[37], w[38], offset); w[47] = hc_bytealign_be_S (w[36], w[37], offset); w[46] = hc_bytealign_be_S (w[35], w[36], offset); w[45] = hc_bytealign_be_S (w[34], w[35], offset); w[44] = hc_bytealign_be_S (w[33], w[34], offset); w[43] = hc_bytealign_be_S (w[32], w[33], offset); w[42] = hc_bytealign_be_S (w[31], w[32], offset); w[41] = hc_bytealign_be_S (w[30], w[31], offset); w[40] = hc_bytealign_be_S (w[29], w[30], offset); w[39] = hc_bytealign_be_S (w[28], w[29], offset); w[38] = hc_bytealign_be_S (w[27], w[28], offset); w[37] = hc_bytealign_be_S (w[26], w[27], offset); w[36] = hc_bytealign_be_S (w[25], w[26], offset); w[35] = hc_bytealign_be_S (w[24], w[25], offset); w[34] = hc_bytealign_be_S (w[23], w[24], offset); w[33] = hc_bytealign_be_S (w[22], w[23], offset); w[32] = hc_bytealign_be_S (w[21], w[22], offset); w[31] = hc_bytealign_be_S (w[20], w[21], offset); w[30] = hc_bytealign_be_S (w[19], w[20], offset); w[29] = hc_bytealign_be_S (w[18], w[19], offset); w[28] = hc_bytealign_be_S (w[17], w[18], offset); w[27] = hc_bytealign_be_S (w[16], w[17], offset); w[26] = 
hc_bytealign_be_S (w[15], w[16], offset); w[25] = hc_bytealign_be_S (w[14], w[15], offset); w[24] = hc_bytealign_be_S (w[13], w[14], offset); w[23] = hc_bytealign_be_S (w[12], w[13], offset); w[22] = hc_bytealign_be_S (w[11], w[12], offset); w[21] = hc_bytealign_be_S (w[10], w[11], offset); w[20] = hc_bytealign_be_S (w[ 9], w[10], offset); w[19] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[18] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[17] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[16] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[15] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[14] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[13] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[12] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[11] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[10] = hc_bytealign_be_S ( 0, w[ 0], offset); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_bytealign_be_S (w[51], w[52], offset); w[62] = hc_bytealign_be_S (w[50], w[51], offset); w[61] = hc_bytealign_be_S (w[49], w[50], offset); w[60] = hc_bytealign_be_S (w[48], w[49], offset); w[59] = hc_bytealign_be_S (w[47], w[48], offset); w[58] = hc_bytealign_be_S (w[46], w[47], offset); w[57] = hc_bytealign_be_S (w[45], w[46], offset); w[56] = hc_bytealign_be_S (w[44], w[45], offset); w[55] = hc_bytealign_be_S (w[43], w[44], offset); w[54] = hc_bytealign_be_S (w[42], w[43], offset); w[53] = hc_bytealign_be_S (w[41], w[42], offset); w[52] = hc_bytealign_be_S (w[40], w[41], offset); w[51] = hc_bytealign_be_S (w[39], w[40], offset); w[50] = hc_bytealign_be_S (w[38], w[39], offset); w[49] = hc_bytealign_be_S (w[37], w[38], offset); w[48] = hc_bytealign_be_S (w[36], w[37], offset); w[47] = hc_bytealign_be_S (w[35], w[36], offset); w[46] = hc_bytealign_be_S (w[34], w[35], offset); w[45] = hc_bytealign_be_S (w[33], w[34], offset); w[44] = hc_bytealign_be_S (w[32], w[33], offset); w[43] = hc_bytealign_be_S (w[31], 
w[32], offset); w[42] = hc_bytealign_be_S (w[30], w[31], offset); w[41] = hc_bytealign_be_S (w[29], w[30], offset); w[40] = hc_bytealign_be_S (w[28], w[29], offset); w[39] = hc_bytealign_be_S (w[27], w[28], offset); w[38] = hc_bytealign_be_S (w[26], w[27], offset); w[37] = hc_bytealign_be_S (w[25], w[26], offset); w[36] = hc_bytealign_be_S (w[24], w[25], offset); w[35] = hc_bytealign_be_S (w[23], w[24], offset); w[34] = hc_bytealign_be_S (w[22], w[23], offset); w[33] = hc_bytealign_be_S (w[21], w[22], offset); w[32] = hc_bytealign_be_S (w[20], w[21], offset); w[31] = hc_bytealign_be_S (w[19], w[20], offset); w[30] = hc_bytealign_be_S (w[18], w[19], offset); w[29] = hc_bytealign_be_S (w[17], w[18], offset); w[28] = hc_bytealign_be_S (w[16], w[17], offset); w[27] = hc_bytealign_be_S (w[15], w[16], offset); w[26] = hc_bytealign_be_S (w[14], w[15], offset); w[25] = hc_bytealign_be_S (w[13], w[14], offset); w[24] = hc_bytealign_be_S (w[12], w[13], offset); w[23] = hc_bytealign_be_S (w[11], w[12], offset); w[22] = hc_bytealign_be_S (w[10], w[11], offset); w[21] = hc_bytealign_be_S (w[ 9], w[10], offset); w[20] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[19] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[18] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[17] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[16] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[15] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[14] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[13] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[12] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[11] = hc_bytealign_be_S ( 0, w[ 0], offset); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_bytealign_be_S (w[50], w[51], offset); w[62] = hc_bytealign_be_S (w[49], w[50], offset); w[61] = hc_bytealign_be_S (w[48], w[49], offset); w[60] = hc_bytealign_be_S (w[47], w[48], offset); w[59] = hc_bytealign_be_S (w[46], w[47], offset); 
w[58] = hc_bytealign_be_S (w[45], w[46], offset); w[57] = hc_bytealign_be_S (w[44], w[45], offset); w[56] = hc_bytealign_be_S (w[43], w[44], offset); w[55] = hc_bytealign_be_S (w[42], w[43], offset); w[54] = hc_bytealign_be_S (w[41], w[42], offset); w[53] = hc_bytealign_be_S (w[40], w[41], offset); w[52] = hc_bytealign_be_S (w[39], w[40], offset); w[51] = hc_bytealign_be_S (w[38], w[39], offset); w[50] = hc_bytealign_be_S (w[37], w[38], offset); w[49] = hc_bytealign_be_S (w[36], w[37], offset); w[48] = hc_bytealign_be_S (w[35], w[36], offset); w[47] = hc_bytealign_be_S (w[34], w[35], offset); w[46] = hc_bytealign_be_S (w[33], w[34], offset); w[45] = hc_bytealign_be_S (w[32], w[33], offset); w[44] = hc_bytealign_be_S (w[31], w[32], offset); w[43] = hc_bytealign_be_S (w[30], w[31], offset); w[42] = hc_bytealign_be_S (w[29], w[30], offset); w[41] = hc_bytealign_be_S (w[28], w[29], offset); w[40] = hc_bytealign_be_S (w[27], w[28], offset); w[39] = hc_bytealign_be_S (w[26], w[27], offset); w[38] = hc_bytealign_be_S (w[25], w[26], offset); w[37] = hc_bytealign_be_S (w[24], w[25], offset); w[36] = hc_bytealign_be_S (w[23], w[24], offset); w[35] = hc_bytealign_be_S (w[22], w[23], offset); w[34] = hc_bytealign_be_S (w[21], w[22], offset); w[33] = hc_bytealign_be_S (w[20], w[21], offset); w[32] = hc_bytealign_be_S (w[19], w[20], offset); w[31] = hc_bytealign_be_S (w[18], w[19], offset); w[30] = hc_bytealign_be_S (w[17], w[18], offset); w[29] = hc_bytealign_be_S (w[16], w[17], offset); w[28] = hc_bytealign_be_S (w[15], w[16], offset); w[27] = hc_bytealign_be_S (w[14], w[15], offset); w[26] = hc_bytealign_be_S (w[13], w[14], offset); w[25] = hc_bytealign_be_S (w[12], w[13], offset); w[24] = hc_bytealign_be_S (w[11], w[12], offset); w[23] = hc_bytealign_be_S (w[10], w[11], offset); w[22] = hc_bytealign_be_S (w[ 9], w[10], offset); w[21] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[20] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[19] = hc_bytealign_be_S (w[ 6], w[ 7], offset); 
w[18] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[17] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[16] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[15] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[14] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[13] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[12] = hc_bytealign_be_S ( 0, w[ 0], offset); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_bytealign_be_S (w[49], w[50], offset); w[62] = hc_bytealign_be_S (w[48], w[49], offset); w[61] = hc_bytealign_be_S (w[47], w[48], offset); w[60] = hc_bytealign_be_S (w[46], w[47], offset); w[59] = hc_bytealign_be_S (w[45], w[46], offset); w[58] = hc_bytealign_be_S (w[44], w[45], offset); w[57] = hc_bytealign_be_S (w[43], w[44], offset); w[56] = hc_bytealign_be_S (w[42], w[43], offset); w[55] = hc_bytealign_be_S (w[41], w[42], offset); w[54] = hc_bytealign_be_S (w[40], w[41], offset); w[53] = hc_bytealign_be_S (w[39], w[40], offset); w[52] = hc_bytealign_be_S (w[38], w[39], offset); w[51] = hc_bytealign_be_S (w[37], w[38], offset); w[50] = hc_bytealign_be_S (w[36], w[37], offset); w[49] = hc_bytealign_be_S (w[35], w[36], offset); w[48] = hc_bytealign_be_S (w[34], w[35], offset); w[47] = hc_bytealign_be_S (w[33], w[34], offset); w[46] = hc_bytealign_be_S (w[32], w[33], offset); w[45] = hc_bytealign_be_S (w[31], w[32], offset); w[44] = hc_bytealign_be_S (w[30], w[31], offset); w[43] = hc_bytealign_be_S (w[29], w[30], offset); w[42] = hc_bytealign_be_S (w[28], w[29], offset); w[41] = hc_bytealign_be_S (w[27], w[28], offset); w[40] = hc_bytealign_be_S (w[26], w[27], offset); w[39] = hc_bytealign_be_S (w[25], w[26], offset); w[38] = hc_bytealign_be_S (w[24], w[25], offset); w[37] = hc_bytealign_be_S (w[23], w[24], offset); w[36] = hc_bytealign_be_S (w[22], w[23], offset); w[35] = hc_bytealign_be_S (w[21], w[22], offset); w[34] = hc_bytealign_be_S (w[20], w[21], offset); 
w[33] = hc_bytealign_be_S (w[19], w[20], offset); w[32] = hc_bytealign_be_S (w[18], w[19], offset); w[31] = hc_bytealign_be_S (w[17], w[18], offset); w[30] = hc_bytealign_be_S (w[16], w[17], offset); w[29] = hc_bytealign_be_S (w[15], w[16], offset); w[28] = hc_bytealign_be_S (w[14], w[15], offset); w[27] = hc_bytealign_be_S (w[13], w[14], offset); w[26] = hc_bytealign_be_S (w[12], w[13], offset); w[25] = hc_bytealign_be_S (w[11], w[12], offset); w[24] = hc_bytealign_be_S (w[10], w[11], offset); w[23] = hc_bytealign_be_S (w[ 9], w[10], offset); w[22] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[21] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[20] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[19] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[18] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[17] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[16] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[15] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[14] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[13] = hc_bytealign_be_S ( 0, w[ 0], offset); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 14: w[63] = hc_bytealign_be_S (w[48], w[49], offset); w[62] = hc_bytealign_be_S (w[47], w[48], offset); w[61] = hc_bytealign_be_S (w[46], w[47], offset); w[60] = hc_bytealign_be_S (w[45], w[46], offset); w[59] = hc_bytealign_be_S (w[44], w[45], offset); w[58] = hc_bytealign_be_S (w[43], w[44], offset); w[57] = hc_bytealign_be_S (w[42], w[43], offset); w[56] = hc_bytealign_be_S (w[41], w[42], offset); w[55] = hc_bytealign_be_S (w[40], w[41], offset); w[54] = hc_bytealign_be_S (w[39], w[40], offset); w[53] = hc_bytealign_be_S (w[38], w[39], offset); w[52] = hc_bytealign_be_S (w[37], w[38], offset); w[51] = hc_bytealign_be_S (w[36], w[37], offset); w[50] = hc_bytealign_be_S (w[35], w[36], offset); w[49] = hc_bytealign_be_S (w[34], w[35], offset); w[48] = hc_bytealign_be_S (w[33], w[34], 
offset); w[47] = hc_bytealign_be_S (w[32], w[33], offset); w[46] = hc_bytealign_be_S (w[31], w[32], offset); w[45] = hc_bytealign_be_S (w[30], w[31], offset); w[44] = hc_bytealign_be_S (w[29], w[30], offset); w[43] = hc_bytealign_be_S (w[28], w[29], offset); w[42] = hc_bytealign_be_S (w[27], w[28], offset); w[41] = hc_bytealign_be_S (w[26], w[27], offset); w[40] = hc_bytealign_be_S (w[25], w[26], offset); w[39] = hc_bytealign_be_S (w[24], w[25], offset); w[38] = hc_bytealign_be_S (w[23], w[24], offset); w[37] = hc_bytealign_be_S (w[22], w[23], offset); w[36] = hc_bytealign_be_S (w[21], w[22], offset); w[35] = hc_bytealign_be_S (w[20], w[21], offset); w[34] = hc_bytealign_be_S (w[19], w[20], offset); w[33] = hc_bytealign_be_S (w[18], w[19], offset); w[32] = hc_bytealign_be_S (w[17], w[18], offset); w[31] = hc_bytealign_be_S (w[16], w[17], offset); w[30] = hc_bytealign_be_S (w[15], w[16], offset); w[29] = hc_bytealign_be_S (w[14], w[15], offset); w[28] = hc_bytealign_be_S (w[13], w[14], offset); w[27] = hc_bytealign_be_S (w[12], w[13], offset); w[26] = hc_bytealign_be_S (w[11], w[12], offset); w[25] = hc_bytealign_be_S (w[10], w[11], offset); w[24] = hc_bytealign_be_S (w[ 9], w[10], offset); w[23] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[22] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[21] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[20] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[19] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[18] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[17] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[16] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[15] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[14] = hc_bytealign_be_S ( 0, w[ 0], offset); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_bytealign_be_S (w[47], w[48], offset); w[62] = hc_bytealign_be_S (w[46], w[47], offset); w[61] = 
hc_bytealign_be_S (w[45], w[46], offset); w[60] = hc_bytealign_be_S (w[44], w[45], offset); w[59] = hc_bytealign_be_S (w[43], w[44], offset); w[58] = hc_bytealign_be_S (w[42], w[43], offset); w[57] = hc_bytealign_be_S (w[41], w[42], offset); w[56] = hc_bytealign_be_S (w[40], w[41], offset); w[55] = hc_bytealign_be_S (w[39], w[40], offset); w[54] = hc_bytealign_be_S (w[38], w[39], offset); w[53] = hc_bytealign_be_S (w[37], w[38], offset); w[52] = hc_bytealign_be_S (w[36], w[37], offset); w[51] = hc_bytealign_be_S (w[35], w[36], offset); w[50] = hc_bytealign_be_S (w[34], w[35], offset); w[49] = hc_bytealign_be_S (w[33], w[34], offset); w[48] = hc_bytealign_be_S (w[32], w[33], offset); w[47] = hc_bytealign_be_S (w[31], w[32], offset); w[46] = hc_bytealign_be_S (w[30], w[31], offset); w[45] = hc_bytealign_be_S (w[29], w[30], offset); w[44] = hc_bytealign_be_S (w[28], w[29], offset); w[43] = hc_bytealign_be_S (w[27], w[28], offset); w[42] = hc_bytealign_be_S (w[26], w[27], offset); w[41] = hc_bytealign_be_S (w[25], w[26], offset); w[40] = hc_bytealign_be_S (w[24], w[25], offset); w[39] = hc_bytealign_be_S (w[23], w[24], offset); w[38] = hc_bytealign_be_S (w[22], w[23], offset); w[37] = hc_bytealign_be_S (w[21], w[22], offset); w[36] = hc_bytealign_be_S (w[20], w[21], offset); w[35] = hc_bytealign_be_S (w[19], w[20], offset); w[34] = hc_bytealign_be_S (w[18], w[19], offset); w[33] = hc_bytealign_be_S (w[17], w[18], offset); w[32] = hc_bytealign_be_S (w[16], w[17], offset); w[31] = hc_bytealign_be_S (w[15], w[16], offset); w[30] = hc_bytealign_be_S (w[14], w[15], offset); w[29] = hc_bytealign_be_S (w[13], w[14], offset); w[28] = hc_bytealign_be_S (w[12], w[13], offset); w[27] = hc_bytealign_be_S (w[11], w[12], offset); w[26] = hc_bytealign_be_S (w[10], w[11], offset); w[25] = hc_bytealign_be_S (w[ 9], w[10], offset); w[24] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[23] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[22] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[21] = 
hc_bytealign_be_S (w[ 5], w[ 6], offset); w[20] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[19] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[18] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[17] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[16] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[15] = hc_bytealign_be_S ( 0, w[ 0], offset); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_bytealign_be_S (w[46], w[47], offset); w[62] = hc_bytealign_be_S (w[45], w[46], offset); w[61] = hc_bytealign_be_S (w[44], w[45], offset); w[60] = hc_bytealign_be_S (w[43], w[44], offset); w[59] = hc_bytealign_be_S (w[42], w[43], offset); w[58] = hc_bytealign_be_S (w[41], w[42], offset); w[57] = hc_bytealign_be_S (w[40], w[41], offset); w[56] = hc_bytealign_be_S (w[39], w[40], offset); w[55] = hc_bytealign_be_S (w[38], w[39], offset); w[54] = hc_bytealign_be_S (w[37], w[38], offset); w[53] = hc_bytealign_be_S (w[36], w[37], offset); w[52] = hc_bytealign_be_S (w[35], w[36], offset); w[51] = hc_bytealign_be_S (w[34], w[35], offset); w[50] = hc_bytealign_be_S (w[33], w[34], offset); w[49] = hc_bytealign_be_S (w[32], w[33], offset); w[48] = hc_bytealign_be_S (w[31], w[32], offset); w[47] = hc_bytealign_be_S (w[30], w[31], offset); w[46] = hc_bytealign_be_S (w[29], w[30], offset); w[45] = hc_bytealign_be_S (w[28], w[29], offset); w[44] = hc_bytealign_be_S (w[27], w[28], offset); w[43] = hc_bytealign_be_S (w[26], w[27], offset); w[42] = hc_bytealign_be_S (w[25], w[26], offset); w[41] = hc_bytealign_be_S (w[24], w[25], offset); w[40] = hc_bytealign_be_S (w[23], w[24], offset); w[39] = hc_bytealign_be_S (w[22], w[23], offset); w[38] = hc_bytealign_be_S (w[21], w[22], offset); w[37] = hc_bytealign_be_S (w[20], w[21], offset); w[36] = hc_bytealign_be_S (w[19], w[20], offset); w[35] = hc_bytealign_be_S (w[18], w[19], offset); w[34] = hc_bytealign_be_S 
(w[17], w[18], offset); w[33] = hc_bytealign_be_S (w[16], w[17], offset); w[32] = hc_bytealign_be_S (w[15], w[16], offset); w[31] = hc_bytealign_be_S (w[14], w[15], offset); w[30] = hc_bytealign_be_S (w[13], w[14], offset); w[29] = hc_bytealign_be_S (w[12], w[13], offset); w[28] = hc_bytealign_be_S (w[11], w[12], offset); w[27] = hc_bytealign_be_S (w[10], w[11], offset); w[26] = hc_bytealign_be_S (w[ 9], w[10], offset); w[25] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[24] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[23] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[22] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[21] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[20] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[19] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[18] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[17] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[16] = hc_bytealign_be_S ( 0, w[ 0], offset); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_bytealign_be_S (w[45], w[46], offset); w[62] = hc_bytealign_be_S (w[44], w[45], offset); w[61] = hc_bytealign_be_S (w[43], w[44], offset); w[60] = hc_bytealign_be_S (w[42], w[43], offset); w[59] = hc_bytealign_be_S (w[41], w[42], offset); w[58] = hc_bytealign_be_S (w[40], w[41], offset); w[57] = hc_bytealign_be_S (w[39], w[40], offset); w[56] = hc_bytealign_be_S (w[38], w[39], offset); w[55] = hc_bytealign_be_S (w[37], w[38], offset); w[54] = hc_bytealign_be_S (w[36], w[37], offset); w[53] = hc_bytealign_be_S (w[35], w[36], offset); w[52] = hc_bytealign_be_S (w[34], w[35], offset); w[51] = hc_bytealign_be_S (w[33], w[34], offset); w[50] = hc_bytealign_be_S (w[32], w[33], offset); w[49] = hc_bytealign_be_S (w[31], w[32], offset); w[48] = hc_bytealign_be_S (w[30], w[31], offset); w[47] = hc_bytealign_be_S (w[29], w[30], offset); w[46] = hc_bytealign_be_S (w[28], 
w[29], offset); w[45] = hc_bytealign_be_S (w[27], w[28], offset); w[44] = hc_bytealign_be_S (w[26], w[27], offset); w[43] = hc_bytealign_be_S (w[25], w[26], offset); w[42] = hc_bytealign_be_S (w[24], w[25], offset); w[41] = hc_bytealign_be_S (w[23], w[24], offset); w[40] = hc_bytealign_be_S (w[22], w[23], offset); w[39] = hc_bytealign_be_S (w[21], w[22], offset); w[38] = hc_bytealign_be_S (w[20], w[21], offset); w[37] = hc_bytealign_be_S (w[19], w[20], offset); w[36] = hc_bytealign_be_S (w[18], w[19], offset); w[35] = hc_bytealign_be_S (w[17], w[18], offset); w[34] = hc_bytealign_be_S (w[16], w[17], offset); w[33] = hc_bytealign_be_S (w[15], w[16], offset); w[32] = hc_bytealign_be_S (w[14], w[15], offset); w[31] = hc_bytealign_be_S (w[13], w[14], offset); w[30] = hc_bytealign_be_S (w[12], w[13], offset); w[29] = hc_bytealign_be_S (w[11], w[12], offset); w[28] = hc_bytealign_be_S (w[10], w[11], offset); w[27] = hc_bytealign_be_S (w[ 9], w[10], offset); w[26] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[25] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[24] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[23] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[22] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[21] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[20] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[19] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[18] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[17] = hc_bytealign_be_S ( 0, w[ 0], offset); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_bytealign_be_S (w[44], w[45], offset); w[62] = hc_bytealign_be_S (w[43], w[44], offset); w[61] = hc_bytealign_be_S (w[42], w[43], offset); w[60] = hc_bytealign_be_S (w[41], w[42], offset); w[59] = hc_bytealign_be_S (w[40], w[41], offset); w[58] = hc_bytealign_be_S (w[39], w[40], offset); w[57] = hc_bytealign_be_S (w[38], 
w[39], offset); w[56] = hc_bytealign_be_S (w[37], w[38], offset); w[55] = hc_bytealign_be_S (w[36], w[37], offset); w[54] = hc_bytealign_be_S (w[35], w[36], offset); w[53] = hc_bytealign_be_S (w[34], w[35], offset); w[52] = hc_bytealign_be_S (w[33], w[34], offset); w[51] = hc_bytealign_be_S (w[32], w[33], offset); w[50] = hc_bytealign_be_S (w[31], w[32], offset); w[49] = hc_bytealign_be_S (w[30], w[31], offset); w[48] = hc_bytealign_be_S (w[29], w[30], offset); w[47] = hc_bytealign_be_S (w[28], w[29], offset); w[46] = hc_bytealign_be_S (w[27], w[28], offset); w[45] = hc_bytealign_be_S (w[26], w[27], offset); w[44] = hc_bytealign_be_S (w[25], w[26], offset); w[43] = hc_bytealign_be_S (w[24], w[25], offset); w[42] = hc_bytealign_be_S (w[23], w[24], offset); w[41] = hc_bytealign_be_S (w[22], w[23], offset); w[40] = hc_bytealign_be_S (w[21], w[22], offset); w[39] = hc_bytealign_be_S (w[20], w[21], offset); w[38] = hc_bytealign_be_S (w[19], w[20], offset); w[37] = hc_bytealign_be_S (w[18], w[19], offset); w[36] = hc_bytealign_be_S (w[17], w[18], offset); w[35] = hc_bytealign_be_S (w[16], w[17], offset); w[34] = hc_bytealign_be_S (w[15], w[16], offset); w[33] = hc_bytealign_be_S (w[14], w[15], offset); w[32] = hc_bytealign_be_S (w[13], w[14], offset); w[31] = hc_bytealign_be_S (w[12], w[13], offset); w[30] = hc_bytealign_be_S (w[11], w[12], offset); w[29] = hc_bytealign_be_S (w[10], w[11], offset); w[28] = hc_bytealign_be_S (w[ 9], w[10], offset); w[27] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[26] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[25] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[24] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[23] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[22] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[21] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[20] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[19] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[18] = hc_bytealign_be_S ( 0, w[ 0], offset); w[17] = 0; w[16] = 0; w[15] = 0; 
w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_bytealign_be_S (w[43], w[44], offset); w[62] = hc_bytealign_be_S (w[42], w[43], offset); w[61] = hc_bytealign_be_S (w[41], w[42], offset); w[60] = hc_bytealign_be_S (w[40], w[41], offset); w[59] = hc_bytealign_be_S (w[39], w[40], offset); w[58] = hc_bytealign_be_S (w[38], w[39], offset); w[57] = hc_bytealign_be_S (w[37], w[38], offset); w[56] = hc_bytealign_be_S (w[36], w[37], offset); w[55] = hc_bytealign_be_S (w[35], w[36], offset); w[54] = hc_bytealign_be_S (w[34], w[35], offset); w[53] = hc_bytealign_be_S (w[33], w[34], offset); w[52] = hc_bytealign_be_S (w[32], w[33], offset); w[51] = hc_bytealign_be_S (w[31], w[32], offset); w[50] = hc_bytealign_be_S (w[30], w[31], offset); w[49] = hc_bytealign_be_S (w[29], w[30], offset); w[48] = hc_bytealign_be_S (w[28], w[29], offset); w[47] = hc_bytealign_be_S (w[27], w[28], offset); w[46] = hc_bytealign_be_S (w[26], w[27], offset); w[45] = hc_bytealign_be_S (w[25], w[26], offset); w[44] = hc_bytealign_be_S (w[24], w[25], offset); w[43] = hc_bytealign_be_S (w[23], w[24], offset); w[42] = hc_bytealign_be_S (w[22], w[23], offset); w[41] = hc_bytealign_be_S (w[21], w[22], offset); w[40] = hc_bytealign_be_S (w[20], w[21], offset); w[39] = hc_bytealign_be_S (w[19], w[20], offset); w[38] = hc_bytealign_be_S (w[18], w[19], offset); w[37] = hc_bytealign_be_S (w[17], w[18], offset); w[36] = hc_bytealign_be_S (w[16], w[17], offset); w[35] = hc_bytealign_be_S (w[15], w[16], offset); w[34] = hc_bytealign_be_S (w[14], w[15], offset); w[33] = hc_bytealign_be_S (w[13], w[14], offset); w[32] = hc_bytealign_be_S (w[12], w[13], offset); w[31] = hc_bytealign_be_S (w[11], w[12], offset); w[30] = hc_bytealign_be_S (w[10], w[11], offset); w[29] = hc_bytealign_be_S (w[ 9], w[10], offset); w[28] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[27] = 
hc_bytealign_be_S (w[ 7], w[ 8], offset); w[26] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[25] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[24] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[23] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[22] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[21] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[20] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[19] = hc_bytealign_be_S ( 0, w[ 0], offset); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_bytealign_be_S (w[42], w[43], offset); w[62] = hc_bytealign_be_S (w[41], w[42], offset); w[61] = hc_bytealign_be_S (w[40], w[41], offset); w[60] = hc_bytealign_be_S (w[39], w[40], offset); w[59] = hc_bytealign_be_S (w[38], w[39], offset); w[58] = hc_bytealign_be_S (w[37], w[38], offset); w[57] = hc_bytealign_be_S (w[36], w[37], offset); w[56] = hc_bytealign_be_S (w[35], w[36], offset); w[55] = hc_bytealign_be_S (w[34], w[35], offset); w[54] = hc_bytealign_be_S (w[33], w[34], offset); w[53] = hc_bytealign_be_S (w[32], w[33], offset); w[52] = hc_bytealign_be_S (w[31], w[32], offset); w[51] = hc_bytealign_be_S (w[30], w[31], offset); w[50] = hc_bytealign_be_S (w[29], w[30], offset); w[49] = hc_bytealign_be_S (w[28], w[29], offset); w[48] = hc_bytealign_be_S (w[27], w[28], offset); w[47] = hc_bytealign_be_S (w[26], w[27], offset); w[46] = hc_bytealign_be_S (w[25], w[26], offset); w[45] = hc_bytealign_be_S (w[24], w[25], offset); w[44] = hc_bytealign_be_S (w[23], w[24], offset); w[43] = hc_bytealign_be_S (w[22], w[23], offset); w[42] = hc_bytealign_be_S (w[21], w[22], offset); w[41] = hc_bytealign_be_S (w[20], w[21], offset); w[40] = hc_bytealign_be_S (w[19], w[20], offset); w[39] = hc_bytealign_be_S (w[18], w[19], offset); w[38] = hc_bytealign_be_S (w[17], w[18], offset); w[37] = hc_bytealign_be_S (w[16], 
w[17], offset); w[36] = hc_bytealign_be_S (w[15], w[16], offset); w[35] = hc_bytealign_be_S (w[14], w[15], offset); w[34] = hc_bytealign_be_S (w[13], w[14], offset); w[33] = hc_bytealign_be_S (w[12], w[13], offset); w[32] = hc_bytealign_be_S (w[11], w[12], offset); w[31] = hc_bytealign_be_S (w[10], w[11], offset); w[30] = hc_bytealign_be_S (w[ 9], w[10], offset); w[29] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[28] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[27] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[26] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[25] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[24] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[23] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[22] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[21] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[20] = hc_bytealign_be_S ( 0, w[ 0], offset); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_bytealign_be_S (w[41], w[42], offset); w[62] = hc_bytealign_be_S (w[40], w[41], offset); w[61] = hc_bytealign_be_S (w[39], w[40], offset); w[60] = hc_bytealign_be_S (w[38], w[39], offset); w[59] = hc_bytealign_be_S (w[37], w[38], offset); w[58] = hc_bytealign_be_S (w[36], w[37], offset); w[57] = hc_bytealign_be_S (w[35], w[36], offset); w[56] = hc_bytealign_be_S (w[34], w[35], offset); w[55] = hc_bytealign_be_S (w[33], w[34], offset); w[54] = hc_bytealign_be_S (w[32], w[33], offset); w[53] = hc_bytealign_be_S (w[31], w[32], offset); w[52] = hc_bytealign_be_S (w[30], w[31], offset); w[51] = hc_bytealign_be_S (w[29], w[30], offset); w[50] = hc_bytealign_be_S (w[28], w[29], offset); w[49] = hc_bytealign_be_S (w[27], w[28], offset); w[48] = hc_bytealign_be_S (w[26], w[27], offset); w[47] = hc_bytealign_be_S (w[25], w[26], offset); w[46] = hc_bytealign_be_S (w[24], w[25], offset); 
w[45] = hc_bytealign_be_S (w[23], w[24], offset); w[44] = hc_bytealign_be_S (w[22], w[23], offset); w[43] = hc_bytealign_be_S (w[21], w[22], offset); w[42] = hc_bytealign_be_S (w[20], w[21], offset); w[41] = hc_bytealign_be_S (w[19], w[20], offset); w[40] = hc_bytealign_be_S (w[18], w[19], offset); w[39] = hc_bytealign_be_S (w[17], w[18], offset); w[38] = hc_bytealign_be_S (w[16], w[17], offset); w[37] = hc_bytealign_be_S (w[15], w[16], offset); w[36] = hc_bytealign_be_S (w[14], w[15], offset); w[35] = hc_bytealign_be_S (w[13], w[14], offset); w[34] = hc_bytealign_be_S (w[12], w[13], offset); w[33] = hc_bytealign_be_S (w[11], w[12], offset); w[32] = hc_bytealign_be_S (w[10], w[11], offset); w[31] = hc_bytealign_be_S (w[ 9], w[10], offset); w[30] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[29] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[28] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[27] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[26] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[25] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[24] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[23] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[22] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[21] = hc_bytealign_be_S ( 0, w[ 0], offset); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_bytealign_be_S (w[40], w[41], offset); w[62] = hc_bytealign_be_S (w[39], w[40], offset); w[61] = hc_bytealign_be_S (w[38], w[39], offset); w[60] = hc_bytealign_be_S (w[37], w[38], offset); w[59] = hc_bytealign_be_S (w[36], w[37], offset); w[58] = hc_bytealign_be_S (w[35], w[36], offset); w[57] = hc_bytealign_be_S (w[34], w[35], offset); w[56] = hc_bytealign_be_S (w[33], w[34], offset); w[55] = hc_bytealign_be_S (w[32], w[33], offset); w[54] = hc_bytealign_be_S (w[31], w[32], offset); w[53] 
= hc_bytealign_be_S (w[30], w[31], offset); w[52] = hc_bytealign_be_S (w[29], w[30], offset); w[51] = hc_bytealign_be_S (w[28], w[29], offset); w[50] = hc_bytealign_be_S (w[27], w[28], offset); w[49] = hc_bytealign_be_S (w[26], w[27], offset); w[48] = hc_bytealign_be_S (w[25], w[26], offset); w[47] = hc_bytealign_be_S (w[24], w[25], offset); w[46] = hc_bytealign_be_S (w[23], w[24], offset); w[45] = hc_bytealign_be_S (w[22], w[23], offset); w[44] = hc_bytealign_be_S (w[21], w[22], offset); w[43] = hc_bytealign_be_S (w[20], w[21], offset); w[42] = hc_bytealign_be_S (w[19], w[20], offset); w[41] = hc_bytealign_be_S (w[18], w[19], offset); w[40] = hc_bytealign_be_S (w[17], w[18], offset); w[39] = hc_bytealign_be_S (w[16], w[17], offset); w[38] = hc_bytealign_be_S (w[15], w[16], offset); w[37] = hc_bytealign_be_S (w[14], w[15], offset); w[36] = hc_bytealign_be_S (w[13], w[14], offset); w[35] = hc_bytealign_be_S (w[12], w[13], offset); w[34] = hc_bytealign_be_S (w[11], w[12], offset); w[33] = hc_bytealign_be_S (w[10], w[11], offset); w[32] = hc_bytealign_be_S (w[ 9], w[10], offset); w[31] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[30] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[29] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[28] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[27] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[26] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[25] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[24] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[23] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[22] = hc_bytealign_be_S ( 0, w[ 0], offset); w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_bytealign_be_S (w[39], w[40], offset); w[62] = hc_bytealign_be_S (w[38], w[39], offset); w[61] = hc_bytealign_be_S (w[37], w[38], offset); 
w[60] = hc_bytealign_be_S (w[36], w[37], offset); w[59] = hc_bytealign_be_S (w[35], w[36], offset); w[58] = hc_bytealign_be_S (w[34], w[35], offset); w[57] = hc_bytealign_be_S (w[33], w[34], offset); w[56] = hc_bytealign_be_S (w[32], w[33], offset); w[55] = hc_bytealign_be_S (w[31], w[32], offset); w[54] = hc_bytealign_be_S (w[30], w[31], offset); w[53] = hc_bytealign_be_S (w[29], w[30], offset); w[52] = hc_bytealign_be_S (w[28], w[29], offset); w[51] = hc_bytealign_be_S (w[27], w[28], offset); w[50] = hc_bytealign_be_S (w[26], w[27], offset); w[49] = hc_bytealign_be_S (w[25], w[26], offset); w[48] = hc_bytealign_be_S (w[24], w[25], offset); w[47] = hc_bytealign_be_S (w[23], w[24], offset); w[46] = hc_bytealign_be_S (w[22], w[23], offset); w[45] = hc_bytealign_be_S (w[21], w[22], offset); w[44] = hc_bytealign_be_S (w[20], w[21], offset); w[43] = hc_bytealign_be_S (w[19], w[20], offset); w[42] = hc_bytealign_be_S (w[18], w[19], offset); w[41] = hc_bytealign_be_S (w[17], w[18], offset); w[40] = hc_bytealign_be_S (w[16], w[17], offset); w[39] = hc_bytealign_be_S (w[15], w[16], offset); w[38] = hc_bytealign_be_S (w[14], w[15], offset); w[37] = hc_bytealign_be_S (w[13], w[14], offset); w[36] = hc_bytealign_be_S (w[12], w[13], offset); w[35] = hc_bytealign_be_S (w[11], w[12], offset); w[34] = hc_bytealign_be_S (w[10], w[11], offset); w[33] = hc_bytealign_be_S (w[ 9], w[10], offset); w[32] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[31] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[30] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[29] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[28] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[27] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[26] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[25] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[24] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[23] = hc_bytealign_be_S ( 0, w[ 0], offset); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; 
w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_bytealign_be_S (w[38], w[39], offset); w[62] = hc_bytealign_be_S (w[37], w[38], offset); w[61] = hc_bytealign_be_S (w[36], w[37], offset); w[60] = hc_bytealign_be_S (w[35], w[36], offset); w[59] = hc_bytealign_be_S (w[34], w[35], offset); w[58] = hc_bytealign_be_S (w[33], w[34], offset); w[57] = hc_bytealign_be_S (w[32], w[33], offset); w[56] = hc_bytealign_be_S (w[31], w[32], offset); w[55] = hc_bytealign_be_S (w[30], w[31], offset); w[54] = hc_bytealign_be_S (w[29], w[30], offset); w[53] = hc_bytealign_be_S (w[28], w[29], offset); w[52] = hc_bytealign_be_S (w[27], w[28], offset); w[51] = hc_bytealign_be_S (w[26], w[27], offset); w[50] = hc_bytealign_be_S (w[25], w[26], offset); w[49] = hc_bytealign_be_S (w[24], w[25], offset); w[48] = hc_bytealign_be_S (w[23], w[24], offset); w[47] = hc_bytealign_be_S (w[22], w[23], offset); w[46] = hc_bytealign_be_S (w[21], w[22], offset); w[45] = hc_bytealign_be_S (w[20], w[21], offset); w[44] = hc_bytealign_be_S (w[19], w[20], offset); w[43] = hc_bytealign_be_S (w[18], w[19], offset); w[42] = hc_bytealign_be_S (w[17], w[18], offset); w[41] = hc_bytealign_be_S (w[16], w[17], offset); w[40] = hc_bytealign_be_S (w[15], w[16], offset); w[39] = hc_bytealign_be_S (w[14], w[15], offset); w[38] = hc_bytealign_be_S (w[13], w[14], offset); w[37] = hc_bytealign_be_S (w[12], w[13], offset); w[36] = hc_bytealign_be_S (w[11], w[12], offset); w[35] = hc_bytealign_be_S (w[10], w[11], offset); w[34] = hc_bytealign_be_S (w[ 9], w[10], offset); w[33] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[32] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[31] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[30] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[29] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[28] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[27] = hc_bytealign_be_S (w[ 
2], w[ 3], offset); w[26] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[25] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[24] = hc_bytealign_be_S ( 0, w[ 0], offset); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_bytealign_be_S (w[37], w[38], offset); w[62] = hc_bytealign_be_S (w[36], w[37], offset); w[61] = hc_bytealign_be_S (w[35], w[36], offset); w[60] = hc_bytealign_be_S (w[34], w[35], offset); w[59] = hc_bytealign_be_S (w[33], w[34], offset); w[58] = hc_bytealign_be_S (w[32], w[33], offset); w[57] = hc_bytealign_be_S (w[31], w[32], offset); w[56] = hc_bytealign_be_S (w[30], w[31], offset); w[55] = hc_bytealign_be_S (w[29], w[30], offset); w[54] = hc_bytealign_be_S (w[28], w[29], offset); w[53] = hc_bytealign_be_S (w[27], w[28], offset); w[52] = hc_bytealign_be_S (w[26], w[27], offset); w[51] = hc_bytealign_be_S (w[25], w[26], offset); w[50] = hc_bytealign_be_S (w[24], w[25], offset); w[49] = hc_bytealign_be_S (w[23], w[24], offset); w[48] = hc_bytealign_be_S (w[22], w[23], offset); w[47] = hc_bytealign_be_S (w[21], w[22], offset); w[46] = hc_bytealign_be_S (w[20], w[21], offset); w[45] = hc_bytealign_be_S (w[19], w[20], offset); w[44] = hc_bytealign_be_S (w[18], w[19], offset); w[43] = hc_bytealign_be_S (w[17], w[18], offset); w[42] = hc_bytealign_be_S (w[16], w[17], offset); w[41] = hc_bytealign_be_S (w[15], w[16], offset); w[40] = hc_bytealign_be_S (w[14], w[15], offset); w[39] = hc_bytealign_be_S (w[13], w[14], offset); w[38] = hc_bytealign_be_S (w[12], w[13], offset); w[37] = hc_bytealign_be_S (w[11], w[12], offset); w[36] = hc_bytealign_be_S (w[10], w[11], offset); w[35] = hc_bytealign_be_S (w[ 9], w[10], offset); w[34] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[33] = hc_bytealign_be_S (w[ 7], w[ 8], offset); 
w[32] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[31] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[30] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[29] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[28] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[27] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[26] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[25] = hc_bytealign_be_S ( 0, w[ 0], offset); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_bytealign_be_S (w[36], w[37], offset); w[62] = hc_bytealign_be_S (w[35], w[36], offset); w[61] = hc_bytealign_be_S (w[34], w[35], offset); w[60] = hc_bytealign_be_S (w[33], w[34], offset); w[59] = hc_bytealign_be_S (w[32], w[33], offset); w[58] = hc_bytealign_be_S (w[31], w[32], offset); w[57] = hc_bytealign_be_S (w[30], w[31], offset); w[56] = hc_bytealign_be_S (w[29], w[30], offset); w[55] = hc_bytealign_be_S (w[28], w[29], offset); w[54] = hc_bytealign_be_S (w[27], w[28], offset); w[53] = hc_bytealign_be_S (w[26], w[27], offset); w[52] = hc_bytealign_be_S (w[25], w[26], offset); w[51] = hc_bytealign_be_S (w[24], w[25], offset); w[50] = hc_bytealign_be_S (w[23], w[24], offset); w[49] = hc_bytealign_be_S (w[22], w[23], offset); w[48] = hc_bytealign_be_S (w[21], w[22], offset); w[47] = hc_bytealign_be_S (w[20], w[21], offset); w[46] = hc_bytealign_be_S (w[19], w[20], offset); w[45] = hc_bytealign_be_S (w[18], w[19], offset); w[44] = hc_bytealign_be_S (w[17], w[18], offset); w[43] = hc_bytealign_be_S (w[16], w[17], offset); w[42] = hc_bytealign_be_S (w[15], w[16], offset); w[41] = hc_bytealign_be_S (w[14], w[15], offset); w[40] = hc_bytealign_be_S (w[13], w[14], offset); w[39] = hc_bytealign_be_S (w[12], w[13], offset); w[38] = hc_bytealign_be_S (w[11], w[12], offset); w[37] = 
hc_bytealign_be_S (w[10], w[11], offset); w[36] = hc_bytealign_be_S (w[ 9], w[10], offset); w[35] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[34] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[33] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[32] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[31] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[30] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[29] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[28] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[27] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[26] = hc_bytealign_be_S ( 0, w[ 0], offset); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_bytealign_be_S (w[35], w[36], offset); w[62] = hc_bytealign_be_S (w[34], w[35], offset); w[61] = hc_bytealign_be_S (w[33], w[34], offset); w[60] = hc_bytealign_be_S (w[32], w[33], offset); w[59] = hc_bytealign_be_S (w[31], w[32], offset); w[58] = hc_bytealign_be_S (w[30], w[31], offset); w[57] = hc_bytealign_be_S (w[29], w[30], offset); w[56] = hc_bytealign_be_S (w[28], w[29], offset); w[55] = hc_bytealign_be_S (w[27], w[28], offset); w[54] = hc_bytealign_be_S (w[26], w[27], offset); w[53] = hc_bytealign_be_S (w[25], w[26], offset); w[52] = hc_bytealign_be_S (w[24], w[25], offset); w[51] = hc_bytealign_be_S (w[23], w[24], offset); w[50] = hc_bytealign_be_S (w[22], w[23], offset); w[49] = hc_bytealign_be_S (w[21], w[22], offset); w[48] = hc_bytealign_be_S (w[20], w[21], offset); w[47] = hc_bytealign_be_S (w[19], w[20], offset); w[46] = hc_bytealign_be_S (w[18], w[19], offset); w[45] = hc_bytealign_be_S (w[17], w[18], offset); w[44] = hc_bytealign_be_S (w[16], w[17], offset); w[43] = hc_bytealign_be_S (w[15], w[16], offset); w[42] = hc_bytealign_be_S (w[14], w[15], offset); w[41] = 
hc_bytealign_be_S (w[13], w[14], offset); w[40] = hc_bytealign_be_S (w[12], w[13], offset); w[39] = hc_bytealign_be_S (w[11], w[12], offset); w[38] = hc_bytealign_be_S (w[10], w[11], offset); w[37] = hc_bytealign_be_S (w[ 9], w[10], offset); w[36] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[35] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[34] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[33] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[32] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[31] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[30] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[29] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[28] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[27] = hc_bytealign_be_S ( 0, w[ 0], offset); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_bytealign_be_S (w[34], w[35], offset); w[62] = hc_bytealign_be_S (w[33], w[34], offset); w[61] = hc_bytealign_be_S (w[32], w[33], offset); w[60] = hc_bytealign_be_S (w[31], w[32], offset); w[59] = hc_bytealign_be_S (w[30], w[31], offset); w[58] = hc_bytealign_be_S (w[29], w[30], offset); w[57] = hc_bytealign_be_S (w[28], w[29], offset); w[56] = hc_bytealign_be_S (w[27], w[28], offset); w[55] = hc_bytealign_be_S (w[26], w[27], offset); w[54] = hc_bytealign_be_S (w[25], w[26], offset); w[53] = hc_bytealign_be_S (w[24], w[25], offset); w[52] = hc_bytealign_be_S (w[23], w[24], offset); w[51] = hc_bytealign_be_S (w[22], w[23], offset); w[50] = hc_bytealign_be_S (w[21], w[22], offset); w[49] = hc_bytealign_be_S (w[20], w[21], offset); w[48] = hc_bytealign_be_S (w[19], w[20], offset); w[47] = hc_bytealign_be_S (w[18], w[19], offset); w[46] = hc_bytealign_be_S (w[17], w[18], offset); w[45] = hc_bytealign_be_S (w[16], w[17], 
offset); w[44] = hc_bytealign_be_S (w[15], w[16], offset); w[43] = hc_bytealign_be_S (w[14], w[15], offset); w[42] = hc_bytealign_be_S (w[13], w[14], offset); w[41] = hc_bytealign_be_S (w[12], w[13], offset); w[40] = hc_bytealign_be_S (w[11], w[12], offset); w[39] = hc_bytealign_be_S (w[10], w[11], offset); w[38] = hc_bytealign_be_S (w[ 9], w[10], offset); w[37] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[36] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[35] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[34] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[33] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[32] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[31] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[30] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[29] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[28] = hc_bytealign_be_S ( 0, w[ 0], offset); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_bytealign_be_S (w[33], w[34], offset); w[62] = hc_bytealign_be_S (w[32], w[33], offset); w[61] = hc_bytealign_be_S (w[31], w[32], offset); w[60] = hc_bytealign_be_S (w[30], w[31], offset); w[59] = hc_bytealign_be_S (w[29], w[30], offset); w[58] = hc_bytealign_be_S (w[28], w[29], offset); w[57] = hc_bytealign_be_S (w[27], w[28], offset); w[56] = hc_bytealign_be_S (w[26], w[27], offset); w[55] = hc_bytealign_be_S (w[25], w[26], offset); w[54] = hc_bytealign_be_S (w[24], w[25], offset); w[53] = hc_bytealign_be_S (w[23], w[24], offset); w[52] = hc_bytealign_be_S (w[22], w[23], offset); w[51] = hc_bytealign_be_S (w[21], w[22], offset); w[50] = hc_bytealign_be_S (w[20], w[21], offset); w[49] = hc_bytealign_be_S (w[19], w[20], offset); w[48] = hc_bytealign_be_S (w[18], w[19], offset); w[47] = 
hc_bytealign_be_S (w[17], w[18], offset); w[46] = hc_bytealign_be_S (w[16], w[17], offset); w[45] = hc_bytealign_be_S (w[15], w[16], offset); w[44] = hc_bytealign_be_S (w[14], w[15], offset); w[43] = hc_bytealign_be_S (w[13], w[14], offset); w[42] = hc_bytealign_be_S (w[12], w[13], offset); w[41] = hc_bytealign_be_S (w[11], w[12], offset); w[40] = hc_bytealign_be_S (w[10], w[11], offset); w[39] = hc_bytealign_be_S (w[ 9], w[10], offset); w[38] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[37] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[36] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[35] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[34] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[33] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[32] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[31] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[30] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[29] = hc_bytealign_be_S ( 0, w[ 0], offset); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_bytealign_be_S (w[32], w[33], offset); w[62] = hc_bytealign_be_S (w[31], w[32], offset); w[61] = hc_bytealign_be_S (w[30], w[31], offset); w[60] = hc_bytealign_be_S (w[29], w[30], offset); w[59] = hc_bytealign_be_S (w[28], w[29], offset); w[58] = hc_bytealign_be_S (w[27], w[28], offset); w[57] = hc_bytealign_be_S (w[26], w[27], offset); w[56] = hc_bytealign_be_S (w[25], w[26], offset); w[55] = hc_bytealign_be_S (w[24], w[25], offset); w[54] = hc_bytealign_be_S (w[23], w[24], offset); w[53] = hc_bytealign_be_S (w[22], w[23], offset); w[52] = hc_bytealign_be_S (w[21], w[22], offset); w[51] = hc_bytealign_be_S (w[20], w[21], offset); w[50] = hc_bytealign_be_S (w[19], w[20], offset); w[49] = hc_bytealign_be_S 
(w[18], w[19], offset); w[48] = hc_bytealign_be_S (w[17], w[18], offset); w[47] = hc_bytealign_be_S (w[16], w[17], offset); w[46] = hc_bytealign_be_S (w[15], w[16], offset); w[45] = hc_bytealign_be_S (w[14], w[15], offset); w[44] = hc_bytealign_be_S (w[13], w[14], offset); w[43] = hc_bytealign_be_S (w[12], w[13], offset); w[42] = hc_bytealign_be_S (w[11], w[12], offset); w[41] = hc_bytealign_be_S (w[10], w[11], offset); w[40] = hc_bytealign_be_S (w[ 9], w[10], offset); w[39] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[38] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[37] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[36] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[35] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[34] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[33] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[32] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[31] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[30] = hc_bytealign_be_S ( 0, w[ 0], offset); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_bytealign_be_S (w[31], w[32], offset); w[62] = hc_bytealign_be_S (w[30], w[31], offset); w[61] = hc_bytealign_be_S (w[29], w[30], offset); w[60] = hc_bytealign_be_S (w[28], w[29], offset); w[59] = hc_bytealign_be_S (w[27], w[28], offset); w[58] = hc_bytealign_be_S (w[26], w[27], offset); w[57] = hc_bytealign_be_S (w[25], w[26], offset); w[56] = hc_bytealign_be_S (w[24], w[25], offset); w[55] = hc_bytealign_be_S (w[23], w[24], offset); w[54] = hc_bytealign_be_S (w[22], w[23], offset); w[53] = hc_bytealign_be_S (w[21], w[22], offset); w[52] = hc_bytealign_be_S (w[20], w[21], offset); w[51] = hc_bytealign_be_S (w[19], w[20], offset); w[50] = hc_bytealign_be_S 
(w[18], w[19], offset); w[49] = hc_bytealign_be_S (w[17], w[18], offset); w[48] = hc_bytealign_be_S (w[16], w[17], offset); w[47] = hc_bytealign_be_S (w[15], w[16], offset); w[46] = hc_bytealign_be_S (w[14], w[15], offset); w[45] = hc_bytealign_be_S (w[13], w[14], offset); w[44] = hc_bytealign_be_S (w[12], w[13], offset); w[43] = hc_bytealign_be_S (w[11], w[12], offset); w[42] = hc_bytealign_be_S (w[10], w[11], offset); w[41] = hc_bytealign_be_S (w[ 9], w[10], offset); w[40] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[39] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[38] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[37] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[36] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[35] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[34] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[33] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[32] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[31] = hc_bytealign_be_S ( 0, w[ 0], offset); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_bytealign_be_S (w[30], w[31], offset); w[62] = hc_bytealign_be_S (w[29], w[30], offset); w[61] = hc_bytealign_be_S (w[28], w[29], offset); w[60] = hc_bytealign_be_S (w[27], w[28], offset); w[59] = hc_bytealign_be_S (w[26], w[27], offset); w[58] = hc_bytealign_be_S (w[25], w[26], offset); w[57] = hc_bytealign_be_S (w[24], w[25], offset); w[56] = hc_bytealign_be_S (w[23], w[24], offset); w[55] = hc_bytealign_be_S (w[22], w[23], offset); w[54] = hc_bytealign_be_S (w[21], w[22], offset); w[53] = hc_bytealign_be_S (w[20], w[21], offset); w[52] = hc_bytealign_be_S (w[19], w[20], offset); w[51] = hc_bytealign_be_S (w[18], w[19], offset); w[50] = 
hc_bytealign_be_S (w[17], w[18], offset); w[49] = hc_bytealign_be_S (w[16], w[17], offset); w[48] = hc_bytealign_be_S (w[15], w[16], offset); w[47] = hc_bytealign_be_S (w[14], w[15], offset); w[46] = hc_bytealign_be_S (w[13], w[14], offset); w[45] = hc_bytealign_be_S (w[12], w[13], offset); w[44] = hc_bytealign_be_S (w[11], w[12], offset); w[43] = hc_bytealign_be_S (w[10], w[11], offset); w[42] = hc_bytealign_be_S (w[ 9], w[10], offset); w[41] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[40] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[39] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[38] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[37] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[36] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[35] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[34] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[33] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[32] = hc_bytealign_be_S ( 0, w[ 0], offset); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_bytealign_be_S (w[29], w[30], offset); w[62] = hc_bytealign_be_S (w[28], w[29], offset); w[61] = hc_bytealign_be_S (w[27], w[28], offset); w[60] = hc_bytealign_be_S (w[26], w[27], offset); w[59] = hc_bytealign_be_S (w[25], w[26], offset); w[58] = hc_bytealign_be_S (w[24], w[25], offset); w[57] = hc_bytealign_be_S (w[23], w[24], offset); w[56] = hc_bytealign_be_S (w[22], w[23], offset); w[55] = hc_bytealign_be_S (w[21], w[22], offset); w[54] = hc_bytealign_be_S (w[20], w[21], offset); w[53] = hc_bytealign_be_S (w[19], w[20], offset); w[52] = hc_bytealign_be_S (w[18], w[19], offset); w[51] = hc_bytealign_be_S (w[17], w[18], offset); w[50] = hc_bytealign_be_S (w[16], w[17], 
offset); w[49] = hc_bytealign_be_S (w[15], w[16], offset); w[48] = hc_bytealign_be_S (w[14], w[15], offset); w[47] = hc_bytealign_be_S (w[13], w[14], offset); w[46] = hc_bytealign_be_S (w[12], w[13], offset); w[45] = hc_bytealign_be_S (w[11], w[12], offset); w[44] = hc_bytealign_be_S (w[10], w[11], offset); w[43] = hc_bytealign_be_S (w[ 9], w[10], offset); w[42] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[41] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[40] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[39] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[38] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[37] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[36] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[35] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[34] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[33] = hc_bytealign_be_S ( 0, w[ 0], offset); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_bytealign_be_S (w[28], w[29], offset); w[62] = hc_bytealign_be_S (w[27], w[28], offset); w[61] = hc_bytealign_be_S (w[26], w[27], offset); w[60] = hc_bytealign_be_S (w[25], w[26], offset); w[59] = hc_bytealign_be_S (w[24], w[25], offset); w[58] = hc_bytealign_be_S (w[23], w[24], offset); w[57] = hc_bytealign_be_S (w[22], w[23], offset); w[56] = hc_bytealign_be_S (w[21], w[22], offset); w[55] = hc_bytealign_be_S (w[20], w[21], offset); w[54] = hc_bytealign_be_S (w[19], w[20], offset); w[53] = hc_bytealign_be_S (w[18], w[19], offset); w[52] = hc_bytealign_be_S (w[17], w[18], offset); w[51] = hc_bytealign_be_S (w[16], w[17], offset); w[50] = hc_bytealign_be_S (w[15], w[16], offset); w[49] = hc_bytealign_be_S (w[14], w[15], offset); w[48] = 
hc_bytealign_be_S (w[13], w[14], offset); w[47] = hc_bytealign_be_S (w[12], w[13], offset); w[46] = hc_bytealign_be_S (w[11], w[12], offset); w[45] = hc_bytealign_be_S (w[10], w[11], offset); w[44] = hc_bytealign_be_S (w[ 9], w[10], offset); w[43] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[42] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[41] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[40] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[39] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[38] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[37] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[36] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[35] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[34] = hc_bytealign_be_S ( 0, w[ 0], offset); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_bytealign_be_S (w[27], w[28], offset); w[62] = hc_bytealign_be_S (w[26], w[27], offset); w[61] = hc_bytealign_be_S (w[25], w[26], offset); w[60] = hc_bytealign_be_S (w[24], w[25], offset); w[59] = hc_bytealign_be_S (w[23], w[24], offset); w[58] = hc_bytealign_be_S (w[22], w[23], offset); w[57] = hc_bytealign_be_S (w[21], w[22], offset); w[56] = hc_bytealign_be_S (w[20], w[21], offset); w[55] = hc_bytealign_be_S (w[19], w[20], offset); w[54] = hc_bytealign_be_S (w[18], w[19], offset); w[53] = hc_bytealign_be_S (w[17], w[18], offset); w[52] = hc_bytealign_be_S (w[16], w[17], offset); w[51] = hc_bytealign_be_S (w[15], w[16], offset); w[50] = hc_bytealign_be_S (w[14], w[15], offset); w[49] = hc_bytealign_be_S (w[13], w[14], offset); w[48] = hc_bytealign_be_S (w[12], w[13], offset); w[47] = hc_bytealign_be_S (w[11], w[12], offset); w[46] = 
hc_bytealign_be_S (w[10], w[11], offset); w[45] = hc_bytealign_be_S (w[ 9], w[10], offset); w[44] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[43] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[42] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[41] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[40] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[39] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[38] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[37] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[36] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[35] = hc_bytealign_be_S ( 0, w[ 0], offset); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_bytealign_be_S (w[26], w[27], offset); w[62] = hc_bytealign_be_S (w[25], w[26], offset); w[61] = hc_bytealign_be_S (w[24], w[25], offset); w[60] = hc_bytealign_be_S (w[23], w[24], offset); w[59] = hc_bytealign_be_S (w[22], w[23], offset); w[58] = hc_bytealign_be_S (w[21], w[22], offset); w[57] = hc_bytealign_be_S (w[20], w[21], offset); w[56] = hc_bytealign_be_S (w[19], w[20], offset); w[55] = hc_bytealign_be_S (w[18], w[19], offset); w[54] = hc_bytealign_be_S (w[17], w[18], offset); w[53] = hc_bytealign_be_S (w[16], w[17], offset); w[52] = hc_bytealign_be_S (w[15], w[16], offset); w[51] = hc_bytealign_be_S (w[14], w[15], offset); w[50] = hc_bytealign_be_S (w[13], w[14], offset); w[49] = hc_bytealign_be_S (w[12], w[13], offset); w[48] = hc_bytealign_be_S (w[11], w[12], offset); w[47] = hc_bytealign_be_S (w[10], w[11], offset); w[46] = hc_bytealign_be_S (w[ 9], w[10], offset); w[45] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[44] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[43] = 
hc_bytealign_be_S (w[ 6], w[ 7], offset); w[42] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[41] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[40] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[39] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[38] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[37] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[36] = hc_bytealign_be_S ( 0, w[ 0], offset); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_bytealign_be_S (w[25], w[26], offset); w[62] = hc_bytealign_be_S (w[24], w[25], offset); w[61] = hc_bytealign_be_S (w[23], w[24], offset); w[60] = hc_bytealign_be_S (w[22], w[23], offset); w[59] = hc_bytealign_be_S (w[21], w[22], offset); w[58] = hc_bytealign_be_S (w[20], w[21], offset); w[57] = hc_bytealign_be_S (w[19], w[20], offset); w[56] = hc_bytealign_be_S (w[18], w[19], offset); w[55] = hc_bytealign_be_S (w[17], w[18], offset); w[54] = hc_bytealign_be_S (w[16], w[17], offset); w[53] = hc_bytealign_be_S (w[15], w[16], offset); w[52] = hc_bytealign_be_S (w[14], w[15], offset); w[51] = hc_bytealign_be_S (w[13], w[14], offset); w[50] = hc_bytealign_be_S (w[12], w[13], offset); w[49] = hc_bytealign_be_S (w[11], w[12], offset); w[48] = hc_bytealign_be_S (w[10], w[11], offset); w[47] = hc_bytealign_be_S (w[ 9], w[10], offset); w[46] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[45] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[44] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[43] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[42] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[41] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[40] = hc_bytealign_be_S (w[ 2], w[ 3], 
offset); w[39] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[38] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[37] = hc_bytealign_be_S ( 0, w[ 0], offset); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_bytealign_be_S (w[24], w[25], offset); w[62] = hc_bytealign_be_S (w[23], w[24], offset); w[61] = hc_bytealign_be_S (w[22], w[23], offset); w[60] = hc_bytealign_be_S (w[21], w[22], offset); w[59] = hc_bytealign_be_S (w[20], w[21], offset); w[58] = hc_bytealign_be_S (w[19], w[20], offset); w[57] = hc_bytealign_be_S (w[18], w[19], offset); w[56] = hc_bytealign_be_S (w[17], w[18], offset); w[55] = hc_bytealign_be_S (w[16], w[17], offset); w[54] = hc_bytealign_be_S (w[15], w[16], offset); w[53] = hc_bytealign_be_S (w[14], w[15], offset); w[52] = hc_bytealign_be_S (w[13], w[14], offset); w[51] = hc_bytealign_be_S (w[12], w[13], offset); w[50] = hc_bytealign_be_S (w[11], w[12], offset); w[49] = hc_bytealign_be_S (w[10], w[11], offset); w[48] = hc_bytealign_be_S (w[ 9], w[10], offset); w[47] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[46] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[45] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[44] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[43] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[42] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[41] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[40] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[39] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[38] = hc_bytealign_be_S ( 0, w[ 0], offset); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; 
w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_bytealign_be_S (w[23], w[24], offset); w[62] = hc_bytealign_be_S (w[22], w[23], offset); w[61] = hc_bytealign_be_S (w[21], w[22], offset); w[60] = hc_bytealign_be_S (w[20], w[21], offset); w[59] = hc_bytealign_be_S (w[19], w[20], offset); w[58] = hc_bytealign_be_S (w[18], w[19], offset); w[57] = hc_bytealign_be_S (w[17], w[18], offset); w[56] = hc_bytealign_be_S (w[16], w[17], offset); w[55] = hc_bytealign_be_S (w[15], w[16], offset); w[54] = hc_bytealign_be_S (w[14], w[15], offset); w[53] = hc_bytealign_be_S (w[13], w[14], offset); w[52] = hc_bytealign_be_S (w[12], w[13], offset); w[51] = hc_bytealign_be_S (w[11], w[12], offset); w[50] = hc_bytealign_be_S (w[10], w[11], offset); w[49] = hc_bytealign_be_S (w[ 9], w[10], offset); w[48] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[47] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[46] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[45] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[44] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[43] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[42] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[41] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[40] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[39] = hc_bytealign_be_S ( 0, w[ 0], offset); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; 
case 40: w[63] = hc_bytealign_be_S (w[22], w[23], offset); w[62] = hc_bytealign_be_S (w[21], w[22], offset); w[61] = hc_bytealign_be_S (w[20], w[21], offset); w[60] = hc_bytealign_be_S (w[19], w[20], offset); w[59] = hc_bytealign_be_S (w[18], w[19], offset); w[58] = hc_bytealign_be_S (w[17], w[18], offset); w[57] = hc_bytealign_be_S (w[16], w[17], offset); w[56] = hc_bytealign_be_S (w[15], w[16], offset); w[55] = hc_bytealign_be_S (w[14], w[15], offset); w[54] = hc_bytealign_be_S (w[13], w[14], offset); w[53] = hc_bytealign_be_S (w[12], w[13], offset); w[52] = hc_bytealign_be_S (w[11], w[12], offset); w[51] = hc_bytealign_be_S (w[10], w[11], offset); w[50] = hc_bytealign_be_S (w[ 9], w[10], offset); w[49] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[48] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[47] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[46] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[45] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[44] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[43] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[42] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[41] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[40] = hc_bytealign_be_S ( 0, w[ 0], offset); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_bytealign_be_S (w[21], w[22], offset); w[62] = hc_bytealign_be_S (w[20], w[21], offset); w[61] = hc_bytealign_be_S (w[19], w[20], offset); w[60] = hc_bytealign_be_S (w[18], w[19], offset); w[59] = hc_bytealign_be_S (w[17], w[18], offset); w[58] = hc_bytealign_be_S (w[16], w[17], offset); w[57] = hc_bytealign_be_S (w[15], 
w[16], offset); w[56] = hc_bytealign_be_S (w[14], w[15], offset); w[55] = hc_bytealign_be_S (w[13], w[14], offset); w[54] = hc_bytealign_be_S (w[12], w[13], offset); w[53] = hc_bytealign_be_S (w[11], w[12], offset); w[52] = hc_bytealign_be_S (w[10], w[11], offset); w[51] = hc_bytealign_be_S (w[ 9], w[10], offset); w[50] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[49] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[48] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[47] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[46] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[45] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[44] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[43] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[42] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[41] = hc_bytealign_be_S ( 0, w[ 0], offset); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_bytealign_be_S (w[20], w[21], offset); w[62] = hc_bytealign_be_S (w[19], w[20], offset); w[61] = hc_bytealign_be_S (w[18], w[19], offset); w[60] = hc_bytealign_be_S (w[17], w[18], offset); w[59] = hc_bytealign_be_S (w[16], w[17], offset); w[58] = hc_bytealign_be_S (w[15], w[16], offset); w[57] = hc_bytealign_be_S (w[14], w[15], offset); w[56] = hc_bytealign_be_S (w[13], w[14], offset); w[55] = hc_bytealign_be_S (w[12], w[13], offset); w[54] = hc_bytealign_be_S (w[11], w[12], offset); w[53] = hc_bytealign_be_S (w[10], w[11], offset); w[52] = hc_bytealign_be_S (w[ 9], w[10], offset); w[51] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[50] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[49] = 
hc_bytealign_be_S (w[ 6], w[ 7], offset); w[48] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[47] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[46] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[45] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[44] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[43] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[42] = hc_bytealign_be_S ( 0, w[ 0], offset); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_bytealign_be_S (w[19], w[20], offset); w[62] = hc_bytealign_be_S (w[18], w[19], offset); w[61] = hc_bytealign_be_S (w[17], w[18], offset); w[60] = hc_bytealign_be_S (w[16], w[17], offset); w[59] = hc_bytealign_be_S (w[15], w[16], offset); w[58] = hc_bytealign_be_S (w[14], w[15], offset); w[57] = hc_bytealign_be_S (w[13], w[14], offset); w[56] = hc_bytealign_be_S (w[12], w[13], offset); w[55] = hc_bytealign_be_S (w[11], w[12], offset); w[54] = hc_bytealign_be_S (w[10], w[11], offset); w[53] = hc_bytealign_be_S (w[ 9], w[10], offset); w[52] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[51] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[50] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[49] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[48] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[47] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[46] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[45] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[44] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[43] = hc_bytealign_be_S ( 0, w[ 0], offset); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 
0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_bytealign_be_S (w[18], w[19], offset); w[62] = hc_bytealign_be_S (w[17], w[18], offset); w[61] = hc_bytealign_be_S (w[16], w[17], offset); w[60] = hc_bytealign_be_S (w[15], w[16], offset); w[59] = hc_bytealign_be_S (w[14], w[15], offset); w[58] = hc_bytealign_be_S (w[13], w[14], offset); w[57] = hc_bytealign_be_S (w[12], w[13], offset); w[56] = hc_bytealign_be_S (w[11], w[12], offset); w[55] = hc_bytealign_be_S (w[10], w[11], offset); w[54] = hc_bytealign_be_S (w[ 9], w[10], offset); w[53] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[52] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[51] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[50] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[49] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[48] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[47] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[46] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[45] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[44] = hc_bytealign_be_S ( 0, w[ 0], offset); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_bytealign_be_S (w[17], w[18], offset); w[62] = hc_bytealign_be_S (w[16], w[17], 
offset); w[61] = hc_bytealign_be_S (w[15], w[16], offset); w[60] = hc_bytealign_be_S (w[14], w[15], offset); w[59] = hc_bytealign_be_S (w[13], w[14], offset); w[58] = hc_bytealign_be_S (w[12], w[13], offset); w[57] = hc_bytealign_be_S (w[11], w[12], offset); w[56] = hc_bytealign_be_S (w[10], w[11], offset); w[55] = hc_bytealign_be_S (w[ 9], w[10], offset); w[54] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[53] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[52] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[51] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[50] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[49] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[48] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[47] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[46] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[45] = hc_bytealign_be_S ( 0, w[ 0], offset); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_bytealign_be_S (w[16], w[17], offset); w[62] = hc_bytealign_be_S (w[15], w[16], offset); w[61] = hc_bytealign_be_S (w[14], w[15], offset); w[60] = hc_bytealign_be_S (w[13], w[14], offset); w[59] = hc_bytealign_be_S (w[12], w[13], offset); w[58] = hc_bytealign_be_S (w[11], w[12], offset); w[57] = hc_bytealign_be_S (w[10], w[11], offset); w[56] = hc_bytealign_be_S (w[ 9], w[10], offset); w[55] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[54] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[53] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[52] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[51] = hc_bytealign_be_S (w[ 
4], w[ 5], offset); w[50] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[49] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[48] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[47] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[46] = hc_bytealign_be_S ( 0, w[ 0], offset); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_bytealign_be_S (w[15], w[16], offset); w[62] = hc_bytealign_be_S (w[14], w[15], offset); w[61] = hc_bytealign_be_S (w[13], w[14], offset); w[60] = hc_bytealign_be_S (w[12], w[13], offset); w[59] = hc_bytealign_be_S (w[11], w[12], offset); w[58] = hc_bytealign_be_S (w[10], w[11], offset); w[57] = hc_bytealign_be_S (w[ 9], w[10], offset); w[56] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[55] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[54] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[53] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[52] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[51] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[50] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[49] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[48] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[47] = hc_bytealign_be_S ( 0, w[ 0], offset); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; 
w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_bytealign_be_S (w[14], w[15], offset); w[62] = hc_bytealign_be_S (w[13], w[14], offset); w[61] = hc_bytealign_be_S (w[12], w[13], offset); w[60] = hc_bytealign_be_S (w[11], w[12], offset); w[59] = hc_bytealign_be_S (w[10], w[11], offset); w[58] = hc_bytealign_be_S (w[ 9], w[10], offset); w[57] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[56] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[55] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[54] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[53] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[52] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[51] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[50] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[49] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[48] = hc_bytealign_be_S ( 0, w[ 0], offset); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_bytealign_be_S (w[13], w[14], offset); w[62] = hc_bytealign_be_S (w[12], w[13], offset); w[61] = hc_bytealign_be_S (w[11], w[12], offset); w[60] = hc_bytealign_be_S (w[10], w[11], offset); w[59] = hc_bytealign_be_S (w[ 9], w[10], offset); w[58] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[57] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[56] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[55] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[54] = hc_bytealign_be_S (w[ 4], w[ 
5], offset); w[53] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[52] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[51] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[50] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[49] = hc_bytealign_be_S ( 0, w[ 0], offset); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_bytealign_be_S (w[12], w[13], offset); w[62] = hc_bytealign_be_S (w[11], w[12], offset); w[61] = hc_bytealign_be_S (w[10], w[11], offset); w[60] = hc_bytealign_be_S (w[ 9], w[10], offset); w[59] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[58] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[57] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[56] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[55] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[54] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[53] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[52] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[51] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[50] = hc_bytealign_be_S ( 0, w[ 0], offset); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 
5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_bytealign_be_S (w[11], w[12], offset); w[62] = hc_bytealign_be_S (w[10], w[11], offset); w[61] = hc_bytealign_be_S (w[ 9], w[10], offset); w[60] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[59] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[58] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[57] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[56] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[55] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[54] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[53] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[52] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[51] = hc_bytealign_be_S ( 0, w[ 0], offset); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_bytealign_be_S (w[10], w[11], offset); w[62] = hc_bytealign_be_S (w[ 9], w[10], offset); w[61] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[60] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[59] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[58] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[57] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[56] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[55] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[54] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[53] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[52] = hc_bytealign_be_S ( 0, w[ 0], offset); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; 
w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_bytealign_be_S (w[ 9], w[10], offset); w[62] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[61] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[60] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[59] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[58] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[57] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[56] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[55] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[54] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[53] = hc_bytealign_be_S ( 0, w[ 0], offset); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_bytealign_be_S (w[ 8], w[ 9], offset); w[62] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[61] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[60] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[59] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[58] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[57] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[56] = 
hc_bytealign_be_S (w[ 1], w[ 2], offset); w[55] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[54] = hc_bytealign_be_S ( 0, w[ 0], offset); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_bytealign_be_S (w[ 7], w[ 8], offset); w[62] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[61] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[60] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[59] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[58] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[57] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[56] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[55] = hc_bytealign_be_S ( 0, w[ 0], offset); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_bytealign_be_S (w[ 6], w[ 7], offset); w[62] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[61] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[60] = hc_bytealign_be_S (w[ 
3], w[ 4], offset); w[59] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[58] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[57] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[56] = hc_bytealign_be_S ( 0, w[ 0], offset); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_bytealign_be_S (w[ 5], w[ 6], offset); w[62] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[61] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[60] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[59] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[58] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[57] = hc_bytealign_be_S ( 0, w[ 0], offset); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_bytealign_be_S (w[ 4], w[ 5], offset); w[62] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[61] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[60] = 
hc_bytealign_be_S (w[ 1], w[ 2], offset); w[59] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[58] = hc_bytealign_be_S ( 0, w[ 0], offset); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_bytealign_be_S (w[ 3], w[ 4], offset); w[62] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[61] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[60] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[59] = hc_bytealign_be_S ( 0, w[ 0], offset); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_bytealign_be_S (w[ 2], w[ 3], offset); w[62] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[61] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[60] = hc_bytealign_be_S ( 0, w[ 0], offset); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 
0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_bytealign_be_S (w[ 1], w[ 2], offset); w[62] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[61] = hc_bytealign_be_S ( 0, w[ 0], offset); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_bytealign_be_S (w[ 0], w[ 1], offset); w[62] = hc_bytealign_be_S ( 0, w[ 0], offset); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; 
w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_bytealign_be_S ( 0, w[ 0], offset); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV #if defined IS_NV const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; #endif #if (defined IS_AMD || defined IS_HIP) const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); #endif switch (offset_switch) { case 0: w[63] = hc_byte_perm_S (w[63], w[62], selector); w[62] = hc_byte_perm_S (w[62], w[61], selector); w[61] = hc_byte_perm_S (w[61], w[60], selector); w[60] = hc_byte_perm_S (w[60], w[59], selector); w[59] = hc_byte_perm_S (w[59], w[58], selector); w[58] = hc_byte_perm_S (w[58], w[57], selector); w[57] = hc_byte_perm_S (w[57], w[56], selector); w[56] = hc_byte_perm_S (w[56], w[55], selector); w[55] = hc_byte_perm_S (w[55], w[54], selector); w[54] = hc_byte_perm_S (w[54], w[53], selector); w[53] = hc_byte_perm_S (w[53], w[52], selector); w[52] = hc_byte_perm_S (w[52], w[51], selector); w[51] = hc_byte_perm_S (w[51], w[50], selector); w[50] = hc_byte_perm_S (w[50], w[49], selector); w[49] = 
hc_byte_perm_S (w[49], w[48], selector); w[48] = hc_byte_perm_S (w[48], w[47], selector); w[47] = hc_byte_perm_S (w[47], w[46], selector); w[46] = hc_byte_perm_S (w[46], w[45], selector); w[45] = hc_byte_perm_S (w[45], w[44], selector); w[44] = hc_byte_perm_S (w[44], w[43], selector); w[43] = hc_byte_perm_S (w[43], w[42], selector); w[42] = hc_byte_perm_S (w[42], w[41], selector); w[41] = hc_byte_perm_S (w[41], w[40], selector); w[40] = hc_byte_perm_S (w[40], w[39], selector); w[39] = hc_byte_perm_S (w[39], w[38], selector); w[38] = hc_byte_perm_S (w[38], w[37], selector); w[37] = hc_byte_perm_S (w[37], w[36], selector); w[36] = hc_byte_perm_S (w[36], w[35], selector); w[35] = hc_byte_perm_S (w[35], w[34], selector); w[34] = hc_byte_perm_S (w[34], w[33], selector); w[33] = hc_byte_perm_S (w[33], w[32], selector); w[32] = hc_byte_perm_S (w[32], w[31], selector); w[31] = hc_byte_perm_S (w[31], w[30], selector); w[30] = hc_byte_perm_S (w[30], w[29], selector); w[29] = hc_byte_perm_S (w[29], w[28], selector); w[28] = hc_byte_perm_S (w[28], w[27], selector); w[27] = hc_byte_perm_S (w[27], w[26], selector); w[26] = hc_byte_perm_S (w[26], w[25], selector); w[25] = hc_byte_perm_S (w[25], w[24], selector); w[24] = hc_byte_perm_S (w[24], w[23], selector); w[23] = hc_byte_perm_S (w[23], w[22], selector); w[22] = hc_byte_perm_S (w[22], w[21], selector); w[21] = hc_byte_perm_S (w[21], w[20], selector); w[20] = hc_byte_perm_S (w[20], w[19], selector); w[19] = hc_byte_perm_S (w[19], w[18], selector); w[18] = hc_byte_perm_S (w[18], w[17], selector); w[17] = hc_byte_perm_S (w[17], w[16], selector); w[16] = hc_byte_perm_S (w[16], w[15], selector); w[15] = hc_byte_perm_S (w[15], w[14], selector); w[14] = hc_byte_perm_S (w[14], w[13], selector); w[13] = hc_byte_perm_S (w[13], w[12], selector); w[12] = hc_byte_perm_S (w[12], w[11], selector); w[11] = hc_byte_perm_S (w[11], w[10], selector); w[10] = hc_byte_perm_S (w[10], w[ 9], selector); w[ 9] = hc_byte_perm_S (w[ 9], w[ 8], 
selector); w[ 8] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[ 7] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[ 6] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[ 5] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 4] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 3] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 2] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 1] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 0] = hc_byte_perm_S (w[ 0], 0, selector); break; case 1: w[63] = hc_byte_perm_S (w[62], w[61], selector); w[62] = hc_byte_perm_S (w[61], w[60], selector); w[61] = hc_byte_perm_S (w[60], w[59], selector); w[60] = hc_byte_perm_S (w[59], w[58], selector); w[59] = hc_byte_perm_S (w[58], w[57], selector); w[58] = hc_byte_perm_S (w[57], w[56], selector); w[57] = hc_byte_perm_S (w[56], w[55], selector); w[56] = hc_byte_perm_S (w[55], w[54], selector); w[55] = hc_byte_perm_S (w[54], w[53], selector); w[54] = hc_byte_perm_S (w[53], w[52], selector); w[53] = hc_byte_perm_S (w[52], w[51], selector); w[52] = hc_byte_perm_S (w[51], w[50], selector); w[51] = hc_byte_perm_S (w[50], w[49], selector); w[50] = hc_byte_perm_S (w[49], w[48], selector); w[49] = hc_byte_perm_S (w[48], w[47], selector); w[48] = hc_byte_perm_S (w[47], w[46], selector); w[47] = hc_byte_perm_S (w[46], w[45], selector); w[46] = hc_byte_perm_S (w[45], w[44], selector); w[45] = hc_byte_perm_S (w[44], w[43], selector); w[44] = hc_byte_perm_S (w[43], w[42], selector); w[43] = hc_byte_perm_S (w[42], w[41], selector); w[42] = hc_byte_perm_S (w[41], w[40], selector); w[41] = hc_byte_perm_S (w[40], w[39], selector); w[40] = hc_byte_perm_S (w[39], w[38], selector); w[39] = hc_byte_perm_S (w[38], w[37], selector); w[38] = hc_byte_perm_S (w[37], w[36], selector); w[37] = hc_byte_perm_S (w[36], w[35], selector); w[36] = hc_byte_perm_S (w[35], w[34], selector); w[35] = hc_byte_perm_S (w[34], w[33], selector); w[34] = hc_byte_perm_S (w[33], w[32], selector); w[33] = hc_byte_perm_S (w[32], w[31], selector); w[32] = 
hc_byte_perm_S (w[31], w[30], selector); w[31] = hc_byte_perm_S (w[30], w[29], selector); w[30] = hc_byte_perm_S (w[29], w[28], selector); w[29] = hc_byte_perm_S (w[28], w[27], selector); w[28] = hc_byte_perm_S (w[27], w[26], selector); w[27] = hc_byte_perm_S (w[26], w[25], selector); w[26] = hc_byte_perm_S (w[25], w[24], selector); w[25] = hc_byte_perm_S (w[24], w[23], selector); w[24] = hc_byte_perm_S (w[23], w[22], selector); w[23] = hc_byte_perm_S (w[22], w[21], selector); w[22] = hc_byte_perm_S (w[21], w[20], selector); w[21] = hc_byte_perm_S (w[20], w[19], selector); w[20] = hc_byte_perm_S (w[19], w[18], selector); w[19] = hc_byte_perm_S (w[18], w[17], selector); w[18] = hc_byte_perm_S (w[17], w[16], selector); w[17] = hc_byte_perm_S (w[16], w[15], selector); w[16] = hc_byte_perm_S (w[15], w[14], selector); w[15] = hc_byte_perm_S (w[14], w[13], selector); w[14] = hc_byte_perm_S (w[13], w[12], selector); w[13] = hc_byte_perm_S (w[12], w[11], selector); w[12] = hc_byte_perm_S (w[11], w[10], selector); w[11] = hc_byte_perm_S (w[10], w[ 9], selector); w[10] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[ 9] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[ 8] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[ 7] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[ 6] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 5] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 4] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 3] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 2] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 1] = hc_byte_perm_S (w[ 0], 0, selector); w[ 0] = 0; break; case 2: w[63] = hc_byte_perm_S (w[61], w[60], selector); w[62] = hc_byte_perm_S (w[60], w[59], selector); w[61] = hc_byte_perm_S (w[59], w[58], selector); w[60] = hc_byte_perm_S (w[58], w[57], selector); w[59] = hc_byte_perm_S (w[57], w[56], selector); w[58] = hc_byte_perm_S (w[56], w[55], selector); w[57] = hc_byte_perm_S (w[55], w[54], selector); w[56] = hc_byte_perm_S (w[54], w[53], selector); w[55] = hc_byte_perm_S 
(w[53], w[52], selector); w[54] = hc_byte_perm_S (w[52], w[51], selector); w[53] = hc_byte_perm_S (w[51], w[50], selector); w[52] = hc_byte_perm_S (w[50], w[49], selector); w[51] = hc_byte_perm_S (w[49], w[48], selector); w[50] = hc_byte_perm_S (w[48], w[47], selector); w[49] = hc_byte_perm_S (w[47], w[46], selector); w[48] = hc_byte_perm_S (w[46], w[45], selector); w[47] = hc_byte_perm_S (w[45], w[44], selector); w[46] = hc_byte_perm_S (w[44], w[43], selector); w[45] = hc_byte_perm_S (w[43], w[42], selector); w[44] = hc_byte_perm_S (w[42], w[41], selector); w[43] = hc_byte_perm_S (w[41], w[40], selector); w[42] = hc_byte_perm_S (w[40], w[39], selector); w[41] = hc_byte_perm_S (w[39], w[38], selector); w[40] = hc_byte_perm_S (w[38], w[37], selector); w[39] = hc_byte_perm_S (w[37], w[36], selector); w[38] = hc_byte_perm_S (w[36], w[35], selector); w[37] = hc_byte_perm_S (w[35], w[34], selector); w[36] = hc_byte_perm_S (w[34], w[33], selector); w[35] = hc_byte_perm_S (w[33], w[32], selector); w[34] = hc_byte_perm_S (w[32], w[31], selector); w[33] = hc_byte_perm_S (w[31], w[30], selector); w[32] = hc_byte_perm_S (w[30], w[29], selector); w[31] = hc_byte_perm_S (w[29], w[28], selector); w[30] = hc_byte_perm_S (w[28], w[27], selector); w[29] = hc_byte_perm_S (w[27], w[26], selector); w[28] = hc_byte_perm_S (w[26], w[25], selector); w[27] = hc_byte_perm_S (w[25], w[24], selector); w[26] = hc_byte_perm_S (w[24], w[23], selector); w[25] = hc_byte_perm_S (w[23], w[22], selector); w[24] = hc_byte_perm_S (w[22], w[21], selector); w[23] = hc_byte_perm_S (w[21], w[20], selector); w[22] = hc_byte_perm_S (w[20], w[19], selector); w[21] = hc_byte_perm_S (w[19], w[18], selector); w[20] = hc_byte_perm_S (w[18], w[17], selector); w[19] = hc_byte_perm_S (w[17], w[16], selector); w[18] = hc_byte_perm_S (w[16], w[15], selector); w[17] = hc_byte_perm_S (w[15], w[14], selector); w[16] = hc_byte_perm_S (w[14], w[13], selector); w[15] = hc_byte_perm_S (w[13], w[12], selector); w[14] = 
hc_byte_perm_S (w[12], w[11], selector); w[13] = hc_byte_perm_S (w[11], w[10], selector); w[12] = hc_byte_perm_S (w[10], w[ 9], selector); w[11] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[10] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[ 9] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[ 8] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[ 7] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 6] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 5] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 4] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 3] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 2] = hc_byte_perm_S (w[ 0], 0, selector); w[ 1] = 0; w[ 0] = 0; break; case 3: w[63] = hc_byte_perm_S (w[60], w[59], selector); w[62] = hc_byte_perm_S (w[59], w[58], selector); w[61] = hc_byte_perm_S (w[58], w[57], selector); w[60] = hc_byte_perm_S (w[57], w[56], selector); w[59] = hc_byte_perm_S (w[56], w[55], selector); w[58] = hc_byte_perm_S (w[55], w[54], selector); w[57] = hc_byte_perm_S (w[54], w[53], selector); w[56] = hc_byte_perm_S (w[53], w[52], selector); w[55] = hc_byte_perm_S (w[52], w[51], selector); w[54] = hc_byte_perm_S (w[51], w[50], selector); w[53] = hc_byte_perm_S (w[50], w[49], selector); w[52] = hc_byte_perm_S (w[49], w[48], selector); w[51] = hc_byte_perm_S (w[48], w[47], selector); w[50] = hc_byte_perm_S (w[47], w[46], selector); w[49] = hc_byte_perm_S (w[46], w[45], selector); w[48] = hc_byte_perm_S (w[45], w[44], selector); w[47] = hc_byte_perm_S (w[44], w[43], selector); w[46] = hc_byte_perm_S (w[43], w[42], selector); w[45] = hc_byte_perm_S (w[42], w[41], selector); w[44] = hc_byte_perm_S (w[41], w[40], selector); w[43] = hc_byte_perm_S (w[40], w[39], selector); w[42] = hc_byte_perm_S (w[39], w[38], selector); w[41] = hc_byte_perm_S (w[38], w[37], selector); w[40] = hc_byte_perm_S (w[37], w[36], selector); w[39] = hc_byte_perm_S (w[36], w[35], selector); w[38] = hc_byte_perm_S (w[35], w[34], selector); w[37] = hc_byte_perm_S (w[34], w[33], selector); w[36] = 
hc_byte_perm_S (w[33], w[32], selector); w[35] = hc_byte_perm_S (w[32], w[31], selector); w[34] = hc_byte_perm_S (w[31], w[30], selector); w[33] = hc_byte_perm_S (w[30], w[29], selector); w[32] = hc_byte_perm_S (w[29], w[28], selector); w[31] = hc_byte_perm_S (w[28], w[27], selector); w[30] = hc_byte_perm_S (w[27], w[26], selector); w[29] = hc_byte_perm_S (w[26], w[25], selector); w[28] = hc_byte_perm_S (w[25], w[24], selector); w[27] = hc_byte_perm_S (w[24], w[23], selector); w[26] = hc_byte_perm_S (w[23], w[22], selector); w[25] = hc_byte_perm_S (w[22], w[21], selector); w[24] = hc_byte_perm_S (w[21], w[20], selector); w[23] = hc_byte_perm_S (w[20], w[19], selector); w[22] = hc_byte_perm_S (w[19], w[18], selector); w[21] = hc_byte_perm_S (w[18], w[17], selector); w[20] = hc_byte_perm_S (w[17], w[16], selector); w[19] = hc_byte_perm_S (w[16], w[15], selector); w[18] = hc_byte_perm_S (w[15], w[14], selector); w[17] = hc_byte_perm_S (w[14], w[13], selector); w[16] = hc_byte_perm_S (w[13], w[12], selector); w[15] = hc_byte_perm_S (w[12], w[11], selector); w[14] = hc_byte_perm_S (w[11], w[10], selector); w[13] = hc_byte_perm_S (w[10], w[ 9], selector); w[12] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[11] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[10] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[ 9] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[ 8] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 7] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 6] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 5] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 4] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 3] = hc_byte_perm_S (w[ 0], 0, selector); w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 4: w[63] = hc_byte_perm_S (w[59], w[58], selector); w[62] = hc_byte_perm_S (w[58], w[57], selector); w[61] = hc_byte_perm_S (w[57], w[56], selector); w[60] = hc_byte_perm_S (w[56], w[55], selector); w[59] = hc_byte_perm_S (w[55], w[54], selector); w[58] = hc_byte_perm_S (w[54], w[53], selector); 
w[57] = hc_byte_perm_S (w[53], w[52], selector); w[56] = hc_byte_perm_S (w[52], w[51], selector); w[55] = hc_byte_perm_S (w[51], w[50], selector); w[54] = hc_byte_perm_S (w[50], w[49], selector); w[53] = hc_byte_perm_S (w[49], w[48], selector); w[52] = hc_byte_perm_S (w[48], w[47], selector); w[51] = hc_byte_perm_S (w[47], w[46], selector); w[50] = hc_byte_perm_S (w[46], w[45], selector); w[49] = hc_byte_perm_S (w[45], w[44], selector); w[48] = hc_byte_perm_S (w[44], w[43], selector); w[47] = hc_byte_perm_S (w[43], w[42], selector); w[46] = hc_byte_perm_S (w[42], w[41], selector); w[45] = hc_byte_perm_S (w[41], w[40], selector); w[44] = hc_byte_perm_S (w[40], w[39], selector); w[43] = hc_byte_perm_S (w[39], w[38], selector); w[42] = hc_byte_perm_S (w[38], w[37], selector); w[41] = hc_byte_perm_S (w[37], w[36], selector); w[40] = hc_byte_perm_S (w[36], w[35], selector); w[39] = hc_byte_perm_S (w[35], w[34], selector); w[38] = hc_byte_perm_S (w[34], w[33], selector); w[37] = hc_byte_perm_S (w[33], w[32], selector); w[36] = hc_byte_perm_S (w[32], w[31], selector); w[35] = hc_byte_perm_S (w[31], w[30], selector); w[34] = hc_byte_perm_S (w[30], w[29], selector); w[33] = hc_byte_perm_S (w[29], w[28], selector); w[32] = hc_byte_perm_S (w[28], w[27], selector); w[31] = hc_byte_perm_S (w[27], w[26], selector); w[30] = hc_byte_perm_S (w[26], w[25], selector); w[29] = hc_byte_perm_S (w[25], w[24], selector); w[28] = hc_byte_perm_S (w[24], w[23], selector); w[27] = hc_byte_perm_S (w[23], w[22], selector); w[26] = hc_byte_perm_S (w[22], w[21], selector); w[25] = hc_byte_perm_S (w[21], w[20], selector); w[24] = hc_byte_perm_S (w[20], w[19], selector); w[23] = hc_byte_perm_S (w[19], w[18], selector); w[22] = hc_byte_perm_S (w[18], w[17], selector); w[21] = hc_byte_perm_S (w[17], w[16], selector); w[20] = hc_byte_perm_S (w[16], w[15], selector); w[19] = hc_byte_perm_S (w[15], w[14], selector); w[18] = hc_byte_perm_S (w[14], w[13], selector); w[17] = hc_byte_perm_S (w[13], w[12], 
selector); w[16] = hc_byte_perm_S (w[12], w[11], selector); w[15] = hc_byte_perm_S (w[11], w[10], selector); w[14] = hc_byte_perm_S (w[10], w[ 9], selector); w[13] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[12] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[11] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[10] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[ 9] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 8] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 7] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 6] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 5] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 4] = hc_byte_perm_S (w[ 0], 0, selector); w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 5: w[63] = hc_byte_perm_S (w[58], w[57], selector); w[62] = hc_byte_perm_S (w[57], w[56], selector); w[61] = hc_byte_perm_S (w[56], w[55], selector); w[60] = hc_byte_perm_S (w[55], w[54], selector); w[59] = hc_byte_perm_S (w[54], w[53], selector); w[58] = hc_byte_perm_S (w[53], w[52], selector); w[57] = hc_byte_perm_S (w[52], w[51], selector); w[56] = hc_byte_perm_S (w[51], w[50], selector); w[55] = hc_byte_perm_S (w[50], w[49], selector); w[54] = hc_byte_perm_S (w[49], w[48], selector); w[53] = hc_byte_perm_S (w[48], w[47], selector); w[52] = hc_byte_perm_S (w[47], w[46], selector); w[51] = hc_byte_perm_S (w[46], w[45], selector); w[50] = hc_byte_perm_S (w[45], w[44], selector); w[49] = hc_byte_perm_S (w[44], w[43], selector); w[48] = hc_byte_perm_S (w[43], w[42], selector); w[47] = hc_byte_perm_S (w[42], w[41], selector); w[46] = hc_byte_perm_S (w[41], w[40], selector); w[45] = hc_byte_perm_S (w[40], w[39], selector); w[44] = hc_byte_perm_S (w[39], w[38], selector); w[43] = hc_byte_perm_S (w[38], w[37], selector); w[42] = hc_byte_perm_S (w[37], w[36], selector); w[41] = hc_byte_perm_S (w[36], w[35], selector); w[40] = hc_byte_perm_S (w[35], w[34], selector); w[39] = hc_byte_perm_S (w[34], w[33], selector); w[38] = hc_byte_perm_S (w[33], w[32], selector); w[37] = hc_byte_perm_S 
(w[32], w[31], selector); w[36] = hc_byte_perm_S (w[31], w[30], selector); w[35] = hc_byte_perm_S (w[30], w[29], selector); w[34] = hc_byte_perm_S (w[29], w[28], selector); w[33] = hc_byte_perm_S (w[28], w[27], selector); w[32] = hc_byte_perm_S (w[27], w[26], selector); w[31] = hc_byte_perm_S (w[26], w[25], selector); w[30] = hc_byte_perm_S (w[25], w[24], selector); w[29] = hc_byte_perm_S (w[24], w[23], selector); w[28] = hc_byte_perm_S (w[23], w[22], selector); w[27] = hc_byte_perm_S (w[22], w[21], selector); w[26] = hc_byte_perm_S (w[21], w[20], selector); w[25] = hc_byte_perm_S (w[20], w[19], selector); w[24] = hc_byte_perm_S (w[19], w[18], selector); w[23] = hc_byte_perm_S (w[18], w[17], selector); w[22] = hc_byte_perm_S (w[17], w[16], selector); w[21] = hc_byte_perm_S (w[16], w[15], selector); w[20] = hc_byte_perm_S (w[15], w[14], selector); w[19] = hc_byte_perm_S (w[14], w[13], selector); w[18] = hc_byte_perm_S (w[13], w[12], selector); w[17] = hc_byte_perm_S (w[12], w[11], selector); w[16] = hc_byte_perm_S (w[11], w[10], selector); w[15] = hc_byte_perm_S (w[10], w[ 9], selector); w[14] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[13] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[12] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[11] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[10] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[ 9] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 8] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 7] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 6] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 5] = hc_byte_perm_S (w[ 0], 0, selector); w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 6: w[63] = hc_byte_perm_S (w[57], w[56], selector); w[62] = hc_byte_perm_S (w[56], w[55], selector); w[61] = hc_byte_perm_S (w[55], w[54], selector); w[60] = hc_byte_perm_S (w[54], w[53], selector); w[59] = hc_byte_perm_S (w[53], w[52], selector); w[58] = hc_byte_perm_S (w[52], w[51], selector); w[57] = hc_byte_perm_S (w[51], w[50], 
selector); w[56] = hc_byte_perm_S (w[50], w[49], selector); w[55] = hc_byte_perm_S (w[49], w[48], selector); w[54] = hc_byte_perm_S (w[48], w[47], selector); w[53] = hc_byte_perm_S (w[47], w[46], selector); w[52] = hc_byte_perm_S (w[46], w[45], selector); w[51] = hc_byte_perm_S (w[45], w[44], selector); w[50] = hc_byte_perm_S (w[44], w[43], selector); w[49] = hc_byte_perm_S (w[43], w[42], selector); w[48] = hc_byte_perm_S (w[42], w[41], selector); w[47] = hc_byte_perm_S (w[41], w[40], selector); w[46] = hc_byte_perm_S (w[40], w[39], selector); w[45] = hc_byte_perm_S (w[39], w[38], selector); w[44] = hc_byte_perm_S (w[38], w[37], selector); w[43] = hc_byte_perm_S (w[37], w[36], selector); w[42] = hc_byte_perm_S (w[36], w[35], selector); w[41] = hc_byte_perm_S (w[35], w[34], selector); w[40] = hc_byte_perm_S (w[34], w[33], selector); w[39] = hc_byte_perm_S (w[33], w[32], selector); w[38] = hc_byte_perm_S (w[32], w[31], selector); w[37] = hc_byte_perm_S (w[31], w[30], selector); w[36] = hc_byte_perm_S (w[30], w[29], selector); w[35] = hc_byte_perm_S (w[29], w[28], selector); w[34] = hc_byte_perm_S (w[28], w[27], selector); w[33] = hc_byte_perm_S (w[27], w[26], selector); w[32] = hc_byte_perm_S (w[26], w[25], selector); w[31] = hc_byte_perm_S (w[25], w[24], selector); w[30] = hc_byte_perm_S (w[24], w[23], selector); w[29] = hc_byte_perm_S (w[23], w[22], selector); w[28] = hc_byte_perm_S (w[22], w[21], selector); w[27] = hc_byte_perm_S (w[21], w[20], selector); w[26] = hc_byte_perm_S (w[20], w[19], selector); w[25] = hc_byte_perm_S (w[19], w[18], selector); w[24] = hc_byte_perm_S (w[18], w[17], selector); w[23] = hc_byte_perm_S (w[17], w[16], selector); w[22] = hc_byte_perm_S (w[16], w[15], selector); w[21] = hc_byte_perm_S (w[15], w[14], selector); w[20] = hc_byte_perm_S (w[14], w[13], selector); w[19] = hc_byte_perm_S (w[13], w[12], selector); w[18] = hc_byte_perm_S (w[12], w[11], selector); w[17] = hc_byte_perm_S (w[11], w[10], selector); w[16] = hc_byte_perm_S 
(w[10], w[ 9], selector); w[15] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[14] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[13] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[12] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[11] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[10] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[ 9] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 8] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 7] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 6] = hc_byte_perm_S (w[ 0], 0, selector); w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 7: w[63] = hc_byte_perm_S (w[56], w[55], selector); w[62] = hc_byte_perm_S (w[55], w[54], selector); w[61] = hc_byte_perm_S (w[54], w[53], selector); w[60] = hc_byte_perm_S (w[53], w[52], selector); w[59] = hc_byte_perm_S (w[52], w[51], selector); w[58] = hc_byte_perm_S (w[51], w[50], selector); w[57] = hc_byte_perm_S (w[50], w[49], selector); w[56] = hc_byte_perm_S (w[49], w[48], selector); w[55] = hc_byte_perm_S (w[48], w[47], selector); w[54] = hc_byte_perm_S (w[47], w[46], selector); w[53] = hc_byte_perm_S (w[46], w[45], selector); w[52] = hc_byte_perm_S (w[45], w[44], selector); w[51] = hc_byte_perm_S (w[44], w[43], selector); w[50] = hc_byte_perm_S (w[43], w[42], selector); w[49] = hc_byte_perm_S (w[42], w[41], selector); w[48] = hc_byte_perm_S (w[41], w[40], selector); w[47] = hc_byte_perm_S (w[40], w[39], selector); w[46] = hc_byte_perm_S (w[39], w[38], selector); w[45] = hc_byte_perm_S (w[38], w[37], selector); w[44] = hc_byte_perm_S (w[37], w[36], selector); w[43] = hc_byte_perm_S (w[36], w[35], selector); w[42] = hc_byte_perm_S (w[35], w[34], selector); w[41] = hc_byte_perm_S (w[34], w[33], selector); w[40] = hc_byte_perm_S (w[33], w[32], selector); w[39] = hc_byte_perm_S (w[32], w[31], selector); w[38] = hc_byte_perm_S (w[31], w[30], selector); w[37] = hc_byte_perm_S (w[30], w[29], selector); w[36] = hc_byte_perm_S (w[29], w[28], selector); w[35] = hc_byte_perm_S (w[28], 
w[27], selector); w[34] = hc_byte_perm_S (w[27], w[26], selector); w[33] = hc_byte_perm_S (w[26], w[25], selector); w[32] = hc_byte_perm_S (w[25], w[24], selector); w[31] = hc_byte_perm_S (w[24], w[23], selector); w[30] = hc_byte_perm_S (w[23], w[22], selector); w[29] = hc_byte_perm_S (w[22], w[21], selector); w[28] = hc_byte_perm_S (w[21], w[20], selector); w[27] = hc_byte_perm_S (w[20], w[19], selector); w[26] = hc_byte_perm_S (w[19], w[18], selector); w[25] = hc_byte_perm_S (w[18], w[17], selector); w[24] = hc_byte_perm_S (w[17], w[16], selector); w[23] = hc_byte_perm_S (w[16], w[15], selector); w[22] = hc_byte_perm_S (w[15], w[14], selector); w[21] = hc_byte_perm_S (w[14], w[13], selector); w[20] = hc_byte_perm_S (w[13], w[12], selector); w[19] = hc_byte_perm_S (w[12], w[11], selector); w[18] = hc_byte_perm_S (w[11], w[10], selector); w[17] = hc_byte_perm_S (w[10], w[ 9], selector); w[16] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[15] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[14] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[13] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[12] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[11] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[10] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[ 9] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 8] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 7] = hc_byte_perm_S (w[ 0], 0, selector); w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 8: w[63] = hc_byte_perm_S (w[55], w[54], selector); w[62] = hc_byte_perm_S (w[54], w[53], selector); w[61] = hc_byte_perm_S (w[53], w[52], selector); w[60] = hc_byte_perm_S (w[52], w[51], selector); w[59] = hc_byte_perm_S (w[51], w[50], selector); w[58] = hc_byte_perm_S (w[50], w[49], selector); w[57] = hc_byte_perm_S (w[49], w[48], selector); w[56] = hc_byte_perm_S (w[48], w[47], selector); w[55] = hc_byte_perm_S (w[47], w[46], selector); w[54] = hc_byte_perm_S (w[46], w[45], selector); w[53] = hc_byte_perm_S (w[45], 
w[44], selector); w[52] = hc_byte_perm_S (w[44], w[43], selector); w[51] = hc_byte_perm_S (w[43], w[42], selector); w[50] = hc_byte_perm_S (w[42], w[41], selector); w[49] = hc_byte_perm_S (w[41], w[40], selector); w[48] = hc_byte_perm_S (w[40], w[39], selector); w[47] = hc_byte_perm_S (w[39], w[38], selector); w[46] = hc_byte_perm_S (w[38], w[37], selector); w[45] = hc_byte_perm_S (w[37], w[36], selector); w[44] = hc_byte_perm_S (w[36], w[35], selector); w[43] = hc_byte_perm_S (w[35], w[34], selector); w[42] = hc_byte_perm_S (w[34], w[33], selector); w[41] = hc_byte_perm_S (w[33], w[32], selector); w[40] = hc_byte_perm_S (w[32], w[31], selector); w[39] = hc_byte_perm_S (w[31], w[30], selector); w[38] = hc_byte_perm_S (w[30], w[29], selector); w[37] = hc_byte_perm_S (w[29], w[28], selector); w[36] = hc_byte_perm_S (w[28], w[27], selector); w[35] = hc_byte_perm_S (w[27], w[26], selector); w[34] = hc_byte_perm_S (w[26], w[25], selector); w[33] = hc_byte_perm_S (w[25], w[24], selector); w[32] = hc_byte_perm_S (w[24], w[23], selector); w[31] = hc_byte_perm_S (w[23], w[22], selector); w[30] = hc_byte_perm_S (w[22], w[21], selector); w[29] = hc_byte_perm_S (w[21], w[20], selector); w[28] = hc_byte_perm_S (w[20], w[19], selector); w[27] = hc_byte_perm_S (w[19], w[18], selector); w[26] = hc_byte_perm_S (w[18], w[17], selector); w[25] = hc_byte_perm_S (w[17], w[16], selector); w[24] = hc_byte_perm_S (w[16], w[15], selector); w[23] = hc_byte_perm_S (w[15], w[14], selector); w[22] = hc_byte_perm_S (w[14], w[13], selector); w[21] = hc_byte_perm_S (w[13], w[12], selector); w[20] = hc_byte_perm_S (w[12], w[11], selector); w[19] = hc_byte_perm_S (w[11], w[10], selector); w[18] = hc_byte_perm_S (w[10], w[ 9], selector); w[17] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[16] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[15] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[14] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[13] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[12] = 
hc_byte_perm_S (w[ 4], w[ 3], selector); w[11] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[10] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[ 9] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 8] = hc_byte_perm_S (w[ 0], 0, selector); w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 9: w[63] = hc_byte_perm_S (w[54], w[53], selector); w[62] = hc_byte_perm_S (w[53], w[52], selector); w[61] = hc_byte_perm_S (w[52], w[51], selector); w[60] = hc_byte_perm_S (w[51], w[50], selector); w[59] = hc_byte_perm_S (w[50], w[49], selector); w[58] = hc_byte_perm_S (w[49], w[48], selector); w[57] = hc_byte_perm_S (w[48], w[47], selector); w[56] = hc_byte_perm_S (w[47], w[46], selector); w[55] = hc_byte_perm_S (w[46], w[45], selector); w[54] = hc_byte_perm_S (w[45], w[44], selector); w[53] = hc_byte_perm_S (w[44], w[43], selector); w[52] = hc_byte_perm_S (w[43], w[42], selector); w[51] = hc_byte_perm_S (w[42], w[41], selector); w[50] = hc_byte_perm_S (w[41], w[40], selector); w[49] = hc_byte_perm_S (w[40], w[39], selector); w[48] = hc_byte_perm_S (w[39], w[38], selector); w[47] = hc_byte_perm_S (w[38], w[37], selector); w[46] = hc_byte_perm_S (w[37], w[36], selector); w[45] = hc_byte_perm_S (w[36], w[35], selector); w[44] = hc_byte_perm_S (w[35], w[34], selector); w[43] = hc_byte_perm_S (w[34], w[33], selector); w[42] = hc_byte_perm_S (w[33], w[32], selector); w[41] = hc_byte_perm_S (w[32], w[31], selector); w[40] = hc_byte_perm_S (w[31], w[30], selector); w[39] = hc_byte_perm_S (w[30], w[29], selector); w[38] = hc_byte_perm_S (w[29], w[28], selector); w[37] = hc_byte_perm_S (w[28], w[27], selector); w[36] = hc_byte_perm_S (w[27], w[26], selector); w[35] = hc_byte_perm_S (w[26], w[25], selector); w[34] = hc_byte_perm_S (w[25], w[24], selector); w[33] = hc_byte_perm_S (w[24], w[23], selector); w[32] = hc_byte_perm_S (w[23], w[22], selector); w[31] = hc_byte_perm_S (w[22], w[21], selector); w[30] = hc_byte_perm_S (w[21], w[20], 
selector); w[29] = hc_byte_perm_S (w[20], w[19], selector); w[28] = hc_byte_perm_S (w[19], w[18], selector); w[27] = hc_byte_perm_S (w[18], w[17], selector); w[26] = hc_byte_perm_S (w[17], w[16], selector); w[25] = hc_byte_perm_S (w[16], w[15], selector); w[24] = hc_byte_perm_S (w[15], w[14], selector); w[23] = hc_byte_perm_S (w[14], w[13], selector); w[22] = hc_byte_perm_S (w[13], w[12], selector); w[21] = hc_byte_perm_S (w[12], w[11], selector); w[20] = hc_byte_perm_S (w[11], w[10], selector); w[19] = hc_byte_perm_S (w[10], w[ 9], selector); w[18] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[17] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[16] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[15] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[14] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[13] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[12] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[11] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[10] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[ 9] = hc_byte_perm_S (w[ 0], 0, selector); w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 10: w[63] = hc_byte_perm_S (w[53], w[52], selector); w[62] = hc_byte_perm_S (w[52], w[51], selector); w[61] = hc_byte_perm_S (w[51], w[50], selector); w[60] = hc_byte_perm_S (w[50], w[49], selector); w[59] = hc_byte_perm_S (w[49], w[48], selector); w[58] = hc_byte_perm_S (w[48], w[47], selector); w[57] = hc_byte_perm_S (w[47], w[46], selector); w[56] = hc_byte_perm_S (w[46], w[45], selector); w[55] = hc_byte_perm_S (w[45], w[44], selector); w[54] = hc_byte_perm_S (w[44], w[43], selector); w[53] = hc_byte_perm_S (w[43], w[42], selector); w[52] = hc_byte_perm_S (w[42], w[41], selector); w[51] = hc_byte_perm_S (w[41], w[40], selector); w[50] = hc_byte_perm_S (w[40], w[39], selector); w[49] = hc_byte_perm_S (w[39], w[38], selector); w[48] = hc_byte_perm_S (w[38], w[37], selector); w[47] = hc_byte_perm_S (w[37], w[36], selector); w[46] = 
hc_byte_perm_S (w[36], w[35], selector); w[45] = hc_byte_perm_S (w[35], w[34], selector); w[44] = hc_byte_perm_S (w[34], w[33], selector); w[43] = hc_byte_perm_S (w[33], w[32], selector); w[42] = hc_byte_perm_S (w[32], w[31], selector); w[41] = hc_byte_perm_S (w[31], w[30], selector); w[40] = hc_byte_perm_S (w[30], w[29], selector); w[39] = hc_byte_perm_S (w[29], w[28], selector); w[38] = hc_byte_perm_S (w[28], w[27], selector); w[37] = hc_byte_perm_S (w[27], w[26], selector); w[36] = hc_byte_perm_S (w[26], w[25], selector); w[35] = hc_byte_perm_S (w[25], w[24], selector); w[34] = hc_byte_perm_S (w[24], w[23], selector); w[33] = hc_byte_perm_S (w[23], w[22], selector); w[32] = hc_byte_perm_S (w[22], w[21], selector); w[31] = hc_byte_perm_S (w[21], w[20], selector); w[30] = hc_byte_perm_S (w[20], w[19], selector); w[29] = hc_byte_perm_S (w[19], w[18], selector); w[28] = hc_byte_perm_S (w[18], w[17], selector); w[27] = hc_byte_perm_S (w[17], w[16], selector); w[26] = hc_byte_perm_S (w[16], w[15], selector); w[25] = hc_byte_perm_S (w[15], w[14], selector); w[24] = hc_byte_perm_S (w[14], w[13], selector); w[23] = hc_byte_perm_S (w[13], w[12], selector); w[22] = hc_byte_perm_S (w[12], w[11], selector); w[21] = hc_byte_perm_S (w[11], w[10], selector); w[20] = hc_byte_perm_S (w[10], w[ 9], selector); w[19] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[18] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[17] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[16] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[15] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[14] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[13] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[12] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[11] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[10] = hc_byte_perm_S (w[ 0], 0, selector); w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 11: w[63] = hc_byte_perm_S (w[52], w[51], selector); w[62] = hc_byte_perm_S 
(w[51], w[50], selector); w[61] = hc_byte_perm_S (w[50], w[49], selector); w[60] = hc_byte_perm_S (w[49], w[48], selector); w[59] = hc_byte_perm_S (w[48], w[47], selector); w[58] = hc_byte_perm_S (w[47], w[46], selector); w[57] = hc_byte_perm_S (w[46], w[45], selector); w[56] = hc_byte_perm_S (w[45], w[44], selector); w[55] = hc_byte_perm_S (w[44], w[43], selector); w[54] = hc_byte_perm_S (w[43], w[42], selector); w[53] = hc_byte_perm_S (w[42], w[41], selector); w[52] = hc_byte_perm_S (w[41], w[40], selector); w[51] = hc_byte_perm_S (w[40], w[39], selector); w[50] = hc_byte_perm_S (w[39], w[38], selector); w[49] = hc_byte_perm_S (w[38], w[37], selector); w[48] = hc_byte_perm_S (w[37], w[36], selector); w[47] = hc_byte_perm_S (w[36], w[35], selector); w[46] = hc_byte_perm_S (w[35], w[34], selector); w[45] = hc_byte_perm_S (w[34], w[33], selector); w[44] = hc_byte_perm_S (w[33], w[32], selector); w[43] = hc_byte_perm_S (w[32], w[31], selector); w[42] = hc_byte_perm_S (w[31], w[30], selector); w[41] = hc_byte_perm_S (w[30], w[29], selector); w[40] = hc_byte_perm_S (w[29], w[28], selector); w[39] = hc_byte_perm_S (w[28], w[27], selector); w[38] = hc_byte_perm_S (w[27], w[26], selector); w[37] = hc_byte_perm_S (w[26], w[25], selector); w[36] = hc_byte_perm_S (w[25], w[24], selector); w[35] = hc_byte_perm_S (w[24], w[23], selector); w[34] = hc_byte_perm_S (w[23], w[22], selector); w[33] = hc_byte_perm_S (w[22], w[21], selector); w[32] = hc_byte_perm_S (w[21], w[20], selector); w[31] = hc_byte_perm_S (w[20], w[19], selector); w[30] = hc_byte_perm_S (w[19], w[18], selector); w[29] = hc_byte_perm_S (w[18], w[17], selector); w[28] = hc_byte_perm_S (w[17], w[16], selector); w[27] = hc_byte_perm_S (w[16], w[15], selector); w[26] = hc_byte_perm_S (w[15], w[14], selector); w[25] = hc_byte_perm_S (w[14], w[13], selector); w[24] = hc_byte_perm_S (w[13], w[12], selector); w[23] = hc_byte_perm_S (w[12], w[11], selector); w[22] = hc_byte_perm_S (w[11], w[10], selector); w[21] = 
hc_byte_perm_S (w[10], w[ 9], selector); w[20] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[19] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[18] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[17] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[16] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[15] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[14] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[13] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[12] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[11] = hc_byte_perm_S (w[ 0], 0, selector); w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 12: w[63] = hc_byte_perm_S (w[51], w[50], selector); w[62] = hc_byte_perm_S (w[50], w[49], selector); w[61] = hc_byte_perm_S (w[49], w[48], selector); w[60] = hc_byte_perm_S (w[48], w[47], selector); w[59] = hc_byte_perm_S (w[47], w[46], selector); w[58] = hc_byte_perm_S (w[46], w[45], selector); w[57] = hc_byte_perm_S (w[45], w[44], selector); w[56] = hc_byte_perm_S (w[44], w[43], selector); w[55] = hc_byte_perm_S (w[43], w[42], selector); w[54] = hc_byte_perm_S (w[42], w[41], selector); w[53] = hc_byte_perm_S (w[41], w[40], selector); w[52] = hc_byte_perm_S (w[40], w[39], selector); w[51] = hc_byte_perm_S (w[39], w[38], selector); w[50] = hc_byte_perm_S (w[38], w[37], selector); w[49] = hc_byte_perm_S (w[37], w[36], selector); w[48] = hc_byte_perm_S (w[36], w[35], selector); w[47] = hc_byte_perm_S (w[35], w[34], selector); w[46] = hc_byte_perm_S (w[34], w[33], selector); w[45] = hc_byte_perm_S (w[33], w[32], selector); w[44] = hc_byte_perm_S (w[32], w[31], selector); w[43] = hc_byte_perm_S (w[31], w[30], selector); w[42] = hc_byte_perm_S (w[30], w[29], selector); w[41] = hc_byte_perm_S (w[29], w[28], selector); w[40] = hc_byte_perm_S (w[28], w[27], selector); w[39] = hc_byte_perm_S (w[27], w[26], selector); w[38] = hc_byte_perm_S (w[26], w[25], selector); w[37] = hc_byte_perm_S (w[25], w[24], selector); w[36] = 
hc_byte_perm_S (w[24], w[23], selector); w[35] = hc_byte_perm_S (w[23], w[22], selector); w[34] = hc_byte_perm_S (w[22], w[21], selector); w[33] = hc_byte_perm_S (w[21], w[20], selector); w[32] = hc_byte_perm_S (w[20], w[19], selector); w[31] = hc_byte_perm_S (w[19], w[18], selector); w[30] = hc_byte_perm_S (w[18], w[17], selector); w[29] = hc_byte_perm_S (w[17], w[16], selector); w[28] = hc_byte_perm_S (w[16], w[15], selector); w[27] = hc_byte_perm_S (w[15], w[14], selector); w[26] = hc_byte_perm_S (w[14], w[13], selector); w[25] = hc_byte_perm_S (w[13], w[12], selector); w[24] = hc_byte_perm_S (w[12], w[11], selector); w[23] = hc_byte_perm_S (w[11], w[10], selector); w[22] = hc_byte_perm_S (w[10], w[ 9], selector); w[21] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[20] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[19] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[18] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[17] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[16] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[15] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[14] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[13] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[12] = hc_byte_perm_S (w[ 0], 0, selector); w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 13: w[63] = hc_byte_perm_S (w[50], w[49], selector); w[62] = hc_byte_perm_S (w[49], w[48], selector); w[61] = hc_byte_perm_S (w[48], w[47], selector); w[60] = hc_byte_perm_S (w[47], w[46], selector); w[59] = hc_byte_perm_S (w[46], w[45], selector); w[58] = hc_byte_perm_S (w[45], w[44], selector); w[57] = hc_byte_perm_S (w[44], w[43], selector); w[56] = hc_byte_perm_S (w[43], w[42], selector); w[55] = hc_byte_perm_S (w[42], w[41], selector); w[54] = hc_byte_perm_S (w[41], w[40], selector); w[53] = hc_byte_perm_S (w[40], w[39], selector); w[52] = hc_byte_perm_S (w[39], w[38], selector); w[51] = hc_byte_perm_S (w[38], w[37], selector); 
w[50] = hc_byte_perm_S (w[37], w[36], selector); w[49] = hc_byte_perm_S (w[36], w[35], selector); w[48] = hc_byte_perm_S (w[35], w[34], selector); w[47] = hc_byte_perm_S (w[34], w[33], selector); w[46] = hc_byte_perm_S (w[33], w[32], selector); w[45] = hc_byte_perm_S (w[32], w[31], selector); w[44] = hc_byte_perm_S (w[31], w[30], selector); w[43] = hc_byte_perm_S (w[30], w[29], selector); w[42] = hc_byte_perm_S (w[29], w[28], selector); w[41] = hc_byte_perm_S (w[28], w[27], selector); w[40] = hc_byte_perm_S (w[27], w[26], selector); w[39] = hc_byte_perm_S (w[26], w[25], selector); w[38] = hc_byte_perm_S (w[25], w[24], selector); w[37] = hc_byte_perm_S (w[24], w[23], selector); w[36] = hc_byte_perm_S (w[23], w[22], selector); w[35] = hc_byte_perm_S (w[22], w[21], selector); w[34] = hc_byte_perm_S (w[21], w[20], selector); w[33] = hc_byte_perm_S (w[20], w[19], selector); w[32] = hc_byte_perm_S (w[19], w[18], selector); w[31] = hc_byte_perm_S (w[18], w[17], selector); w[30] = hc_byte_perm_S (w[17], w[16], selector); w[29] = hc_byte_perm_S (w[16], w[15], selector); w[28] = hc_byte_perm_S (w[15], w[14], selector); w[27] = hc_byte_perm_S (w[14], w[13], selector); w[26] = hc_byte_perm_S (w[13], w[12], selector); w[25] = hc_byte_perm_S (w[12], w[11], selector); w[24] = hc_byte_perm_S (w[11], w[10], selector); w[23] = hc_byte_perm_S (w[10], w[ 9], selector); w[22] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[21] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[20] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[19] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[18] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[17] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[16] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[15] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[14] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[13] = hc_byte_perm_S (w[ 0], 0, selector); w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 
0; break; case 14: w[63] = hc_byte_perm_S (w[49], w[48], selector); w[62] = hc_byte_perm_S (w[48], w[47], selector); w[61] = hc_byte_perm_S (w[47], w[46], selector); w[60] = hc_byte_perm_S (w[46], w[45], selector); w[59] = hc_byte_perm_S (w[45], w[44], selector); w[58] = hc_byte_perm_S (w[44], w[43], selector); w[57] = hc_byte_perm_S (w[43], w[42], selector); w[56] = hc_byte_perm_S (w[42], w[41], selector); w[55] = hc_byte_perm_S (w[41], w[40], selector); w[54] = hc_byte_perm_S (w[40], w[39], selector); w[53] = hc_byte_perm_S (w[39], w[38], selector); w[52] = hc_byte_perm_S (w[38], w[37], selector); w[51] = hc_byte_perm_S (w[37], w[36], selector); w[50] = hc_byte_perm_S (w[36], w[35], selector); w[49] = hc_byte_perm_S (w[35], w[34], selector); w[48] = hc_byte_perm_S (w[34], w[33], selector); w[47] = hc_byte_perm_S (w[33], w[32], selector); w[46] = hc_byte_perm_S (w[32], w[31], selector); w[45] = hc_byte_perm_S (w[31], w[30], selector); w[44] = hc_byte_perm_S (w[30], w[29], selector); w[43] = hc_byte_perm_S (w[29], w[28], selector); w[42] = hc_byte_perm_S (w[28], w[27], selector); w[41] = hc_byte_perm_S (w[27], w[26], selector); w[40] = hc_byte_perm_S (w[26], w[25], selector); w[39] = hc_byte_perm_S (w[25], w[24], selector); w[38] = hc_byte_perm_S (w[24], w[23], selector); w[37] = hc_byte_perm_S (w[23], w[22], selector); w[36] = hc_byte_perm_S (w[22], w[21], selector); w[35] = hc_byte_perm_S (w[21], w[20], selector); w[34] = hc_byte_perm_S (w[20], w[19], selector); w[33] = hc_byte_perm_S (w[19], w[18], selector); w[32] = hc_byte_perm_S (w[18], w[17], selector); w[31] = hc_byte_perm_S (w[17], w[16], selector); w[30] = hc_byte_perm_S (w[16], w[15], selector); w[29] = hc_byte_perm_S (w[15], w[14], selector); w[28] = hc_byte_perm_S (w[14], w[13], selector); w[27] = hc_byte_perm_S (w[13], w[12], selector); w[26] = hc_byte_perm_S (w[12], w[11], selector); w[25] = hc_byte_perm_S (w[11], w[10], selector); w[24] = hc_byte_perm_S (w[10], w[ 9], selector); w[23] = 
hc_byte_perm_S (w[ 9], w[ 8], selector); w[22] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[21] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[20] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[19] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[18] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[17] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[16] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[15] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[14] = hc_byte_perm_S (w[ 0], 0, selector); w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 15: w[63] = hc_byte_perm_S (w[48], w[47], selector); w[62] = hc_byte_perm_S (w[47], w[46], selector); w[61] = hc_byte_perm_S (w[46], w[45], selector); w[60] = hc_byte_perm_S (w[45], w[44], selector); w[59] = hc_byte_perm_S (w[44], w[43], selector); w[58] = hc_byte_perm_S (w[43], w[42], selector); w[57] = hc_byte_perm_S (w[42], w[41], selector); w[56] = hc_byte_perm_S (w[41], w[40], selector); w[55] = hc_byte_perm_S (w[40], w[39], selector); w[54] = hc_byte_perm_S (w[39], w[38], selector); w[53] = hc_byte_perm_S (w[38], w[37], selector); w[52] = hc_byte_perm_S (w[37], w[36], selector); w[51] = hc_byte_perm_S (w[36], w[35], selector); w[50] = hc_byte_perm_S (w[35], w[34], selector); w[49] = hc_byte_perm_S (w[34], w[33], selector); w[48] = hc_byte_perm_S (w[33], w[32], selector); w[47] = hc_byte_perm_S (w[32], w[31], selector); w[46] = hc_byte_perm_S (w[31], w[30], selector); w[45] = hc_byte_perm_S (w[30], w[29], selector); w[44] = hc_byte_perm_S (w[29], w[28], selector); w[43] = hc_byte_perm_S (w[28], w[27], selector); w[42] = hc_byte_perm_S (w[27], w[26], selector); w[41] = hc_byte_perm_S (w[26], w[25], selector); w[40] = hc_byte_perm_S (w[25], w[24], selector); w[39] = hc_byte_perm_S (w[24], w[23], selector); w[38] = hc_byte_perm_S (w[23], w[22], selector); w[37] = hc_byte_perm_S (w[22], w[21], selector); w[36] = hc_byte_perm_S 
(w[21], w[20], selector); w[35] = hc_byte_perm_S (w[20], w[19], selector); w[34] = hc_byte_perm_S (w[19], w[18], selector); w[33] = hc_byte_perm_S (w[18], w[17], selector); w[32] = hc_byte_perm_S (w[17], w[16], selector); w[31] = hc_byte_perm_S (w[16], w[15], selector); w[30] = hc_byte_perm_S (w[15], w[14], selector); w[29] = hc_byte_perm_S (w[14], w[13], selector); w[28] = hc_byte_perm_S (w[13], w[12], selector); w[27] = hc_byte_perm_S (w[12], w[11], selector); w[26] = hc_byte_perm_S (w[11], w[10], selector); w[25] = hc_byte_perm_S (w[10], w[ 9], selector); w[24] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[23] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[22] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[21] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[20] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[19] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[18] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[17] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[16] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[15] = hc_byte_perm_S (w[ 0], 0, selector); w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 16: w[63] = hc_byte_perm_S (w[47], w[46], selector); w[62] = hc_byte_perm_S (w[46], w[45], selector); w[61] = hc_byte_perm_S (w[45], w[44], selector); w[60] = hc_byte_perm_S (w[44], w[43], selector); w[59] = hc_byte_perm_S (w[43], w[42], selector); w[58] = hc_byte_perm_S (w[42], w[41], selector); w[57] = hc_byte_perm_S (w[41], w[40], selector); w[56] = hc_byte_perm_S (w[40], w[39], selector); w[55] = hc_byte_perm_S (w[39], w[38], selector); w[54] = hc_byte_perm_S (w[38], w[37], selector); w[53] = hc_byte_perm_S (w[37], w[36], selector); w[52] = hc_byte_perm_S (w[36], w[35], selector); w[51] = hc_byte_perm_S (w[35], w[34], selector); w[50] = hc_byte_perm_S (w[34], w[33], selector); w[49] = hc_byte_perm_S (w[33], w[32], selector); w[48] = hc_byte_perm_S (w[32], 
w[31], selector); w[47] = hc_byte_perm_S (w[31], w[30], selector); w[46] = hc_byte_perm_S (w[30], w[29], selector); w[45] = hc_byte_perm_S (w[29], w[28], selector); w[44] = hc_byte_perm_S (w[28], w[27], selector); w[43] = hc_byte_perm_S (w[27], w[26], selector); w[42] = hc_byte_perm_S (w[26], w[25], selector); w[41] = hc_byte_perm_S (w[25], w[24], selector); w[40] = hc_byte_perm_S (w[24], w[23], selector); w[39] = hc_byte_perm_S (w[23], w[22], selector); w[38] = hc_byte_perm_S (w[22], w[21], selector); w[37] = hc_byte_perm_S (w[21], w[20], selector); w[36] = hc_byte_perm_S (w[20], w[19], selector); w[35] = hc_byte_perm_S (w[19], w[18], selector); w[34] = hc_byte_perm_S (w[18], w[17], selector); w[33] = hc_byte_perm_S (w[17], w[16], selector); w[32] = hc_byte_perm_S (w[16], w[15], selector); w[31] = hc_byte_perm_S (w[15], w[14], selector); w[30] = hc_byte_perm_S (w[14], w[13], selector); w[29] = hc_byte_perm_S (w[13], w[12], selector); w[28] = hc_byte_perm_S (w[12], w[11], selector); w[27] = hc_byte_perm_S (w[11], w[10], selector); w[26] = hc_byte_perm_S (w[10], w[ 9], selector); w[25] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[24] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[23] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[22] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[21] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[20] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[19] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[18] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[17] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[16] = hc_byte_perm_S (w[ 0], 0, selector); w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 17: w[63] = hc_byte_perm_S (w[46], w[45], selector); w[62] = hc_byte_perm_S (w[45], w[44], selector); w[61] = hc_byte_perm_S (w[44], w[43], selector); w[60] = hc_byte_perm_S (w[43], w[42], selector); w[59] = hc_byte_perm_S 
(w[42], w[41], selector); w[58] = hc_byte_perm_S (w[41], w[40], selector); w[57] = hc_byte_perm_S (w[40], w[39], selector); w[56] = hc_byte_perm_S (w[39], w[38], selector); w[55] = hc_byte_perm_S (w[38], w[37], selector); w[54] = hc_byte_perm_S (w[37], w[36], selector); w[53] = hc_byte_perm_S (w[36], w[35], selector); w[52] = hc_byte_perm_S (w[35], w[34], selector); w[51] = hc_byte_perm_S (w[34], w[33], selector); w[50] = hc_byte_perm_S (w[33], w[32], selector); w[49] = hc_byte_perm_S (w[32], w[31], selector); w[48] = hc_byte_perm_S (w[31], w[30], selector); w[47] = hc_byte_perm_S (w[30], w[29], selector); w[46] = hc_byte_perm_S (w[29], w[28], selector); w[45] = hc_byte_perm_S (w[28], w[27], selector); w[44] = hc_byte_perm_S (w[27], w[26], selector); w[43] = hc_byte_perm_S (w[26], w[25], selector); w[42] = hc_byte_perm_S (w[25], w[24], selector); w[41] = hc_byte_perm_S (w[24], w[23], selector); w[40] = hc_byte_perm_S (w[23], w[22], selector); w[39] = hc_byte_perm_S (w[22], w[21], selector); w[38] = hc_byte_perm_S (w[21], w[20], selector); w[37] = hc_byte_perm_S (w[20], w[19], selector); w[36] = hc_byte_perm_S (w[19], w[18], selector); w[35] = hc_byte_perm_S (w[18], w[17], selector); w[34] = hc_byte_perm_S (w[17], w[16], selector); w[33] = hc_byte_perm_S (w[16], w[15], selector); w[32] = hc_byte_perm_S (w[15], w[14], selector); w[31] = hc_byte_perm_S (w[14], w[13], selector); w[30] = hc_byte_perm_S (w[13], w[12], selector); w[29] = hc_byte_perm_S (w[12], w[11], selector); w[28] = hc_byte_perm_S (w[11], w[10], selector); w[27] = hc_byte_perm_S (w[10], w[ 9], selector); w[26] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[25] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[24] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[23] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[22] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[21] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[20] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[19] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[18] = 
hc_byte_perm_S (w[ 1], w[ 0], selector); w[17] = hc_byte_perm_S (w[ 0], 0, selector); w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 18: w[63] = hc_byte_perm_S (w[45], w[44], selector); w[62] = hc_byte_perm_S (w[44], w[43], selector); w[61] = hc_byte_perm_S (w[43], w[42], selector); w[60] = hc_byte_perm_S (w[42], w[41], selector); w[59] = hc_byte_perm_S (w[41], w[40], selector); w[58] = hc_byte_perm_S (w[40], w[39], selector); w[57] = hc_byte_perm_S (w[39], w[38], selector); w[56] = hc_byte_perm_S (w[38], w[37], selector); w[55] = hc_byte_perm_S (w[37], w[36], selector); w[54] = hc_byte_perm_S (w[36], w[35], selector); w[53] = hc_byte_perm_S (w[35], w[34], selector); w[52] = hc_byte_perm_S (w[34], w[33], selector); w[51] = hc_byte_perm_S (w[33], w[32], selector); w[50] = hc_byte_perm_S (w[32], w[31], selector); w[49] = hc_byte_perm_S (w[31], w[30], selector); w[48] = hc_byte_perm_S (w[30], w[29], selector); w[47] = hc_byte_perm_S (w[29], w[28], selector); w[46] = hc_byte_perm_S (w[28], w[27], selector); w[45] = hc_byte_perm_S (w[27], w[26], selector); w[44] = hc_byte_perm_S (w[26], w[25], selector); w[43] = hc_byte_perm_S (w[25], w[24], selector); w[42] = hc_byte_perm_S (w[24], w[23], selector); w[41] = hc_byte_perm_S (w[23], w[22], selector); w[40] = hc_byte_perm_S (w[22], w[21], selector); w[39] = hc_byte_perm_S (w[21], w[20], selector); w[38] = hc_byte_perm_S (w[20], w[19], selector); w[37] = hc_byte_perm_S (w[19], w[18], selector); w[36] = hc_byte_perm_S (w[18], w[17], selector); w[35] = hc_byte_perm_S (w[17], w[16], selector); w[34] = hc_byte_perm_S (w[16], w[15], selector); w[33] = hc_byte_perm_S (w[15], w[14], selector); w[32] = hc_byte_perm_S (w[14], w[13], selector); w[31] = hc_byte_perm_S (w[13], w[12], selector); w[30] = hc_byte_perm_S (w[12], w[11], selector); w[29] = hc_byte_perm_S (w[11], w[10], 
selector); w[28] = hc_byte_perm_S (w[10], w[ 9], selector); w[27] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[26] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[25] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[24] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[23] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[22] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[21] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[20] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[19] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[18] = hc_byte_perm_S (w[ 0], 0, selector); w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 19: w[63] = hc_byte_perm_S (w[44], w[43], selector); w[62] = hc_byte_perm_S (w[43], w[42], selector); w[61] = hc_byte_perm_S (w[42], w[41], selector); w[60] = hc_byte_perm_S (w[41], w[40], selector); w[59] = hc_byte_perm_S (w[40], w[39], selector); w[58] = hc_byte_perm_S (w[39], w[38], selector); w[57] = hc_byte_perm_S (w[38], w[37], selector); w[56] = hc_byte_perm_S (w[37], w[36], selector); w[55] = hc_byte_perm_S (w[36], w[35], selector); w[54] = hc_byte_perm_S (w[35], w[34], selector); w[53] = hc_byte_perm_S (w[34], w[33], selector); w[52] = hc_byte_perm_S (w[33], w[32], selector); w[51] = hc_byte_perm_S (w[32], w[31], selector); w[50] = hc_byte_perm_S (w[31], w[30], selector); w[49] = hc_byte_perm_S (w[30], w[29], selector); w[48] = hc_byte_perm_S (w[29], w[28], selector); w[47] = hc_byte_perm_S (w[28], w[27], selector); w[46] = hc_byte_perm_S (w[27], w[26], selector); w[45] = hc_byte_perm_S (w[26], w[25], selector); w[44] = hc_byte_perm_S (w[25], w[24], selector); w[43] = hc_byte_perm_S (w[24], w[23], selector); w[42] = hc_byte_perm_S (w[23], w[22], selector); w[41] = hc_byte_perm_S (w[22], w[21], selector); w[40] = hc_byte_perm_S (w[21], w[20], selector); w[39] = hc_byte_perm_S (w[20], w[19], selector); w[38] = 
hc_byte_perm_S (w[19], w[18], selector); w[37] = hc_byte_perm_S (w[18], w[17], selector); w[36] = hc_byte_perm_S (w[17], w[16], selector); w[35] = hc_byte_perm_S (w[16], w[15], selector); w[34] = hc_byte_perm_S (w[15], w[14], selector); w[33] = hc_byte_perm_S (w[14], w[13], selector); w[32] = hc_byte_perm_S (w[13], w[12], selector); w[31] = hc_byte_perm_S (w[12], w[11], selector); w[30] = hc_byte_perm_S (w[11], w[10], selector); w[29] = hc_byte_perm_S (w[10], w[ 9], selector); w[28] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[27] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[26] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[25] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[24] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[23] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[22] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[21] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[20] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[19] = hc_byte_perm_S (w[ 0], 0, selector); w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 20: w[63] = hc_byte_perm_S (w[43], w[42], selector); w[62] = hc_byte_perm_S (w[42], w[41], selector); w[61] = hc_byte_perm_S (w[41], w[40], selector); w[60] = hc_byte_perm_S (w[40], w[39], selector); w[59] = hc_byte_perm_S (w[39], w[38], selector); w[58] = hc_byte_perm_S (w[38], w[37], selector); w[57] = hc_byte_perm_S (w[37], w[36], selector); w[56] = hc_byte_perm_S (w[36], w[35], selector); w[55] = hc_byte_perm_S (w[35], w[34], selector); w[54] = hc_byte_perm_S (w[34], w[33], selector); w[53] = hc_byte_perm_S (w[33], w[32], selector); w[52] = hc_byte_perm_S (w[32], w[31], selector); w[51] = hc_byte_perm_S (w[31], w[30], selector); w[50] = hc_byte_perm_S (w[30], w[29], selector); w[49] = hc_byte_perm_S (w[29], w[28], selector); w[48] = hc_byte_perm_S (w[28], w[27], selector); w[47] = hc_byte_perm_S 
(w[27], w[26], selector); w[46] = hc_byte_perm_S (w[26], w[25], selector); w[45] = hc_byte_perm_S (w[25], w[24], selector); w[44] = hc_byte_perm_S (w[24], w[23], selector); w[43] = hc_byte_perm_S (w[23], w[22], selector); w[42] = hc_byte_perm_S (w[22], w[21], selector); w[41] = hc_byte_perm_S (w[21], w[20], selector); w[40] = hc_byte_perm_S (w[20], w[19], selector); w[39] = hc_byte_perm_S (w[19], w[18], selector); w[38] = hc_byte_perm_S (w[18], w[17], selector); w[37] = hc_byte_perm_S (w[17], w[16], selector); w[36] = hc_byte_perm_S (w[16], w[15], selector); w[35] = hc_byte_perm_S (w[15], w[14], selector); w[34] = hc_byte_perm_S (w[14], w[13], selector); w[33] = hc_byte_perm_S (w[13], w[12], selector); w[32] = hc_byte_perm_S (w[12], w[11], selector); w[31] = hc_byte_perm_S (w[11], w[10], selector); w[30] = hc_byte_perm_S (w[10], w[ 9], selector); w[29] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[28] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[27] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[26] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[25] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[24] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[23] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[22] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[21] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[20] = hc_byte_perm_S (w[ 0], 0, selector); w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 21: w[63] = hc_byte_perm_S (w[42], w[41], selector); w[62] = hc_byte_perm_S (w[41], w[40], selector); w[61] = hc_byte_perm_S (w[40], w[39], selector); w[60] = hc_byte_perm_S (w[39], w[38], selector); w[59] = hc_byte_perm_S (w[38], w[37], selector); w[58] = hc_byte_perm_S (w[37], w[36], selector); w[57] = hc_byte_perm_S (w[36], w[35], selector); w[56] = hc_byte_perm_S (w[35], w[34], selector); w[55] = hc_byte_perm_S 
(w[34], w[33], selector); w[54] = hc_byte_perm_S (w[33], w[32], selector); w[53] = hc_byte_perm_S (w[32], w[31], selector); w[52] = hc_byte_perm_S (w[31], w[30], selector); w[51] = hc_byte_perm_S (w[30], w[29], selector); w[50] = hc_byte_perm_S (w[29], w[28], selector); w[49] = hc_byte_perm_S (w[28], w[27], selector); w[48] = hc_byte_perm_S (w[27], w[26], selector); w[47] = hc_byte_perm_S (w[26], w[25], selector); w[46] = hc_byte_perm_S (w[25], w[24], selector); w[45] = hc_byte_perm_S (w[24], w[23], selector); w[44] = hc_byte_perm_S (w[23], w[22], selector); w[43] = hc_byte_perm_S (w[22], w[21], selector); w[42] = hc_byte_perm_S (w[21], w[20], selector); w[41] = hc_byte_perm_S (w[20], w[19], selector); w[40] = hc_byte_perm_S (w[19], w[18], selector); w[39] = hc_byte_perm_S (w[18], w[17], selector); w[38] = hc_byte_perm_S (w[17], w[16], selector); w[37] = hc_byte_perm_S (w[16], w[15], selector); w[36] = hc_byte_perm_S (w[15], w[14], selector); w[35] = hc_byte_perm_S (w[14], w[13], selector); w[34] = hc_byte_perm_S (w[13], w[12], selector); w[33] = hc_byte_perm_S (w[12], w[11], selector); w[32] = hc_byte_perm_S (w[11], w[10], selector); w[31] = hc_byte_perm_S (w[10], w[ 9], selector); w[30] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[29] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[28] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[27] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[26] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[25] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[24] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[23] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[22] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[21] = hc_byte_perm_S (w[ 0], 0, selector); w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 22: w[63] = hc_byte_perm_S (w[41], w[40], selector); w[62] = 
hc_byte_perm_S (w[40], w[39], selector); w[61] = hc_byte_perm_S (w[39], w[38], selector); w[60] = hc_byte_perm_S (w[38], w[37], selector); w[59] = hc_byte_perm_S (w[37], w[36], selector); w[58] = hc_byte_perm_S (w[36], w[35], selector); w[57] = hc_byte_perm_S (w[35], w[34], selector); w[56] = hc_byte_perm_S (w[34], w[33], selector); w[55] = hc_byte_perm_S (w[33], w[32], selector); w[54] = hc_byte_perm_S (w[32], w[31], selector); w[53] = hc_byte_perm_S (w[31], w[30], selector); w[52] = hc_byte_perm_S (w[30], w[29], selector); w[51] = hc_byte_perm_S (w[29], w[28], selector); w[50] = hc_byte_perm_S (w[28], w[27], selector); w[49] = hc_byte_perm_S (w[27], w[26], selector); w[48] = hc_byte_perm_S (w[26], w[25], selector); w[47] = hc_byte_perm_S (w[25], w[24], selector); w[46] = hc_byte_perm_S (w[24], w[23], selector); w[45] = hc_byte_perm_S (w[23], w[22], selector); w[44] = hc_byte_perm_S (w[22], w[21], selector); w[43] = hc_byte_perm_S (w[21], w[20], selector); w[42] = hc_byte_perm_S (w[20], w[19], selector); w[41] = hc_byte_perm_S (w[19], w[18], selector); w[40] = hc_byte_perm_S (w[18], w[17], selector); w[39] = hc_byte_perm_S (w[17], w[16], selector); w[38] = hc_byte_perm_S (w[16], w[15], selector); w[37] = hc_byte_perm_S (w[15], w[14], selector); w[36] = hc_byte_perm_S (w[14], w[13], selector); w[35] = hc_byte_perm_S (w[13], w[12], selector); w[34] = hc_byte_perm_S (w[12], w[11], selector); w[33] = hc_byte_perm_S (w[11], w[10], selector); w[32] = hc_byte_perm_S (w[10], w[ 9], selector); w[31] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[30] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[29] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[28] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[27] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[26] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[25] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[24] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[23] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[22] = hc_byte_perm_S (w[ 0], 0, selector); 
w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 23: w[63] = hc_byte_perm_S (w[40], w[39], selector); w[62] = hc_byte_perm_S (w[39], w[38], selector); w[61] = hc_byte_perm_S (w[38], w[37], selector); w[60] = hc_byte_perm_S (w[37], w[36], selector); w[59] = hc_byte_perm_S (w[36], w[35], selector); w[58] = hc_byte_perm_S (w[35], w[34], selector); w[57] = hc_byte_perm_S (w[34], w[33], selector); w[56] = hc_byte_perm_S (w[33], w[32], selector); w[55] = hc_byte_perm_S (w[32], w[31], selector); w[54] = hc_byte_perm_S (w[31], w[30], selector); w[53] = hc_byte_perm_S (w[30], w[29], selector); w[52] = hc_byte_perm_S (w[29], w[28], selector); w[51] = hc_byte_perm_S (w[28], w[27], selector); w[50] = hc_byte_perm_S (w[27], w[26], selector); w[49] = hc_byte_perm_S (w[26], w[25], selector); w[48] = hc_byte_perm_S (w[25], w[24], selector); w[47] = hc_byte_perm_S (w[24], w[23], selector); w[46] = hc_byte_perm_S (w[23], w[22], selector); w[45] = hc_byte_perm_S (w[22], w[21], selector); w[44] = hc_byte_perm_S (w[21], w[20], selector); w[43] = hc_byte_perm_S (w[20], w[19], selector); w[42] = hc_byte_perm_S (w[19], w[18], selector); w[41] = hc_byte_perm_S (w[18], w[17], selector); w[40] = hc_byte_perm_S (w[17], w[16], selector); w[39] = hc_byte_perm_S (w[16], w[15], selector); w[38] = hc_byte_perm_S (w[15], w[14], selector); w[37] = hc_byte_perm_S (w[14], w[13], selector); w[36] = hc_byte_perm_S (w[13], w[12], selector); w[35] = hc_byte_perm_S (w[12], w[11], selector); w[34] = hc_byte_perm_S (w[11], w[10], selector); w[33] = hc_byte_perm_S (w[10], w[ 9], selector); w[32] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[31] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[30] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[29] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[28] = hc_byte_perm_S (w[ 
5], w[ 4], selector); w[27] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[26] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[25] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[24] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[23] = hc_byte_perm_S (w[ 0], 0, selector); w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 24: w[63] = hc_byte_perm_S (w[39], w[38], selector); w[62] = hc_byte_perm_S (w[38], w[37], selector); w[61] = hc_byte_perm_S (w[37], w[36], selector); w[60] = hc_byte_perm_S (w[36], w[35], selector); w[59] = hc_byte_perm_S (w[35], w[34], selector); w[58] = hc_byte_perm_S (w[34], w[33], selector); w[57] = hc_byte_perm_S (w[33], w[32], selector); w[56] = hc_byte_perm_S (w[32], w[31], selector); w[55] = hc_byte_perm_S (w[31], w[30], selector); w[54] = hc_byte_perm_S (w[30], w[29], selector); w[53] = hc_byte_perm_S (w[29], w[28], selector); w[52] = hc_byte_perm_S (w[28], w[27], selector); w[51] = hc_byte_perm_S (w[27], w[26], selector); w[50] = hc_byte_perm_S (w[26], w[25], selector); w[49] = hc_byte_perm_S (w[25], w[24], selector); w[48] = hc_byte_perm_S (w[24], w[23], selector); w[47] = hc_byte_perm_S (w[23], w[22], selector); w[46] = hc_byte_perm_S (w[22], w[21], selector); w[45] = hc_byte_perm_S (w[21], w[20], selector); w[44] = hc_byte_perm_S (w[20], w[19], selector); w[43] = hc_byte_perm_S (w[19], w[18], selector); w[42] = hc_byte_perm_S (w[18], w[17], selector); w[41] = hc_byte_perm_S (w[17], w[16], selector); w[40] = hc_byte_perm_S (w[16], w[15], selector); w[39] = hc_byte_perm_S (w[15], w[14], selector); w[38] = hc_byte_perm_S (w[14], w[13], selector); w[37] = hc_byte_perm_S (w[13], w[12], selector); w[36] = hc_byte_perm_S (w[12], w[11], selector); w[35] = hc_byte_perm_S (w[11], w[10], selector); w[34] = hc_byte_perm_S (w[10], w[ 9], 
selector); w[33] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[32] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[31] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[30] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[29] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[28] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[27] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[26] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[25] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[24] = hc_byte_perm_S (w[ 0], 0, selector); w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 25: w[63] = hc_byte_perm_S (w[38], w[37], selector); w[62] = hc_byte_perm_S (w[37], w[36], selector); w[61] = hc_byte_perm_S (w[36], w[35], selector); w[60] = hc_byte_perm_S (w[35], w[34], selector); w[59] = hc_byte_perm_S (w[34], w[33], selector); w[58] = hc_byte_perm_S (w[33], w[32], selector); w[57] = hc_byte_perm_S (w[32], w[31], selector); w[56] = hc_byte_perm_S (w[31], w[30], selector); w[55] = hc_byte_perm_S (w[30], w[29], selector); w[54] = hc_byte_perm_S (w[29], w[28], selector); w[53] = hc_byte_perm_S (w[28], w[27], selector); w[52] = hc_byte_perm_S (w[27], w[26], selector); w[51] = hc_byte_perm_S (w[26], w[25], selector); w[50] = hc_byte_perm_S (w[25], w[24], selector); w[49] = hc_byte_perm_S (w[24], w[23], selector); w[48] = hc_byte_perm_S (w[23], w[22], selector); w[47] = hc_byte_perm_S (w[22], w[21], selector); w[46] = hc_byte_perm_S (w[21], w[20], selector); w[45] = hc_byte_perm_S (w[20], w[19], selector); w[44] = hc_byte_perm_S (w[19], w[18], selector); w[43] = hc_byte_perm_S (w[18], w[17], selector); w[42] = hc_byte_perm_S (w[17], w[16], selector); w[41] = hc_byte_perm_S (w[16], w[15], selector); w[40] = hc_byte_perm_S (w[15], w[14], selector); w[39] = hc_byte_perm_S (w[14], w[13], 
selector); w[38] = hc_byte_perm_S (w[13], w[12], selector); w[37] = hc_byte_perm_S (w[12], w[11], selector); w[36] = hc_byte_perm_S (w[11], w[10], selector); w[35] = hc_byte_perm_S (w[10], w[ 9], selector); w[34] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[33] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[32] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[31] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[30] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[29] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[28] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[27] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[26] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[25] = hc_byte_perm_S (w[ 0], 0, selector); w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 26: w[63] = hc_byte_perm_S (w[37], w[36], selector); w[62] = hc_byte_perm_S (w[36], w[35], selector); w[61] = hc_byte_perm_S (w[35], w[34], selector); w[60] = hc_byte_perm_S (w[34], w[33], selector); w[59] = hc_byte_perm_S (w[33], w[32], selector); w[58] = hc_byte_perm_S (w[32], w[31], selector); w[57] = hc_byte_perm_S (w[31], w[30], selector); w[56] = hc_byte_perm_S (w[30], w[29], selector); w[55] = hc_byte_perm_S (w[29], w[28], selector); w[54] = hc_byte_perm_S (w[28], w[27], selector); w[53] = hc_byte_perm_S (w[27], w[26], selector); w[52] = hc_byte_perm_S (w[26], w[25], selector); w[51] = hc_byte_perm_S (w[25], w[24], selector); w[50] = hc_byte_perm_S (w[24], w[23], selector); w[49] = hc_byte_perm_S (w[23], w[22], selector); w[48] = hc_byte_perm_S (w[22], w[21], selector); w[47] = hc_byte_perm_S (w[21], w[20], selector); w[46] = hc_byte_perm_S (w[20], w[19], selector); w[45] = hc_byte_perm_S (w[19], w[18], selector); w[44] = hc_byte_perm_S (w[18], w[17], selector); w[43] = hc_byte_perm_S (w[17], 
w[16], selector); w[42] = hc_byte_perm_S (w[16], w[15], selector); w[41] = hc_byte_perm_S (w[15], w[14], selector); w[40] = hc_byte_perm_S (w[14], w[13], selector); w[39] = hc_byte_perm_S (w[13], w[12], selector); w[38] = hc_byte_perm_S (w[12], w[11], selector); w[37] = hc_byte_perm_S (w[11], w[10], selector); w[36] = hc_byte_perm_S (w[10], w[ 9], selector); w[35] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[34] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[33] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[32] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[31] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[30] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[29] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[28] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[27] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[26] = hc_byte_perm_S (w[ 0], 0, selector); w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 27: w[63] = hc_byte_perm_S (w[36], w[35], selector); w[62] = hc_byte_perm_S (w[35], w[34], selector); w[61] = hc_byte_perm_S (w[34], w[33], selector); w[60] = hc_byte_perm_S (w[33], w[32], selector); w[59] = hc_byte_perm_S (w[32], w[31], selector); w[58] = hc_byte_perm_S (w[31], w[30], selector); w[57] = hc_byte_perm_S (w[30], w[29], selector); w[56] = hc_byte_perm_S (w[29], w[28], selector); w[55] = hc_byte_perm_S (w[28], w[27], selector); w[54] = hc_byte_perm_S (w[27], w[26], selector); w[53] = hc_byte_perm_S (w[26], w[25], selector); w[52] = hc_byte_perm_S (w[25], w[24], selector); w[51] = hc_byte_perm_S (w[24], w[23], selector); w[50] = hc_byte_perm_S (w[23], w[22], selector); w[49] = hc_byte_perm_S (w[22], w[21], selector); w[48] = hc_byte_perm_S (w[21], w[20], selector); w[47] = hc_byte_perm_S (w[20], w[19], selector); w[46] = 
hc_byte_perm_S (w[19], w[18], selector); w[45] = hc_byte_perm_S (w[18], w[17], selector); w[44] = hc_byte_perm_S (w[17], w[16], selector); w[43] = hc_byte_perm_S (w[16], w[15], selector); w[42] = hc_byte_perm_S (w[15], w[14], selector); w[41] = hc_byte_perm_S (w[14], w[13], selector); w[40] = hc_byte_perm_S (w[13], w[12], selector); w[39] = hc_byte_perm_S (w[12], w[11], selector); w[38] = hc_byte_perm_S (w[11], w[10], selector); w[37] = hc_byte_perm_S (w[10], w[ 9], selector); w[36] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[35] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[34] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[33] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[32] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[31] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[30] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[29] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[28] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[27] = hc_byte_perm_S (w[ 0], 0, selector); w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 28: w[63] = hc_byte_perm_S (w[35], w[34], selector); w[62] = hc_byte_perm_S (w[34], w[33], selector); w[61] = hc_byte_perm_S (w[33], w[32], selector); w[60] = hc_byte_perm_S (w[32], w[31], selector); w[59] = hc_byte_perm_S (w[31], w[30], selector); w[58] = hc_byte_perm_S (w[30], w[29], selector); w[57] = hc_byte_perm_S (w[29], w[28], selector); w[56] = hc_byte_perm_S (w[28], w[27], selector); w[55] = hc_byte_perm_S (w[27], w[26], selector); w[54] = hc_byte_perm_S (w[26], w[25], selector); w[53] = hc_byte_perm_S (w[25], w[24], selector); w[52] = hc_byte_perm_S (w[24], w[23], selector); w[51] = hc_byte_perm_S (w[23], w[22], selector); w[50] = hc_byte_perm_S (w[22], w[21], selector); w[49] = hc_byte_perm_S (w[21], 
w[20], selector); w[48] = hc_byte_perm_S (w[20], w[19], selector); w[47] = hc_byte_perm_S (w[19], w[18], selector); w[46] = hc_byte_perm_S (w[18], w[17], selector); w[45] = hc_byte_perm_S (w[17], w[16], selector); w[44] = hc_byte_perm_S (w[16], w[15], selector); w[43] = hc_byte_perm_S (w[15], w[14], selector); w[42] = hc_byte_perm_S (w[14], w[13], selector); w[41] = hc_byte_perm_S (w[13], w[12], selector); w[40] = hc_byte_perm_S (w[12], w[11], selector); w[39] = hc_byte_perm_S (w[11], w[10], selector); w[38] = hc_byte_perm_S (w[10], w[ 9], selector); w[37] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[36] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[35] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[34] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[33] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[32] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[31] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[30] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[29] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[28] = hc_byte_perm_S (w[ 0], 0, selector); w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 29: w[63] = hc_byte_perm_S (w[34], w[33], selector); w[62] = hc_byte_perm_S (w[33], w[32], selector); w[61] = hc_byte_perm_S (w[32], w[31], selector); w[60] = hc_byte_perm_S (w[31], w[30], selector); w[59] = hc_byte_perm_S (w[30], w[29], selector); w[58] = hc_byte_perm_S (w[29], w[28], selector); w[57] = hc_byte_perm_S (w[28], w[27], selector); w[56] = hc_byte_perm_S (w[27], w[26], selector); w[55] = hc_byte_perm_S (w[26], w[25], selector); w[54] = hc_byte_perm_S (w[25], w[24], selector); w[53] = hc_byte_perm_S (w[24], w[23], selector); w[52] = hc_byte_perm_S (w[23], w[22], selector); w[51] = hc_byte_perm_S (w[22], w[21], 
selector); w[50] = hc_byte_perm_S (w[21], w[20], selector); w[49] = hc_byte_perm_S (w[20], w[19], selector); w[48] = hc_byte_perm_S (w[19], w[18], selector); w[47] = hc_byte_perm_S (w[18], w[17], selector); w[46] = hc_byte_perm_S (w[17], w[16], selector); w[45] = hc_byte_perm_S (w[16], w[15], selector); w[44] = hc_byte_perm_S (w[15], w[14], selector); w[43] = hc_byte_perm_S (w[14], w[13], selector); w[42] = hc_byte_perm_S (w[13], w[12], selector); w[41] = hc_byte_perm_S (w[12], w[11], selector); w[40] = hc_byte_perm_S (w[11], w[10], selector); w[39] = hc_byte_perm_S (w[10], w[ 9], selector); w[38] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[37] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[36] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[35] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[34] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[33] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[32] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[31] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[30] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[29] = hc_byte_perm_S (w[ 0], 0, selector); w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 30: w[63] = hc_byte_perm_S (w[33], w[32], selector); w[62] = hc_byte_perm_S (w[32], w[31], selector); w[61] = hc_byte_perm_S (w[31], w[30], selector); w[60] = hc_byte_perm_S (w[30], w[29], selector); w[59] = hc_byte_perm_S (w[29], w[28], selector); w[58] = hc_byte_perm_S (w[28], w[27], selector); w[57] = hc_byte_perm_S (w[27], w[26], selector); w[56] = hc_byte_perm_S (w[26], w[25], selector); w[55] = hc_byte_perm_S (w[25], w[24], selector); w[54] = hc_byte_perm_S (w[24], w[23], selector); w[53] = hc_byte_perm_S (w[23], w[22], selector); w[52] = hc_byte_perm_S (w[22], w[21], 
selector); w[51] = hc_byte_perm_S (w[21], w[20], selector); w[50] = hc_byte_perm_S (w[20], w[19], selector); w[49] = hc_byte_perm_S (w[19], w[18], selector); w[48] = hc_byte_perm_S (w[18], w[17], selector); w[47] = hc_byte_perm_S (w[17], w[16], selector); w[46] = hc_byte_perm_S (w[16], w[15], selector); w[45] = hc_byte_perm_S (w[15], w[14], selector); w[44] = hc_byte_perm_S (w[14], w[13], selector); w[43] = hc_byte_perm_S (w[13], w[12], selector); w[42] = hc_byte_perm_S (w[12], w[11], selector); w[41] = hc_byte_perm_S (w[11], w[10], selector); w[40] = hc_byte_perm_S (w[10], w[ 9], selector); w[39] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[38] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[37] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[36] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[35] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[34] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[33] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[32] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[31] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[30] = hc_byte_perm_S (w[ 0], 0, selector); w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 31: w[63] = hc_byte_perm_S (w[32], w[31], selector); w[62] = hc_byte_perm_S (w[31], w[30], selector); w[61] = hc_byte_perm_S (w[30], w[29], selector); w[60] = hc_byte_perm_S (w[29], w[28], selector); w[59] = hc_byte_perm_S (w[28], w[27], selector); w[58] = hc_byte_perm_S (w[27], w[26], selector); w[57] = hc_byte_perm_S (w[26], w[25], selector); w[56] = hc_byte_perm_S (w[25], w[24], selector); w[55] = hc_byte_perm_S (w[24], w[23], selector); w[54] = hc_byte_perm_S (w[23], w[22], selector); w[53] = hc_byte_perm_S (w[22], w[21], selector); w[52] = hc_byte_perm_S 
(w[21], w[20], selector); w[51] = hc_byte_perm_S (w[20], w[19], selector); w[50] = hc_byte_perm_S (w[19], w[18], selector); w[49] = hc_byte_perm_S (w[18], w[17], selector); w[48] = hc_byte_perm_S (w[17], w[16], selector); w[47] = hc_byte_perm_S (w[16], w[15], selector); w[46] = hc_byte_perm_S (w[15], w[14], selector); w[45] = hc_byte_perm_S (w[14], w[13], selector); w[44] = hc_byte_perm_S (w[13], w[12], selector); w[43] = hc_byte_perm_S (w[12], w[11], selector); w[42] = hc_byte_perm_S (w[11], w[10], selector); w[41] = hc_byte_perm_S (w[10], w[ 9], selector); w[40] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[39] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[38] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[37] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[36] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[35] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[34] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[33] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[32] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[31] = hc_byte_perm_S (w[ 0], 0, selector); w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 32: w[63] = hc_byte_perm_S (w[31], w[30], selector); w[62] = hc_byte_perm_S (w[30], w[29], selector); w[61] = hc_byte_perm_S (w[29], w[28], selector); w[60] = hc_byte_perm_S (w[28], w[27], selector); w[59] = hc_byte_perm_S (w[27], w[26], selector); w[58] = hc_byte_perm_S (w[26], w[25], selector); w[57] = hc_byte_perm_S (w[25], w[24], selector); w[56] = hc_byte_perm_S (w[24], w[23], selector); w[55] = hc_byte_perm_S (w[23], w[22], selector); w[54] = hc_byte_perm_S (w[22], w[21], selector); w[53] = hc_byte_perm_S (w[21], w[20], selector); w[52] = hc_byte_perm_S (w[20], w[19], selector); 
w[51] = hc_byte_perm_S (w[19], w[18], selector); w[50] = hc_byte_perm_S (w[18], w[17], selector); w[49] = hc_byte_perm_S (w[17], w[16], selector); w[48] = hc_byte_perm_S (w[16], w[15], selector); w[47] = hc_byte_perm_S (w[15], w[14], selector); w[46] = hc_byte_perm_S (w[14], w[13], selector); w[45] = hc_byte_perm_S (w[13], w[12], selector); w[44] = hc_byte_perm_S (w[12], w[11], selector); w[43] = hc_byte_perm_S (w[11], w[10], selector); w[42] = hc_byte_perm_S (w[10], w[ 9], selector); w[41] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[40] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[39] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[38] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[37] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[36] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[35] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[34] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[33] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[32] = hc_byte_perm_S (w[ 0], 0, selector); w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 33: w[63] = hc_byte_perm_S (w[30], w[29], selector); w[62] = hc_byte_perm_S (w[29], w[28], selector); w[61] = hc_byte_perm_S (w[28], w[27], selector); w[60] = hc_byte_perm_S (w[27], w[26], selector); w[59] = hc_byte_perm_S (w[26], w[25], selector); w[58] = hc_byte_perm_S (w[25], w[24], selector); w[57] = hc_byte_perm_S (w[24], w[23], selector); w[56] = hc_byte_perm_S (w[23], w[22], selector); w[55] = hc_byte_perm_S (w[22], w[21], selector); w[54] = hc_byte_perm_S (w[21], w[20], selector); w[53] = hc_byte_perm_S (w[20], w[19], selector); w[52] = hc_byte_perm_S (w[19], w[18], selector); w[51] = hc_byte_perm_S (w[18], w[17], selector); w[50] = 
hc_byte_perm_S (w[17], w[16], selector); w[49] = hc_byte_perm_S (w[16], w[15], selector); w[48] = hc_byte_perm_S (w[15], w[14], selector); w[47] = hc_byte_perm_S (w[14], w[13], selector); w[46] = hc_byte_perm_S (w[13], w[12], selector); w[45] = hc_byte_perm_S (w[12], w[11], selector); w[44] = hc_byte_perm_S (w[11], w[10], selector); w[43] = hc_byte_perm_S (w[10], w[ 9], selector); w[42] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[41] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[40] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[39] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[38] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[37] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[36] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[35] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[34] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[33] = hc_byte_perm_S (w[ 0], 0, selector); w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 34: w[63] = hc_byte_perm_S (w[29], w[28], selector); w[62] = hc_byte_perm_S (w[28], w[27], selector); w[61] = hc_byte_perm_S (w[27], w[26], selector); w[60] = hc_byte_perm_S (w[26], w[25], selector); w[59] = hc_byte_perm_S (w[25], w[24], selector); w[58] = hc_byte_perm_S (w[24], w[23], selector); w[57] = hc_byte_perm_S (w[23], w[22], selector); w[56] = hc_byte_perm_S (w[22], w[21], selector); w[55] = hc_byte_perm_S (w[21], w[20], selector); w[54] = hc_byte_perm_S (w[20], w[19], selector); w[53] = hc_byte_perm_S (w[19], w[18], selector); w[52] = hc_byte_perm_S (w[18], w[17], selector); w[51] = hc_byte_perm_S (w[17], w[16], selector); w[50] = hc_byte_perm_S (w[16], w[15], selector); w[49] = hc_byte_perm_S (w[15], w[14], selector); w[48] = 
hc_byte_perm_S (w[14], w[13], selector); w[47] = hc_byte_perm_S (w[13], w[12], selector); w[46] = hc_byte_perm_S (w[12], w[11], selector); w[45] = hc_byte_perm_S (w[11], w[10], selector); w[44] = hc_byte_perm_S (w[10], w[ 9], selector); w[43] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[42] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[41] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[40] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[39] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[38] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[37] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[36] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[35] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[34] = hc_byte_perm_S (w[ 0], 0, selector); w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 35: w[63] = hc_byte_perm_S (w[28], w[27], selector); w[62] = hc_byte_perm_S (w[27], w[26], selector); w[61] = hc_byte_perm_S (w[26], w[25], selector); w[60] = hc_byte_perm_S (w[25], w[24], selector); w[59] = hc_byte_perm_S (w[24], w[23], selector); w[58] = hc_byte_perm_S (w[23], w[22], selector); w[57] = hc_byte_perm_S (w[22], w[21], selector); w[56] = hc_byte_perm_S (w[21], w[20], selector); w[55] = hc_byte_perm_S (w[20], w[19], selector); w[54] = hc_byte_perm_S (w[19], w[18], selector); w[53] = hc_byte_perm_S (w[18], w[17], selector); w[52] = hc_byte_perm_S (w[17], w[16], selector); w[51] = hc_byte_perm_S (w[16], w[15], selector); w[50] = hc_byte_perm_S (w[15], w[14], selector); w[49] = hc_byte_perm_S (w[14], w[13], selector); w[48] = hc_byte_perm_S (w[13], w[12], selector); w[47] = hc_byte_perm_S (w[12], w[11], selector); w[46] = hc_byte_perm_S (w[11], w[10], selector); 
w[45] = hc_byte_perm_S (w[10], w[ 9], selector); w[44] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[43] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[42] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[41] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[40] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[39] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[38] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[37] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[36] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[35] = hc_byte_perm_S (w[ 0], 0, selector); w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 36: w[63] = hc_byte_perm_S (w[27], w[26], selector); w[62] = hc_byte_perm_S (w[26], w[25], selector); w[61] = hc_byte_perm_S (w[25], w[24], selector); w[60] = hc_byte_perm_S (w[24], w[23], selector); w[59] = hc_byte_perm_S (w[23], w[22], selector); w[58] = hc_byte_perm_S (w[22], w[21], selector); w[57] = hc_byte_perm_S (w[21], w[20], selector); w[56] = hc_byte_perm_S (w[20], w[19], selector); w[55] = hc_byte_perm_S (w[19], w[18], selector); w[54] = hc_byte_perm_S (w[18], w[17], selector); w[53] = hc_byte_perm_S (w[17], w[16], selector); w[52] = hc_byte_perm_S (w[16], w[15], selector); w[51] = hc_byte_perm_S (w[15], w[14], selector); w[50] = hc_byte_perm_S (w[14], w[13], selector); w[49] = hc_byte_perm_S (w[13], w[12], selector); w[48] = hc_byte_perm_S (w[12], w[11], selector); w[47] = hc_byte_perm_S (w[11], w[10], selector); w[46] = hc_byte_perm_S (w[10], w[ 9], selector); w[45] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[44] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[43] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[42] = hc_byte_perm_S (w[ 6], w[ 
5], selector); w[41] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[40] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[39] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[38] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[37] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[36] = hc_byte_perm_S (w[ 0], 0, selector); w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 37: w[63] = hc_byte_perm_S (w[26], w[25], selector); w[62] = hc_byte_perm_S (w[25], w[24], selector); w[61] = hc_byte_perm_S (w[24], w[23], selector); w[60] = hc_byte_perm_S (w[23], w[22], selector); w[59] = hc_byte_perm_S (w[22], w[21], selector); w[58] = hc_byte_perm_S (w[21], w[20], selector); w[57] = hc_byte_perm_S (w[20], w[19], selector); w[56] = hc_byte_perm_S (w[19], w[18], selector); w[55] = hc_byte_perm_S (w[18], w[17], selector); w[54] = hc_byte_perm_S (w[17], w[16], selector); w[53] = hc_byte_perm_S (w[16], w[15], selector); w[52] = hc_byte_perm_S (w[15], w[14], selector); w[51] = hc_byte_perm_S (w[14], w[13], selector); w[50] = hc_byte_perm_S (w[13], w[12], selector); w[49] = hc_byte_perm_S (w[12], w[11], selector); w[48] = hc_byte_perm_S (w[11], w[10], selector); w[47] = hc_byte_perm_S (w[10], w[ 9], selector); w[46] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[45] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[44] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[43] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[42] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[41] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[40] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[39] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[38] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[37] = 
hc_byte_perm_S (w[ 0], 0, selector); w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 38: w[63] = hc_byte_perm_S (w[25], w[24], selector); w[62] = hc_byte_perm_S (w[24], w[23], selector); w[61] = hc_byte_perm_S (w[23], w[22], selector); w[60] = hc_byte_perm_S (w[22], w[21], selector); w[59] = hc_byte_perm_S (w[21], w[20], selector); w[58] = hc_byte_perm_S (w[20], w[19], selector); w[57] = hc_byte_perm_S (w[19], w[18], selector); w[56] = hc_byte_perm_S (w[18], w[17], selector); w[55] = hc_byte_perm_S (w[17], w[16], selector); w[54] = hc_byte_perm_S (w[16], w[15], selector); w[53] = hc_byte_perm_S (w[15], w[14], selector); w[52] = hc_byte_perm_S (w[14], w[13], selector); w[51] = hc_byte_perm_S (w[13], w[12], selector); w[50] = hc_byte_perm_S (w[12], w[11], selector); w[49] = hc_byte_perm_S (w[11], w[10], selector); w[48] = hc_byte_perm_S (w[10], w[ 9], selector); w[47] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[46] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[45] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[44] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[43] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[42] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[41] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[40] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[39] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[38] = hc_byte_perm_S (w[ 0], 0, selector); w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] 
= 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 39: w[63] = hc_byte_perm_S (w[24], w[23], selector); w[62] = hc_byte_perm_S (w[23], w[22], selector); w[61] = hc_byte_perm_S (w[22], w[21], selector); w[60] = hc_byte_perm_S (w[21], w[20], selector); w[59] = hc_byte_perm_S (w[20], w[19], selector); w[58] = hc_byte_perm_S (w[19], w[18], selector); w[57] = hc_byte_perm_S (w[18], w[17], selector); w[56] = hc_byte_perm_S (w[17], w[16], selector); w[55] = hc_byte_perm_S (w[16], w[15], selector); w[54] = hc_byte_perm_S (w[15], w[14], selector); w[53] = hc_byte_perm_S (w[14], w[13], selector); w[52] = hc_byte_perm_S (w[13], w[12], selector); w[51] = hc_byte_perm_S (w[12], w[11], selector); w[50] = hc_byte_perm_S (w[11], w[10], selector); w[49] = hc_byte_perm_S (w[10], w[ 9], selector); w[48] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[47] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[46] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[45] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[44] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[43] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[42] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[41] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[40] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[39] = hc_byte_perm_S (w[ 0], 0, selector); w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 40: w[63] = hc_byte_perm_S (w[23], w[22], selector); w[62] = hc_byte_perm_S (w[22], w[21], selector); w[61] = hc_byte_perm_S (w[21], w[20], selector); w[60] = hc_byte_perm_S 
(w[20], w[19], selector); w[59] = hc_byte_perm_S (w[19], w[18], selector); w[58] = hc_byte_perm_S (w[18], w[17], selector); w[57] = hc_byte_perm_S (w[17], w[16], selector); w[56] = hc_byte_perm_S (w[16], w[15], selector); w[55] = hc_byte_perm_S (w[15], w[14], selector); w[54] = hc_byte_perm_S (w[14], w[13], selector); w[53] = hc_byte_perm_S (w[13], w[12], selector); w[52] = hc_byte_perm_S (w[12], w[11], selector); w[51] = hc_byte_perm_S (w[11], w[10], selector); w[50] = hc_byte_perm_S (w[10], w[ 9], selector); w[49] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[48] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[47] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[46] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[45] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[44] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[43] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[42] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[41] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[40] = hc_byte_perm_S (w[ 0], 0, selector); w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 41: w[63] = hc_byte_perm_S (w[22], w[21], selector); w[62] = hc_byte_perm_S (w[21], w[20], selector); w[61] = hc_byte_perm_S (w[20], w[19], selector); w[60] = hc_byte_perm_S (w[19], w[18], selector); w[59] = hc_byte_perm_S (w[18], w[17], selector); w[58] = hc_byte_perm_S (w[17], w[16], selector); w[57] = hc_byte_perm_S (w[16], w[15], selector); w[56] = hc_byte_perm_S (w[15], w[14], selector); w[55] = hc_byte_perm_S (w[14], w[13], selector); w[54] = hc_byte_perm_S (w[13], w[12], selector); w[53] = hc_byte_perm_S (w[12], w[11], selector); 
w[52] = hc_byte_perm_S (w[11], w[10], selector); w[51] = hc_byte_perm_S (w[10], w[ 9], selector); w[50] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[49] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[48] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[47] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[46] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[45] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[44] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[43] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[42] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[41] = hc_byte_perm_S (w[ 0], 0, selector); w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 42: w[63] = hc_byte_perm_S (w[21], w[20], selector); w[62] = hc_byte_perm_S (w[20], w[19], selector); w[61] = hc_byte_perm_S (w[19], w[18], selector); w[60] = hc_byte_perm_S (w[18], w[17], selector); w[59] = hc_byte_perm_S (w[17], w[16], selector); w[58] = hc_byte_perm_S (w[16], w[15], selector); w[57] = hc_byte_perm_S (w[15], w[14], selector); w[56] = hc_byte_perm_S (w[14], w[13], selector); w[55] = hc_byte_perm_S (w[13], w[12], selector); w[54] = hc_byte_perm_S (w[12], w[11], selector); w[53] = hc_byte_perm_S (w[11], w[10], selector); w[52] = hc_byte_perm_S (w[10], w[ 9], selector); w[51] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[50] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[49] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[48] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[47] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[46] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[45] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[44] = 
hc_byte_perm_S (w[ 2], w[ 1], selector); w[43] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[42] = hc_byte_perm_S (w[ 0], 0, selector); w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 43: w[63] = hc_byte_perm_S (w[20], w[19], selector); w[62] = hc_byte_perm_S (w[19], w[18], selector); w[61] = hc_byte_perm_S (w[18], w[17], selector); w[60] = hc_byte_perm_S (w[17], w[16], selector); w[59] = hc_byte_perm_S (w[16], w[15], selector); w[58] = hc_byte_perm_S (w[15], w[14], selector); w[57] = hc_byte_perm_S (w[14], w[13], selector); w[56] = hc_byte_perm_S (w[13], w[12], selector); w[55] = hc_byte_perm_S (w[12], w[11], selector); w[54] = hc_byte_perm_S (w[11], w[10], selector); w[53] = hc_byte_perm_S (w[10], w[ 9], selector); w[52] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[51] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[50] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[49] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[48] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[47] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[46] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[45] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[44] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[43] = hc_byte_perm_S (w[ 0], 0, selector); w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 
0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 44: w[63] = hc_byte_perm_S (w[19], w[18], selector); w[62] = hc_byte_perm_S (w[18], w[17], selector); w[61] = hc_byte_perm_S (w[17], w[16], selector); w[60] = hc_byte_perm_S (w[16], w[15], selector); w[59] = hc_byte_perm_S (w[15], w[14], selector); w[58] = hc_byte_perm_S (w[14], w[13], selector); w[57] = hc_byte_perm_S (w[13], w[12], selector); w[56] = hc_byte_perm_S (w[12], w[11], selector); w[55] = hc_byte_perm_S (w[11], w[10], selector); w[54] = hc_byte_perm_S (w[10], w[ 9], selector); w[53] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[52] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[51] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[50] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[49] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[48] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[47] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[46] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[45] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[44] = hc_byte_perm_S (w[ 0], 0, selector); w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 45: w[63] = hc_byte_perm_S (w[18], w[17], selector); w[62] = hc_byte_perm_S (w[17], w[16], selector); w[61] = hc_byte_perm_S (w[16], w[15], selector); w[60] = hc_byte_perm_S (w[15], w[14], selector); w[59] = hc_byte_perm_S (w[14], w[13], selector); w[58] = hc_byte_perm_S (w[13], w[12], selector); w[57] = hc_byte_perm_S (w[12], w[11], selector); w[56] = hc_byte_perm_S (w[11], w[10], selector); 
w[55] = hc_byte_perm_S (w[10], w[ 9], selector); w[54] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[53] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[52] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[51] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[50] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[49] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[48] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[47] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[46] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[45] = hc_byte_perm_S (w[ 0], 0, selector); w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 46: w[63] = hc_byte_perm_S (w[17], w[16], selector); w[62] = hc_byte_perm_S (w[16], w[15], selector); w[61] = hc_byte_perm_S (w[15], w[14], selector); w[60] = hc_byte_perm_S (w[14], w[13], selector); w[59] = hc_byte_perm_S (w[13], w[12], selector); w[58] = hc_byte_perm_S (w[12], w[11], selector); w[57] = hc_byte_perm_S (w[11], w[10], selector); w[56] = hc_byte_perm_S (w[10], w[ 9], selector); w[55] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[54] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[53] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[52] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[51] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[50] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[49] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[48] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[47] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[46] = hc_byte_perm_S (w[ 0], 0, selector); w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 
0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 47: w[63] = hc_byte_perm_S (w[16], w[15], selector); w[62] = hc_byte_perm_S (w[15], w[14], selector); w[61] = hc_byte_perm_S (w[14], w[13], selector); w[60] = hc_byte_perm_S (w[13], w[12], selector); w[59] = hc_byte_perm_S (w[12], w[11], selector); w[58] = hc_byte_perm_S (w[11], w[10], selector); w[57] = hc_byte_perm_S (w[10], w[ 9], selector); w[56] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[55] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[54] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[53] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[52] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[51] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[50] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[49] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[48] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[47] = hc_byte_perm_S (w[ 0], 0, selector); w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 48: w[63] = hc_byte_perm_S (w[15], w[14], selector); w[62] = hc_byte_perm_S (w[14], w[13], selector); w[61] = hc_byte_perm_S (w[13], w[12], selector); w[60] = hc_byte_perm_S (w[12], w[11], 
selector); w[59] = hc_byte_perm_S (w[11], w[10], selector); w[58] = hc_byte_perm_S (w[10], w[ 9], selector); w[57] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[56] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[55] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[54] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[53] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[52] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[51] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[50] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[49] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[48] = hc_byte_perm_S (w[ 0], 0, selector); w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 49: w[63] = hc_byte_perm_S (w[14], w[13], selector); w[62] = hc_byte_perm_S (w[13], w[12], selector); w[61] = hc_byte_perm_S (w[12], w[11], selector); w[60] = hc_byte_perm_S (w[11], w[10], selector); w[59] = hc_byte_perm_S (w[10], w[ 9], selector); w[58] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[57] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[56] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[55] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[54] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[53] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[52] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[51] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[50] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[49] = hc_byte_perm_S (w[ 0], 0, selector); w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 
0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 50: w[63] = hc_byte_perm_S (w[13], w[12], selector); w[62] = hc_byte_perm_S (w[12], w[11], selector); w[61] = hc_byte_perm_S (w[11], w[10], selector); w[60] = hc_byte_perm_S (w[10], w[ 9], selector); w[59] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[58] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[57] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[56] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[55] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[54] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[53] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[52] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[51] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[50] = hc_byte_perm_S (w[ 0], 0, selector); w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 51: w[63] = hc_byte_perm_S (w[12], w[11], selector); w[62] = hc_byte_perm_S (w[11], w[10], selector); w[61] = hc_byte_perm_S (w[10], w[ 9], selector); w[60] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[59] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[58] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[57] = hc_byte_perm_S (w[ 6], 
w[ 5], selector); w[56] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[55] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[54] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[53] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[52] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[51] = hc_byte_perm_S (w[ 0], 0, selector); w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 52: w[63] = hc_byte_perm_S (w[11], w[10], selector); w[62] = hc_byte_perm_S (w[10], w[ 9], selector); w[61] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[60] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[59] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[58] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[57] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[56] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[55] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[54] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[53] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[52] = hc_byte_perm_S (w[ 0], 0, selector); w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 
3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 53: w[63] = hc_byte_perm_S (w[10], w[ 9], selector); w[62] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[61] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[60] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[59] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[58] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[57] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[56] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[55] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[54] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[53] = hc_byte_perm_S (w[ 0], 0, selector); w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 54: w[63] = hc_byte_perm_S (w[ 9], w[ 8], selector); w[62] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[61] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[60] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[59] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[58] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[57] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[56] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[55] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[54] = hc_byte_perm_S (w[ 0], 0, selector); w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; 
w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 55: w[63] = hc_byte_perm_S (w[ 8], w[ 7], selector); w[62] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[61] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[60] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[59] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[58] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[57] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[56] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[55] = hc_byte_perm_S (w[ 0], 0, selector); w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 56: w[63] = hc_byte_perm_S (w[ 7], w[ 6], selector); w[62] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[61] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[60] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[59] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[58] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[57] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[56] = hc_byte_perm_S (w[ 0], 0, selector); w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 
0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 57: w[63] = hc_byte_perm_S (w[ 6], w[ 5], selector); w[62] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[61] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[60] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[59] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[58] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[57] = hc_byte_perm_S (w[ 0], 0, selector); w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 58: w[63] = hc_byte_perm_S (w[ 5], w[ 4], selector); w[62] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[61] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[60] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[59] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[58] = hc_byte_perm_S (w[ 0], 0, selector); w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; 
w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 59: w[63] = hc_byte_perm_S (w[ 4], w[ 3], selector); w[62] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[61] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[60] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[59] = hc_byte_perm_S (w[ 0], 0, selector); w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 60: w[63] = hc_byte_perm_S (w[ 3], w[ 2], selector); w[62] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[61] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[60] = hc_byte_perm_S (w[ 0], 0, selector); w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] 
= 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 61: w[63] = hc_byte_perm_S (w[ 2], w[ 1], selector); w[62] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[61] = hc_byte_perm_S (w[ 0], 0, selector); w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 62: w[63] = hc_byte_perm_S (w[ 1], w[ 0], selector); w[62] = hc_byte_perm_S (w[ 0], 0, selector); w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; case 63: w[63] = hc_byte_perm_S (w[ 0], 0, selector); w[62] = 0; w[61] = 0; w[60] = 0; w[59] = 0; w[58] = 0; w[57] = 0; w[56] = 0; w[55] = 0; w[54] = 0; w[53] = 0; w[52] = 0; w[51] = 0; w[50] = 0; w[49] = 0; w[48] = 0; w[47] = 0; w[46] = 0; w[45] = 0; w[44] = 0; w[43] = 0; w[42] = 0; w[41] = 0; w[40] = 0; 
w[39] = 0; w[38] = 0; w[37] = 0; w[36] = 0; w[35] = 0; w[34] = 0; w[33] = 0; w[32] = 0; w[31] = 0; w[30] = 0; w[29] = 0; w[28] = 0; w[27] = 0; w[26] = 0; w[25] = 0; w[24] = 0; w[23] = 0; w[22] = 0; w[21] = 0; w[20] = 0; w[19] = 0; w[18] = 0; w[17] = 0; w[16] = 0; w[15] = 0; w[14] = 0; w[13] = 0; w[12] = 0; w[11] = 0; w[10] = 0; w[ 9] = 0; w[ 8] = 0; w[ 7] = 0; w[ 6] = 0; w[ 5] = 0; w[ 4] = 0; w[ 3] = 0; w[ 2] = 0; w[ 1] = 0; w[ 0] = 0; break; } #endif }

/**
 * vector functions on scalar types (for inner loop usage)
 *
 * Every *_VV wrapper below applies a scalar (*_S) routine to one vector lane
 * at a time: the lane is copied out of the u32x arrays into scalar
 * temporaries, the scalar routine is called with that lane's offset, and the
 * temporaries are copied back into the same lane.
 */

// PACKVS<n>: copy lane `e` of the vector array(s) vn into the scalar array(s) sn.
// PACKSV<n>: copy the scalar array(s) sn back into lane `e` of the vector array(s) vn.
// `e` is pasted onto `.s`, so it has to be a bare lane suffix (0..9, a..f).
// Every expansion ends in ';', so call sites may add their own ';' or not.

#define PACKVS2(sn,vn,e) \
  sn[0] = vn[0].s##e;    \
  sn[1] = vn[1].s##e;

#define PACKSV2(sn,vn,e) \
  vn[0].s##e = sn[0];    \
  vn[1].s##e = sn[1];

#define PACKVS4(sn,vn,e) \
  sn[0] = vn[0].s##e;    \
  sn[1] = vn[1].s##e;    \
  sn[2] = vn[2].s##e;    \
  sn[3] = vn[3].s##e;

#define PACKSV4(sn,vn,e) \
  vn[0].s##e = sn[0];    \
  vn[1].s##e = sn[1];    \
  vn[2].s##e = sn[2];    \
  vn[3].s##e = sn[3];

// two 4-element arrays at once

#define PACKVS24(s0,s1,v0,v1,e) \
  PACKVS4 (s0, v0, e);          \
  PACKVS4 (s1, v1, e);

#define PACKSV24(s0,s1,v0,v1,e) \
  PACKSV4 (s0, v0, e);          \
  PACKSV4 (s1, v1, e);

// four 4-element arrays at once (two pairs)

#define PACKVS44(s0,s1,s2,s3,v0,v1,v2,v3,e) \
  PACKVS24 (s0, s1, v0, v1, e)              \
  PACKVS24 (s2, s3, v2, v3, e)

#define PACKSV44(s0,s1,s2,s3,v0,v1,v2,v3,e) \
  PACKSV24 (s0, s1, v0, v1, e)              \
  PACKSV24 (s2, s3, v2, v3, e)

// eight 4-element arrays at once (two quads)

#define PACKVS84(s0,s1,s2,s3,s4,s5,s6,s7,v0,v1,v2,v3,v4,v5,v6,v7,e) \
  PACKVS44 (s0, s1, s2, s3, v0, v1, v2, v3, e)                      \
  PACKVS44 (s4, s5, s6, s7, v4, v5, v6, v7, e)

#define PACKSV84(s0,s1,s2,s3,s4,s5,s6,s7,v0,v1,v2,v3,v4,v5,v6,v7,e) \
  PACKSV44 (s0, s1, s2, s3, v0, v1, v2, v3, e)                      \
  PACKSV44 (s4, s5, s6, s7, v4, v5, v6, v7, e)

// Private helpers for the *_VV wrappers; they are #undef'd again below.
//
// VV_LANE_<k>X4 (fn, e) handles one lane: pack lane e, call the scalar
// routine fn with offset.s<e>, unpack lane e. The helpers are deliberately
// unhygienic and use the enclosing function's w0.., t0.. and offset by name.

#define VV_LANE_2X4(fn,e)         \
  PACKVS24 (t0, t1, w0, w1, e);   \
  fn (t0, t1, offset.s##e);       \
  PACKSV24 (t0, t1, w0, w1, e);

#define VV_LANE_4X4(fn,e)                         \
  PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, e);   \
  fn (t0, t1, t2, t3, offset.s##e);               \
  PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, e);

#define VV_LANE_8X4(fn,e)                                                         \
  PACKVS84 (t0, t1, t2, t3, t4, t5, t6, t7, w0, w1, w2, w3, w4, w5, w6, w7, e);   \
  fn (t0, t1, t2, t3, t4, t5, t6, t7, offset.s##e);                               \
  PACKSV84 (t0, t1, t2, t3, t4, t5, t6, t7, w0, w1, w2, w3, w4, w5, w6, w7, e);

// VV_EACH_LANE (op, fn) repeats op (fn, lane) for every lane of the current
// VECT_SIZE, in ascending lane order. Any other VECT_SIZE expands to nothing.

#if   VECT_SIZE == 2
#define VV_EACH_LANE(op,fn) op (fn, 0) op (fn, 1)
#elif VECT_SIZE == 4
#define VV_EACH_LANE(op,fn) op (fn, 0) op (fn, 1) op (fn, 2) op (fn, 3)
#elif VECT_SIZE == 8
#define VV_EACH_LANE(op,fn) op (fn, 0) op (fn, 1) op (fn, 2) op (fn, 3) op (fn, 4) op (fn, 5) op (fn, 6) op (fn, 7)
#elif VECT_SIZE == 16
#define VV_EACH_LANE(op,fn) op (fn, 0) op (fn, 1) op (fn, 2) op (fn, 3) op (fn, 4) op (fn, 5) op (fn, 6) op (fn, 7) op (fn, 8) op (fn, 9) op (fn, a) op (fn, b) op (fn, c) op (fn, d) op (fn, e) op (fn, f)
#else
#define VV_EACH_LANE(op,fn)
#endif

/**
 * Per-lane form of switch_buffer_by_offset_le_S: w0..w3 are updated in place,
 * each lane using its own component of offset. With VECT_SIZE == 1 the
 * arguments are forwarded to the scalar routine unchanged.
 */

DECLSPEC void switch_buffer_by_offset_le_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset)
{
  #if VECT_SIZE == 1

  switch_buffer_by_offset_le_S (w0, w1, w2, w3, offset);

  #else

  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];

  VV_EACH_LANE (VV_LANE_4X4, switch_buffer_by_offset_le_S)

  #endif
}

/**
 * Per-lane form of switch_buffer_by_offset_8x4_le_S: w0..w7 are updated in
 * place, each lane using its own component of offset. With VECT_SIZE == 1 the
 * arguments are forwarded to the scalar routine unchanged.
 */

DECLSPEC void switch_buffer_by_offset_8x4_le_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32x offset)
{
  #if VECT_SIZE == 1

  switch_buffer_by_offset_8x4_le_S (w0, w1, w2, w3, w4, w5, w6, w7, offset);

  #else

  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];
  u32 t4[4];
  u32 t5[4];
  u32 t6[4];
  u32 t7[4];

  VV_EACH_LANE (VV_LANE_8X4, switch_buffer_by_offset_8x4_le_S)

  #endif
}

/**
 * Per-lane form of append_0x01_2x4_S: w0 and w1 are updated in place, each
 * lane using its own component of offset.
 */

DECLSPEC void append_0x01_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x01_2x4_S (w0, w1, offset);

  #else

  u32 t0[4];
  u32 t1[4];

  VV_EACH_LANE (VV_LANE_2X4, append_0x01_2x4_S)

  #endif
}

/**
 * Per-lane form of append_0x01_4x4_S: w0..w3 are updated in place, each lane
 * using its own component of offset.
 */

DECLSPEC void append_0x01_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x01_4x4_S (w0, w1, w2, w3, offset);

  #else

  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];

  VV_EACH_LANE (VV_LANE_4X4, append_0x01_4x4_S)

  #endif
}

/**
 * Per-lane form of append_0x06_2x4_S: w0 and w1 are updated in place, each
 * lane using its own component of offset.
 */

DECLSPEC void append_0x06_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x06_2x4_S (w0, w1, offset);

  #else

  u32 t0[4];
  u32 t1[4];

  VV_EACH_LANE (VV_LANE_2X4, append_0x06_2x4_S)

  #endif
}

/**
 * Per-lane form of append_0x80_2x4_S: w0 and w1 are updated in place, each
 * lane using its own component of offset.
 */

DECLSPEC void append_0x80_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x80_2x4_S (w0, w1, offset);

  #else

  u32 t0[4];
  u32 t1[4];

  VV_EACH_LANE (VV_LANE_2X4, append_0x80_2x4_S)

  #endif
}

/**
 * Per-lane form of append_0x80_4x4_S: w0..w3 are updated in place, each lane
 * using its own component of offset.
 */

DECLSPEC void append_0x80_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset)
{
  #if VECT_SIZE == 1

  append_0x80_4x4_S (w0, w1, w2, w3, offset);

  #else

  u32 t0[4];
  u32 t1[4];
  u32 t2[4];
  u32 t3[4];

  VV_EACH_LANE (VV_LANE_4X4, append_0x80_4x4_S)

  #endif
}

#undef VV_EACH_LANE
#undef VV_LANE_8X4
#undef VV_LANE_4X4
#undef VV_LANE_2X4

DECLSPEC void append_0x2d_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset) { #if VECT_SIZE == 1 append_0x2d_4x4_S (w0, w1, w2, w3, offset); #else u32 t0[4]; u32 t1[4]; u32 t2[4]; u32 t3[4]; #endif #if VECT_SIZE == 2 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); #elif VECT_SIZE == 4 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 
1); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); #elif VECT_SIZE == 8 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s4); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s5); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s6); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s7); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); #elif VECT_SIZE == 16 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); 
append_0x2d_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s4); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s5); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s6); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s7); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 8); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s8); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 8); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 9); append_0x2d_4x4_S (t0, t1, t2, t3, offset.s9); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 9); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, a); append_0x2d_4x4_S (t0, t1, t2, t3, offset.sa); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, a); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, b); append_0x2d_4x4_S (t0, t1, t2, t3, offset.sb); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, b); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, c); append_0x2d_4x4_S (t0, t1, t2, t3, offset.sc); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, c); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, d); append_0x2d_4x4_S (t0, t1, t2, t3, offset.sd); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, d); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, e); append_0x2d_4x4_S (t0, t1, t2, t3, offset.se); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, e); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, f); append_0x2d_4x4_S (t0, t1, t2, t3, offset.sf); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, f); #endif } DECLSPEC void append_0x3a_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset) { #if VECT_SIZE == 1 append_0x3a_4x4_S (w0, w1, w2, w3, offset); #else u32 t0[4]; u32 t1[4]; 
u32 t2[4]; u32 t3[4]; #endif #if VECT_SIZE == 2 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); #elif VECT_SIZE == 4 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); #elif VECT_SIZE == 8 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s4); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s5); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s6); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s7); PACKSV44 
(t0, t1, t2, t3, w0, w1, w2, w3, 7); #elif VECT_SIZE == 16 PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s0); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 0); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s1); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 1); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s2); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 2); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s3); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 3); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s4); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 4); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s5); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 5); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s6); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 6); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s7); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 7); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 8); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s8); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 8); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, 9); append_0x3a_4x4_S (t0, t1, t2, t3, offset.s9); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, 9); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, a); append_0x3a_4x4_S (t0, t1, t2, t3, offset.sa); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, a); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, b); append_0x3a_4x4_S (t0, t1, t2, t3, offset.sb); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, b); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, c); append_0x3a_4x4_S (t0, t1, t2, t3, offset.sc); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, c); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, d); append_0x3a_4x4_S (t0, t1, t2, t3, offset.sd); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, 
w3, d); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, e); append_0x3a_4x4_S (t0, t1, t2, t3, offset.se); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, e); PACKVS44 (t0, t1, t2, t3, w0, w1, w2, w3, f); append_0x3a_4x4_S (t0, t1, t2, t3, offset.sf); PACKSV44 (t0, t1, t2, t3, w0, w1, w2, w3, f); #endif }