From 892bb5183cf2b377f5183bfde62c895f8eec352a Mon Sep 17 00:00:00 2001 From: philsmd Date: Sun, 7 Jan 2024 11:56:16 +0100 Subject: [PATCH 01/57] update license year to 2024 --- docs/license.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/license.txt b/docs/license.txt index 93d2971ae..88a5d2361 100644 --- a/docs/license.txt +++ b/docs/license.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015-2023 Jens Steube +Copyright (c) 2015-2024 Jens Steube Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From c4bb788ad5cda4f656680f67e3caa0c84a57d175 Mon Sep 17 00:00:00 2001 From: its5Q Date: Thu, 22 Feb 2024 03:39:05 +1000 Subject: [PATCH 02/57] Add hashmodes 26620 and 26630 with dynamic iteration count --- OpenCL/m26620-pure.cl | 375 +++++++++++++++++++++++++++++++++ OpenCL/m26630-pure.cl | 413 ++++++++++++++++++++++++++++++++++++ src/modules/module_26620.c | 415 +++++++++++++++++++++++++++++++++++++ src/modules/module_26630.c | 395 +++++++++++++++++++++++++++++++++++ tools/metamask2hashcat.py | 16 +- 5 files changed, 1612 insertions(+), 2 deletions(-) create mode 100644 OpenCL/m26620-pure.cl create mode 100644 OpenCL/m26630-pure.cl create mode 100644 src/modules/module_26620.c create mode 100644 src/modules/module_26630.c mode change 100755 => 100644 tools/metamask2hashcat.py diff --git a/OpenCL/m26620-pure.cl b/OpenCL/m26620-pure.cl new file mode 100644 index 000000000..26ad55595 --- /dev/null +++ b/OpenCL/m26620-pure.cl @@ -0,0 +1,375 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha256.cl) +#include 
M2S(INCLUDE_PATH/inc_cipher_aes.cl) +#include M2S(INCLUDE_PATH/inc_cipher_aes-gcm.cl) +#endif + +#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) +#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[784]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *ipad, PRIVATE_AS u32x *opad, PRIVATE_AS u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26620_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = 
sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 
7]; + } +} + +KERNEL_FQ void m26620_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= GID_CNT) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < LOOP_CNT; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 
(64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m26620_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= GID_CNT) return; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + 
ukey[7] = tmps[gid].out[7]; + + u32 key_len = 32 * 8; + + u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; + + AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + u32 iv[4]; + + iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[0]; + iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[1]; + iv[2] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[2]; + iv[3] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[3]; + + const u32 iv_len = esalt_bufs[DIGESTS_OFFSET_HOST].iv_len; + + u32 J0[4] = { 0 }; + + AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); + + // ct + + u32 T[4] = { 0 }; + u32 S[4] = { 0 }; + + u32 S_len = 16; + u32 aad_buf[4] = { 0 }; + u32 aad_len = 0; + + AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf, esalt_bufs[DIGESTS_OFFSET_HOST].ct_len, S); + + AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); + + /* compare tag */ + + const u32 r0 = T[0]; + const u32 r1 = T[1]; + const u32 r2 = T[2]; + const u32 r3 = T[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/OpenCL/m26630-pure.cl b/OpenCL/m26630-pure.cl new file mode 100644 index 000000000..39727b730 --- /dev/null +++ b/OpenCL/m26630-pure.cl @@ -0,0 +1,413 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_sha256.cl) +#include M2S(INCLUDE_PATH/inc_cipher_aes.cl) +#include M2S(INCLUDE_PATH/inc_cipher_aes-gcm.cl) +#endif + +#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) +#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct 
pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[16]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +DECLSPEC void hmac_sha256_run_V (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *ipad, PRIVATE_AS u32x *opad, PRIVATE_AS u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); +} + +KERNEL_FQ void m26630_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id (0); + + if (gid >= GID_CNT) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = 
sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final (&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m26630_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + + if ((gid * VECT_SIZE) >= GID_CNT) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, 
gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); + + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); + + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); + + for (u32 j = 0; j < LOOP_CNT; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + 
unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); + + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m26630_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) +{ + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS (); + + #else + + CONSTANT_AS u32a *s_te0 = te0; + CONSTANT_AS u32a *s_te1 = te1; + CONSTANT_AS u32a *s_te2 = te2; + CONSTANT_AS u32a *s_te3 = te3; + CONSTANT_AS u32a *s_te4 = te4; + + #endif + + if (gid >= GID_CNT) return; + + const u32 digest_pos = LOOP_POS; + + const u32 digest_cur = DIGESTS_OFFSET_HOST + digest_pos; + + GLOBAL_AS const pbkdf2_sha256_aes_gcm_t *pbkdf2_sha256_aes_gcm = &esalt_bufs[digest_cur]; + + // keys + + u32 ukey[8]; + + ukey[0] = tmps[gid].out[0]; + ukey[1] = tmps[gid].out[1]; + ukey[2] = tmps[gid].out[2]; + ukey[3] = tmps[gid].out[3]; + ukey[4] = tmps[gid].out[4]; + ukey[5] = tmps[gid].out[5]; + ukey[6] = tmps[gid].out[6]; + ukey[7] = tmps[gid].out[7]; + + u32 key_len = 32 * 8; + + u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; + + AES_GCM_Init (ukey, key_len, key, subKey, 
s_te0, s_te1, s_te2, s_te3, s_te4); + + // iv + + u32 iv[4]; + + iv[0] = pbkdf2_sha256_aes_gcm->iv_buf[0]; + iv[1] = pbkdf2_sha256_aes_gcm->iv_buf[1]; + iv[2] = pbkdf2_sha256_aes_gcm->iv_buf[2]; + iv[3] = pbkdf2_sha256_aes_gcm->iv_buf[3]; + + const u32 iv_len = pbkdf2_sha256_aes_gcm->iv_len; + + u32 J0[4] = { 0 }; + + AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); + + u32 ct[8]; + + ct[0] = pbkdf2_sha256_aes_gcm->ct_buf[0]; // first block of ciphertext + ct[1] = pbkdf2_sha256_aes_gcm->ct_buf[1]; + ct[2] = pbkdf2_sha256_aes_gcm->ct_buf[2]; + ct[3] = pbkdf2_sha256_aes_gcm->ct_buf[3]; + ct[4] = pbkdf2_sha256_aes_gcm->ct_buf[4]; // second block of ciphertext + ct[5] = pbkdf2_sha256_aes_gcm->ct_buf[5]; + ct[6] = pbkdf2_sha256_aes_gcm->ct_buf[6]; + ct[7] = pbkdf2_sha256_aes_gcm->ct_buf[7]; + + u32 pt[8] = { 0 }; + + AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4); + + const int correct = is_valid_printable_32 (pt[0]) + + is_valid_printable_32 (pt[1]) + + is_valid_printable_32 (pt[2]) + + is_valid_printable_32 (pt[3]) + + is_valid_printable_32 (pt[4]) + + is_valid_printable_32 (pt[5]) + + is_valid_printable_32 (pt[6]) + + is_valid_printable_32 (pt[7]); + + if (correct != 8) return; + + /* + const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte + // only we can't guarantee it is not in the middle of a UTF8 byte stream at that point + + if (hc_enc_scan (pt, pt_len)) + { + hc_enc_t hc_enc; + + hc_enc_init (&hc_enc); + + while (hc_enc_has_next (&hc_enc, pt_len)) + { + u32 enc_buf[16] = { 0 }; + + const int enc_len = hc_enc_next (&hc_enc, pt, pt_len, 32, enc_buf, sizeof (enc_buf)); + + if (enc_len == -1) return; + } + } + */ + + const u32 r0 = ct[0]; + const u32 r1 = ct[1]; + const u32 r2 = ct[2]; + const u32 r3 = ct[3]; + + #define il_pos 0 + + #ifdef KERNEL_STATIC + #include COMPARE_M + #endif +} diff --git a/src/modules/module_26620.c b/src/modules/module_26620.c new file 
mode 100644 index 000000000..f3c4509ad --- /dev/null +++ b/src/modules/module_26620.c @@ -0,0 +1,415 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" +#include "memory.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_CRYPTOCURRENCY_WALLET; +static const char *HASH_NAME = "MetaMask Wallet (needs all data, checks AES-GCM tag, dynamic iterations)"; +static const u64 KERN_TYPE = 26620; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; +static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_PT_GENERATE_LE; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat1"; +// hash generated using with python3 tools/metamask2hashcat.py --vault tools/2hashcat_tests/metamask2hashcat.json +static const char *ST_HASH = "$metamask$600000$MBdUsmTcBHGCASECYr3gmD8XaJROwjhOegSWweCFhco=$N1aDRjt2ZD5x15Q1X9zVUw==$G2Qsp8BtQNz9E1vFlSebykcGkps5iJ53fBjJV8GIE7eolAQpz5m25MTZv0b0t2tpqXvfgEYKYZKsoLvuWWRHqj2ommt+U0l6fEGPY6lTn5PYzDtMKNpPWHKCIS/QYhwEA9/X5RtyIwDL6VqcRp6owV+/icmRlQa+TI5buHWZ+99Z1kBbpTDVYLBwhMRsODM1vYizQDg0vFIo3cQDtpRWUqpAKXhFcpgRD+9grS3pP/zdlIUn//87DZ3ue6Sn6WFOe08EvuY8sYZqTiN3GxcfESOltNbZJGcedMubt/jGsk+qIwUWC/f456UgeX9DN7i2pQBBI+L7qkY5v1WT5Y0i8uho1c2M2G8M9miO8HSm/j4bpMN1J6lPtjFhfAzEPS1go1w2vTmOtr1y+2A4M6HEOcxIrPJ8lUUH9pcN7Xpd+u/tQv8BYFxd6RlNYKLyA6OChbF+TD5Zz6oFZQtkprXqzZUFOlxeWJ373kHMISZtXOx44YGaiT2178fXgHFXavw="; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 
module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return 
OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[784]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +static const char *SIGNATURE_METAMASK_WALLET = "$metamask$"; + +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + // Extra treatment for Apple systems + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + return jit_build_options; + } + + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + + // ROCM + if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + + return jit_build_options; +} + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof 
(pbkdf2_sha256_aes_gcm_t); + + return esalt_size; +} + +u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 tmp_size = (const u64) sizeof (pbkdf2_sha256_tmp_t); + + return tmp_size; +} + +u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 8; + + return pw_min; +} + +u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // this overrides the reductions of PW_MAX in case optimized kernel is selected + // IOW, even in optimized kernel mode it support length 256 + + const u32 pw_max = PW_MAX; + + return pw_max; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + pbkdf2_sha256_aes_gcm_t *metamask = (pbkdf2_sha256_aes_gcm_t *) esalt_buf; + + #define CT_MAX_LEN_BASE64 (((3136+16) * 8) / 6) + 3 + + hc_token_t token; + + memset (&token, 0, sizeof (hc_token_t)); + + token.token_cnt = 5; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_METAMASK_WALLET; + + token.len[0] = 10; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + token.sep[1] = '$'; + token.len_min[1] = 1; + token.len_max[1] = 6; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.sep[2] = '$'; + token.len[2] = 44; + token.attr[2] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + token.sep[3] = '$'; + token.len[3] = 24; + token.attr[3] = 
TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + token.sep[4] = '$'; + token.len_min[4] = 64; + token.len_max[4] = CT_MAX_LEN_BASE64; + token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + u8 tmp_buf[CT_MAX_LEN_BASE64] = { 0 }; + + size_t tmp_len = 0; + + // iter + + const u8 *iter_pos = token.buf[1]; + + const int iter = strtol ((const char *) iter_pos, NULL, 10); + + if (iter < 1) return (PARSER_SALT_ITERATION); + + salt->salt_iter = iter - 1; + + // salt + + const u8 *salt_pos = token.buf[2]; + const int salt_len = token.len[2]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + tmp_len = base64_decode (base64_to_int, salt_pos, salt_len, tmp_buf); + + if (tmp_len != 32) return (PARSER_SALT_LENGTH); + + memcpy (salt->salt_buf, tmp_buf, tmp_len); + + salt->salt_len = tmp_len; + + metamask->salt_buf[0] = salt->salt_buf[0]; + metamask->salt_buf[1] = salt->salt_buf[1]; + metamask->salt_buf[2] = salt->salt_buf[2]; + metamask->salt_buf[3] = salt->salt_buf[3]; + metamask->salt_buf[4] = salt->salt_buf[4]; + metamask->salt_buf[5] = salt->salt_buf[5]; + metamask->salt_buf[6] = salt->salt_buf[6]; + metamask->salt_buf[7] = salt->salt_buf[7]; + + // iv + + const u8 *iv_pos = token.buf[3]; + const int iv_len = token.len[3]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + tmp_len = base64_decode (base64_to_int, iv_pos, iv_len, tmp_buf); + + if (tmp_len != 16) return (PARSER_IV_LENGTH); + + memcpy ((u8 *) metamask->iv_buf, tmp_buf, tmp_len); + + metamask->iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); + metamask->iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); + metamask->iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); + metamask->iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); + + metamask->iv_len = tmp_len; + + // ciphertext + + const u8 *ct_pos = token.buf[4]; + const int ct_len = token.len[4]; + + 
memset (tmp_buf, 0, sizeof (tmp_buf)); + + tmp_len = base64_decode (base64_to_int, ct_pos, ct_len, tmp_buf); + + if (tmp_len <= 16) return (PARSER_CT_LENGTH); + + tmp_len -= 16; + + if (tmp_len < 30 || tmp_len > 3136) return (PARSER_CT_LENGTH); + + memcpy ((u8 *) metamask->ct_buf, tmp_buf, tmp_len); + + u32 j = tmp_len / 4; + + if ((tmp_len % 4) > 0) j++; + + for (u32 i = 0; i < j; i++) metamask->ct_buf[i] = byte_swap_32 (metamask->ct_buf[i]); + + metamask->ct_len = tmp_len; + + // tag + + u32 tag_buf[4] = { 0 }; + + memcpy ((u8 *) tag_buf, tmp_buf+metamask->ct_len, 16); + + digest[0] = byte_swap_32 (tag_buf[0]); + digest[1] = byte_swap_32 (tag_buf[1]); + digest[2] = byte_swap_32 (tag_buf[2]); + digest[3] = byte_swap_32 (tag_buf[3]); + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + const pbkdf2_sha256_aes_gcm_t *metamask = (const pbkdf2_sha256_aes_gcm_t *) esalt_buf; + + // salt + + #define SALT_LEN_BASE64 ((32 * 8) / 6) + 3 + #define IV_LEN_BASE64 ((16 * 8) / 6) + 3 + #define CT_MAX_LEN_BASE64 (((3136+16) * 8) / 6) + 3 + + u8 salt_buf[SALT_LEN_BASE64] = { 0 }; + + base64_encode (int_to_base64, (const u8 *) salt->salt_buf, (const int) salt->salt_len, salt_buf); + + // iv + + u32 tmp_iv_buf[4] = { 0 }; + + tmp_iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); + tmp_iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); + tmp_iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); + tmp_iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); + + u8 iv_buf[IV_LEN_BASE64+1] = { 0 }; + + base64_encode (int_to_base64, (const u8 *) tmp_iv_buf, (const int) metamask->iv_len, iv_buf); + + // ct + + u32 ct_len = metamask->ct_len; + + u32 j = ct_len / 4; + + if 
((ct_len % 4) > 0) j++; + + u32 tmp_buf[788] = { 0 }; + + for (u32 i = 0; i < j; i++) tmp_buf[i] = byte_swap_32 (metamask->ct_buf[i]); + + u32 tmp_tag[4] = { 0 }; + + tmp_tag[0] = byte_swap_32 (digest[0]); + tmp_tag[1] = byte_swap_32 (digest[1]); + tmp_tag[2] = byte_swap_32 (digest[2]); + tmp_tag[3] = byte_swap_32 (digest[3]); + + u8 *tmp_buf_str = (u8 *) tmp_buf; + u8 *tmp_tag_str = (u8 *) tmp_tag; + + memcpy (tmp_buf_str+metamask->ct_len, tmp_tag_str, 16); + + u8 ct_buf[CT_MAX_LEN_BASE64] = { 0 }; + + base64_encode (int_to_base64, (const u8 *) tmp_buf, (const int) metamask->ct_len+16, ct_buf); + + u8 *out_buf = (u8 *) line_buf; + + int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt_buf, + iv_buf, + ct_buf); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_charset = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + 
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + 
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = module_pw_max; + module_ctx->module_pw_min = module_pw_min; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = module_tmp_size; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/src/modules/module_26630.c b/src/modules/module_26630.c new file mode 100644 index 000000000..f6d6f596f --- /dev/null +++ b/src/modules/module_26630.c @@ -0,0 +1,395 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" +#include "memory.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_CRYPTOCURRENCY_WALLET; +static const char *HASH_NAME = "MetaMask Wallet (short hash, plaintext check, dynamic iterations)"; +static const u64 KERN_TYPE = 26630; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE + | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; +static 
const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_DEEP_COMP_KERNEL; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat1"; +static const char *ST_HASH = "$metamask-short$600000$MBdUsmTcBHGCASECYr3gmD8XaJROwjhOegSWweCFhco=$N1aDRjt2ZD5x15Q1X9zVUw==$G2Qsp8BtQNz9E1vFlSebykcGkps5iJ53fBjJV8GIE7eolAQpz5m25MTZv0b0t2tpqXvfgEYKYZKsoLvuWWRHqg=="; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t 
*user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +typedef struct pbkdf2_sha256_aes_gcm +{ + u32 salt_buf[64]; + u32 iv_buf[4]; + u32 iv_len; + u32 ct_buf[16]; + u32 ct_len; + +} pbkdf2_sha256_aes_gcm_t; + +static const char *SIGNATURE_METAMASK_WALLET = "$metamask-short$"; + +char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char *jit_build_options = NULL; + + // Extra treatment for Apple systems + if 
(device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + return jit_build_options; + } + + // HIP + if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + + // ROCM + if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) + { + hc_asprintf (&jit_build_options, "-D _unroll"); + } + + return jit_build_options; +} + +u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) +{ + return KERN_RUN_3; +} + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (pbkdf2_sha256_aes_gcm_t); + + return esalt_size; +} + +u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 tmp_size = (const u64) sizeof (pbkdf2_sha256_tmp_t); + + return tmp_size; +} + +u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 8; + + return pw_min; +} + +u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // this overrides the reductions of PW_MAX in case optimized kernel is selected + // IOW, even in optimized kernel mode it support length 256 + + const u32 pw_max = PW_MAX; + + return pw_max; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char 
*line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + pbkdf2_sha256_aes_gcm_t *metamask = (pbkdf2_sha256_aes_gcm_t *) esalt_buf; + + hc_token_t token; + + memset (&token, 0, sizeof (hc_token_t)); + + token.token_cnt = 5; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_METAMASK_WALLET; + + token.len[0] = 16; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + token.sep[1] = '$'; + token.len_min[1] = 1; + token.len_max[1] = 6; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_DIGIT; + + token.sep[2] = '$'; + token.len[2] = 44; + token.attr[2] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + token.sep[3] = '$'; + token.len[3] = 24; + token.attr[3] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + token.sep[4] = '$'; + token.len_min[4] = 88; + token.len_max[4] = 88; + token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + // iter + + const u8 *iter_pos = token.buf[1]; + + const int iter = strtol ((const char *) iter_pos, NULL, 10); + + if (iter < 1) return (PARSER_SALT_ITERATION); + + salt->salt_iter = iter - 1; + + // salt + + const u8 *salt_pos = token.buf[2]; + const int salt_len = token.len[2]; + + u8 tmp_buf[88+1]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + size_t tmp_len = base64_decode (base64_to_int, salt_pos, salt_len, tmp_buf); + + if (tmp_len != 32) return (PARSER_SALT_LENGTH); + + memcpy (salt->salt_buf, tmp_buf, tmp_len); + + salt->salt_len = tmp_len; + + metamask->salt_buf[0] = salt->salt_buf[0]; + metamask->salt_buf[1] = salt->salt_buf[1]; + metamask->salt_buf[2] = salt->salt_buf[2]; + metamask->salt_buf[3] = salt->salt_buf[3]; + metamask->salt_buf[4] = salt->salt_buf[4]; + metamask->salt_buf[5] = salt->salt_buf[5]; + metamask->salt_buf[6] = salt->salt_buf[6]; + 
metamask->salt_buf[7] = salt->salt_buf[7]; + + // iv + + const u8 *iv_pos = token.buf[3]; + const int iv_len = token.len[3]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + tmp_len = base64_decode (base64_to_int, iv_pos, iv_len, tmp_buf); + + if (tmp_len != 16) return (PARSER_IV_LENGTH); + + memcpy ((u8 *) metamask->iv_buf, tmp_buf, tmp_len); + + metamask->iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); + metamask->iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); + metamask->iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); + metamask->iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); + + metamask->iv_len = tmp_len; + + // ciphertext + + const u8 *ct_pos = token.buf[4]; + const int ct_len = token.len[4]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + tmp_len = base64_decode (base64_to_int, ct_pos, ct_len, tmp_buf); + + if (tmp_len != 64) return (PARSER_CT_LENGTH); + + memcpy ((u8 *) metamask->ct_buf, tmp_buf, tmp_len); + + u32 j = tmp_len / 4; + + for (u32 i = 0; i < j; i++) + { + metamask->ct_buf[i] = byte_swap_32 (metamask->ct_buf[i]); + } + + metamask->ct_len = tmp_len; + + digest[0] = metamask->ct_buf[0]; + digest[1] = metamask->ct_buf[1]; + digest[2] = metamask->ct_buf[2]; + digest[3] = metamask->ct_buf[3]; + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const pbkdf2_sha256_aes_gcm_t *metamask = (const pbkdf2_sha256_aes_gcm_t *) esalt_buf; + + // salt + + u8 salt_buf[44+1]; + + memset (salt_buf, 0, sizeof (salt_buf)); + + base64_encode (int_to_base64, (const u8 *) salt->salt_buf, (const int) salt->salt_len, salt_buf); + + // iv + + u32 tmp_iv_buf[4]; + + tmp_iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); + tmp_iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); + tmp_iv_buf[2] = 
byte_swap_32 (metamask->iv_buf[2]); + tmp_iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); + + u8 iv_buf[24+1]; + + memset (iv_buf, 0, sizeof (iv_buf)); + + base64_encode (int_to_base64, (const u8 *) tmp_iv_buf, (const int) metamask->iv_len, iv_buf); + + // ct + + u32 tmp_buf[16]; + + memset (tmp_buf, 0, sizeof (tmp_buf)); + + u32 ct_len = metamask->ct_len; + + u32 j = ct_len / 4; + + for (u32 i = 0; i < j; i++) tmp_buf[i] = byte_swap_32 (metamask->ct_buf[i]); + + u8 ct_buf[88+1]; + + memset (ct_buf, 0, sizeof (ct_buf)); + + base64_encode (int_to_base64, (const u8 *) tmp_buf, (const int) metamask->ct_len, ct_buf); + + u8 *out_buf = (u8 *) line_buf; + + int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt_buf, + iv_buf, + ct_buf); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_charset = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + 
module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + 
module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = module_pw_max; + module_ctx->module_pw_min = module_pw_min; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = module_tmp_size; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/metamask2hashcat.py b/tools/metamask2hashcat.py old mode 100755 new mode 100644 index a2aa99774..6c32e82c7 --- a/tools/metamask2hashcat.py +++ b/tools/metamask2hashcat.py @@ -51,8 +51,14 @@ def metamask_parser(file, shortdata): parser.print_help() exit(1) + if isMobile is False: + if 'keyMetadata' in j and 'params' in j['keyMetadata'] and 'iterations' in j['keyMetadata']['params']: + iter_count = j['keyMetadata']['params']['iterations'] + else: + iter_count = 10000 + if((len(j['data']) > 3000) or shortdata): data_bin = base64.b64decode(j['data']) # TODO limit data to 16 bytes, we only check the first block of data, so we don't need more data. @@ -60,9 +66,15 @@ def metamask_parser(file, shortdata): # Still the pbkdf 10k iter will be taking the most time by far probably. 
j['data'] = base64.b64encode(data_bin[0:64]).decode("ascii") - print('$metamask-short$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + if iter_count != 10000: + print('$metamask-short$' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + else: + print('$metamask-short$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) else: - print('$metamask$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + if iter_count != 10000: + print('$metamask$' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + else: + print('$metamask$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) else: From 205e7f6e71da6509005b06dde2833584f8b84dbf Mon Sep 17 00:00:00 2001 From: its5Q Date: Thu, 22 Feb 2024 03:40:36 +1000 Subject: [PATCH 03/57] Update metamask2hashcat help string --- tools/metamask2hashcat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/metamask2hashcat.py b/tools/metamask2hashcat.py index 6c32e82c7..1e4de119c 100644 --- a/tools/metamask2hashcat.py +++ b/tools/metamask2hashcat.py @@ -94,7 +94,7 @@ def metamask_parser(file, shortdata): if __name__ == "__main__": parser = argparse.ArgumentParser(description="metamask2hashcat.py extraction tool") parser.add_argument('--vault', required=True, help='set metamask vault (json) file from path', type=str) - parser.add_argument('--shortdata', help='force short data, can only be used with m26610, ', action='store_true') + parser.add_argument('--shortdata', help='force short data, can only be used with m26610/m26630, ', action='store_true') args = parser.parse_args() From 55406d8342732f6eafa86b92820ace5e4fe32912 Mon Sep 17 00:00:00 2001 From: its5Q Date: Thu, 22 Feb 2024 22:48:00 +1000 Subject: [PATCH 04/57] Cleaner JSON key handling in metamask2hashcat Co-authored-by: kgolawski --- tools/metamask2hashcat.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/metamask2hashcat.py b/tools/metamask2hashcat.py index 1e4de119c..a31d600f8 100644 --- 
a/tools/metamask2hashcat.py +++ b/tools/metamask2hashcat.py @@ -54,11 +54,10 @@ def metamask_parser(file, shortdata): if isMobile is False: - if 'keyMetadata' in j and 'params' in j['keyMetadata'] and 'iterations' in j['keyMetadata']['params']: + try: iter_count = j['keyMetadata']['params']['iterations'] - else: - iter_count = 10000 - + except KeyError: + iter_count = 10_000 if((len(j['data']) > 3000) or shortdata): data_bin = base64.b64decode(j['data']) # TODO limit data to 16 bytes, we only check the first block of data, so we don't need more data. From d725b4b928d5a0487a58c310235228e2b26ce0ab Mon Sep 17 00:00:00 2001 From: its5Q Date: Fri, 23 Feb 2024 06:22:27 +1000 Subject: [PATCH 05/57] Switch to using TOKEN_ATTR_OPTIONAL_ROUNDS for Metamask hashmodes --- src/modules/module_26600.c | 38 ++-- src/modules/module_26610.c | 36 +++- src/modules/module_26620.c | 415 ------------------------------------- src/modules/module_26630.c | 395 ----------------------------------- tools/metamask2hashcat.py | 2 +- 5 files changed, 52 insertions(+), 834 deletions(-) delete mode 100644 src/modules/module_26620.c delete mode 100644 src/modules/module_26630.c diff --git a/src/modules/module_26600.c b/src/modules/module_26600.c index 4f6d0de3b..1f211310d 100644 --- a/src/modules/module_26600.c +++ b/src/modules/module_26600.c @@ -29,6 +29,8 @@ static const char *ST_PASS = "hashcat1"; // hash generated using with python3 tools/metamask2hashcat.py --vault tools/2hashcat_tests/metamask2hashcat.json static const char *ST_HASH = 
"$metamask$jfGI3TXguhb8GPnKSXFrMzRk2NCEc131Gt5G3kZr5+s=$h+BoIf2CQ5BEjaIOShFE7g==$R95fzGt4UQ0uwrcrVYnIi4UcSlWn9wlmer+//526ZDwYAp50K82F1u1oacYcdjjhuEvbZnWk/uBG00UkgLLlO3WbINljqmu2QWdDEwjTgo/qWR6MU9d/82rxNiONHQE8UrZ8SV+htVr6XIB0ze3aCV0E+fwI93EeP79ZeDxuOEhuHoiYT0bHWMv5nA48AdluG4DbOo7SrDAWBVCBsEdXsOfYsS3/TIh0a/iFCMX4uhxY2824JwcWp4H36SFWyBYMZCJ3/U4DYFbbjWZtGRthoJlIik5BJq4FLu3Y1jEgza0AWlAvu4MKTEqrYSpUIghfxf1a1f+kPvxsHNq0as0kRwCXu09DObbdsiggbmeoBkxMZiFq0d9ar/3Gon0r3hfc3c124Wlivzbzu1JcZ3wURhLSsUS7b5cfG86aXHJkxmQDA5urBz6lw3bsIvlEUB2ErkQy/zD+cPwCG1Rs/WKt7KNh45lppCUkHccbf+xlpdc8OfUwj01Xp7BdH8LMR7Vx1C4hZCvSdtURVl0VaAMxHDX0MjRkwmqS"; +static const u32 ROUNDS_METAMASK = 10000; + u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } @@ -144,9 +146,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE | TOKEN_ATTR_VERIFY_SIGNATURE; token.sep[1] = '$'; - token.len[1] = 44; - token.attr[1] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; + token.len_min[1] = 0; + token.len_max[1] = 60; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_OPTIONAL_ROUNDS; token.sep[2] = '$'; token.len[2] = 24; @@ -169,7 +172,12 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // iter - salt->salt_iter = 10000 - 1; + salt->salt_iter = ROUNDS_METAMASK - 1; + + if (token.opt_len != -1) + { + salt->salt_iter = hc_strtoul ((const char *) token.opt_buf + 7, NULL, 10) - 1; // 7 = "rounds=" + 
} // salt @@ -312,14 +320,20 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE base64_encode (int_to_base64, (const u8 *) tmp_buf, (const int) metamask->ct_len+16, ct_buf); u8 *out_buf = (u8 *) line_buf; - - int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", - SIGNATURE_METAMASK_WALLET, - salt_buf, - iv_buf, - ct_buf); - - return out_len; + + if (salt->salt_iter + 1 != ROUNDS_METAMASK) + return snprintf ((char *) out_buf, line_size, "%srounds=%d$%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt->salt_iter + 1, + salt_buf, + iv_buf, + ct_buf); + else + return snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt_buf, + iv_buf, + ct_buf); } void module_init (module_ctx_t *module_ctx) diff --git a/src/modules/module_26610.c b/src/modules/module_26610.c index 532246128..64cf20f4b 100644 --- a/src/modules/module_26610.c +++ b/src/modules/module_26610.c @@ -29,6 +29,8 @@ static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat1"; static const char *ST_HASH = "$metamask-short$jfGI3TXguhb8GPnKSXFrMzRk2NCEc131Gt5G3kZr5+s=$h+BoIf2CQ5BEjaIOShFE7g==$R95fzGt4UQ0uwrcrVYnIi4UcSlWn9wlmer+//526ZDwYAp50K82F1u1oacYcdjjhuEvbZnWk/uBG00UkgLLlOw=="; +static const u32 ROUNDS_METAMASK = 10000; + u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } @@ -147,9 +149,10 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, 
MAYBE_UNUSE | TOKEN_ATTR_VERIFY_SIGNATURE; token.sep[1] = '$'; - token.len[1] = 44; - token.attr[1] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; + token.len_min[1] = 0; + token.len_max[1] = 60; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_OPTIONAL_ROUNDS; token.sep[2] = '$'; token.len[2] = 24; @@ -168,7 +171,12 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE // iter - salt->salt_iter = 10000 - 1; + salt->salt_iter = ROUNDS_METAMASK - 1; + + if (token.opt_len != -1) + { + salt->salt_iter = hc_strtoul ((const char *) token.opt_buf + 7, NULL, 10) - 1; // 7 = "rounds=" + } // salt @@ -293,13 +301,19 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE u8 *out_buf = (u8 *) line_buf; - int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", - SIGNATURE_METAMASK_WALLET, - salt_buf, - iv_buf, - ct_buf); - - return out_len; + if (salt->salt_iter + 1 != ROUNDS_METAMASK) + return snprintf ((char *) out_buf, line_size, "%srounds=%d$%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt->salt_iter + 1, + salt_buf, + iv_buf, + ct_buf); + else + return snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", + SIGNATURE_METAMASK_WALLET, + salt_buf, + iv_buf, + ct_buf); } void module_init (module_ctx_t *module_ctx) diff --git a/src/modules/module_26620.c b/src/modules/module_26620.c deleted file mode 100644 index f3c4509ad..000000000 --- a/src/modules/module_26620.c +++ /dev/null @@ -1,415 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#include "common.h" -#include "types.h" -#include "modules.h" -#include "bitops.h" -#include "convert.h" -#include "shared.h" -#include "memory.h" - -static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; -static const u32 DGST_POS0 = 0; -static const u32 DGST_POS1 = 1; -static const u32 DGST_POS2 = 2; -static const u32 DGST_POS3 = 3; -static const u32 DGST_SIZE = DGST_SIZE_4_4; -static const u32 HASH_CATEGORY = 
HASH_CATEGORY_CRYPTOCURRENCY_WALLET; -static const char *HASH_NAME = "MetaMask Wallet (needs all data, checks AES-GCM tag, dynamic iterations)"; -static const u64 KERN_TYPE = 26620; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE - | OPTS_TYPE_PT_GENERATE_LE; -static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; -static const char *ST_PASS = "hashcat1"; -// hash generated using with python3 tools/metamask2hashcat.py --vault tools/2hashcat_tests/metamask2hashcat.json -static const char *ST_HASH = "$metamask$600000$MBdUsmTcBHGCASECYr3gmD8XaJROwjhOegSWweCFhco=$N1aDRjt2ZD5x15Q1X9zVUw==$G2Qsp8BtQNz9E1vFlSebykcGkps5iJ53fBjJV8GIE7eolAQpz5m25MTZv0b0t2tpqXvfgEYKYZKsoLvuWWRHqj2ommt+U0l6fEGPY6lTn5PYzDtMKNpPWHKCIS/QYhwEA9/X5RtyIwDL6VqcRp6owV+/icmRlQa+TI5buHWZ+99Z1kBbpTDVYLBwhMRsODM1vYizQDg0vFIo3cQDtpRWUqpAKXhFcpgRD+9grS3pP/zdlIUn//87DZ3ue6Sn6WFOe08EvuY8sYZqTiN3GxcfESOltNbZJGcedMubt/jGsk+qIwUWC/f456UgeX9DN7i2pQBBI+L7qkY5v1WT5Y0i8uho1c2M2G8M9miO8HSm/j4bpMN1J6lPtjFhfAzEPS1go1w2vTmOtr1y+2A4M6HEOcxIrPJ8lUUH9pcN7Xpd+u/tQv8BYFxd6RlNYKLyA6OChbF+TD5Zz6oFZQtkprXqzZUFOlxeWJ373kHMISZtXOx44YGaiT2178fXgHFXavw="; - -u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } -u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } -u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } -u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } 
-u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } -u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } -u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } -const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } -u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } -u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } -u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } -u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } -const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } -const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t 
*user_options_extra) { return ST_PASS; } - -typedef struct pbkdf2_sha256_tmp -{ - u32 ipad[8]; - u32 opad[8]; - - u32 dgst[32]; - u32 out[32]; - -} pbkdf2_sha256_tmp_t; - -typedef struct pbkdf2_sha256_aes_gcm -{ - u32 salt_buf[64]; - u32 iv_buf[4]; - u32 iv_len; - u32 ct_buf[784]; - u32 ct_len; - -} pbkdf2_sha256_aes_gcm_t; - -static const char *SIGNATURE_METAMASK_WALLET = "$metamask$"; - -char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - char *jit_build_options = NULL; - - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } - - // HIP - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - return jit_build_options; -} - -u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 esalt_size = (const u64) sizeof (pbkdf2_sha256_aes_gcm_t); - - return esalt_size; -} - -u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 tmp_size = (const u64) sizeof (pbkdf2_sha256_tmp_t); - - return tmp_size; -} - -u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 pw_min = 8; - - return pw_min; -} - -u32 module_pw_max 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - // this overrides the reductions of PW_MAX in case optimized kernel is selected - // IOW, even in optimized kernel mode it support length 256 - - const u32 pw_max = PW_MAX; - - return pw_max; -} - -int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) -{ - u32 *digest = (u32 *) digest_buf; - - pbkdf2_sha256_aes_gcm_t *metamask = (pbkdf2_sha256_aes_gcm_t *) esalt_buf; - - #define CT_MAX_LEN_BASE64 (((3136+16) * 8) / 6) + 3 - - hc_token_t token; - - memset (&token, 0, sizeof (hc_token_t)); - - token.token_cnt = 5; - - token.signatures_cnt = 1; - token.signatures_buf[0] = SIGNATURE_METAMASK_WALLET; - - token.len[0] = 10; - token.attr[0] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_SIGNATURE; - - token.sep[1] = '$'; - token.len_min[1] = 1; - token.len_max[1] = 6; - token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH - | TOKEN_ATTR_VERIFY_DIGIT; - - token.sep[2] = '$'; - token.len[2] = 44; - token.attr[2] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - token.sep[3] = '$'; - token.len[3] = 24; - token.attr[3] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - token.sep[4] = '$'; - token.len_min[4] = 64; - token.len_max[4] = CT_MAX_LEN_BASE64; - token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); - - if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); - - u8 tmp_buf[CT_MAX_LEN_BASE64] = { 0 }; - - size_t tmp_len = 0; - - // iter - - const u8 *iter_pos = token.buf[1]; - - const int iter = strtol ((const char *) iter_pos, NULL, 10); - - if (iter < 1) return 
(PARSER_SALT_ITERATION); - - salt->salt_iter = iter - 1; - - // salt - - const u8 *salt_pos = token.buf[2]; - const int salt_len = token.len[2]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - tmp_len = base64_decode (base64_to_int, salt_pos, salt_len, tmp_buf); - - if (tmp_len != 32) return (PARSER_SALT_LENGTH); - - memcpy (salt->salt_buf, tmp_buf, tmp_len); - - salt->salt_len = tmp_len; - - metamask->salt_buf[0] = salt->salt_buf[0]; - metamask->salt_buf[1] = salt->salt_buf[1]; - metamask->salt_buf[2] = salt->salt_buf[2]; - metamask->salt_buf[3] = salt->salt_buf[3]; - metamask->salt_buf[4] = salt->salt_buf[4]; - metamask->salt_buf[5] = salt->salt_buf[5]; - metamask->salt_buf[6] = salt->salt_buf[6]; - metamask->salt_buf[7] = salt->salt_buf[7]; - - // iv - - const u8 *iv_pos = token.buf[3]; - const int iv_len = token.len[3]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - tmp_len = base64_decode (base64_to_int, iv_pos, iv_len, tmp_buf); - - if (tmp_len != 16) return (PARSER_IV_LENGTH); - - memcpy ((u8 *) metamask->iv_buf, tmp_buf, tmp_len); - - metamask->iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); - metamask->iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); - metamask->iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); - metamask->iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); - - metamask->iv_len = tmp_len; - - // ciphertext - - const u8 *ct_pos = token.buf[4]; - const int ct_len = token.len[4]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - tmp_len = base64_decode (base64_to_int, ct_pos, ct_len, tmp_buf); - - if (tmp_len <= 16) return (PARSER_CT_LENGTH); - - tmp_len -= 16; - - if (tmp_len < 30 || tmp_len > 3136) return (PARSER_CT_LENGTH); - - memcpy ((u8 *) metamask->ct_buf, tmp_buf, tmp_len); - - u32 j = tmp_len / 4; - - if ((tmp_len % 4) > 0) j++; - - for (u32 i = 0; i < j; i++) metamask->ct_buf[i] = byte_swap_32 (metamask->ct_buf[i]); - - metamask->ct_len = tmp_len; - - // tag - - u32 tag_buf[4] = { 0 }; - - memcpy ((u8 *) tag_buf, tmp_buf+metamask->ct_len, 16); 
- - digest[0] = byte_swap_32 (tag_buf[0]); - digest[1] = byte_swap_32 (tag_buf[1]); - digest[2] = byte_swap_32 (tag_buf[2]); - digest[3] = byte_swap_32 (tag_buf[3]); - - return (PARSER_OK); -} - -int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) -{ - const u32 *digest = (const u32 *) digest_buf; - - const pbkdf2_sha256_aes_gcm_t *metamask = (const pbkdf2_sha256_aes_gcm_t *) esalt_buf; - - // salt - - #define SALT_LEN_BASE64 ((32 * 8) / 6) + 3 - #define IV_LEN_BASE64 ((16 * 8) / 6) + 3 - #define CT_MAX_LEN_BASE64 (((3136+16) * 8) / 6) + 3 - - u8 salt_buf[SALT_LEN_BASE64] = { 0 }; - - base64_encode (int_to_base64, (const u8 *) salt->salt_buf, (const int) salt->salt_len, salt_buf); - - // iv - - u32 tmp_iv_buf[4] = { 0 }; - - tmp_iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); - tmp_iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); - tmp_iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); - tmp_iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); - - u8 iv_buf[IV_LEN_BASE64+1] = { 0 }; - - base64_encode (int_to_base64, (const u8 *) tmp_iv_buf, (const int) metamask->iv_len, iv_buf); - - // ct - - u32 ct_len = metamask->ct_len; - - u32 j = ct_len / 4; - - if ((ct_len % 4) > 0) j++; - - u32 tmp_buf[788] = { 0 }; - - for (u32 i = 0; i < j; i++) tmp_buf[i] = byte_swap_32 (metamask->ct_buf[i]); - - u32 tmp_tag[4] = { 0 }; - - tmp_tag[0] = byte_swap_32 (digest[0]); - tmp_tag[1] = byte_swap_32 (digest[1]); - tmp_tag[2] = byte_swap_32 (digest[2]); - tmp_tag[3] = byte_swap_32 (digest[3]); - - u8 *tmp_buf_str = (u8 *) tmp_buf; - u8 *tmp_tag_str = (u8 *) tmp_tag; - - memcpy (tmp_buf_str+metamask->ct_len, tmp_tag_str, 16); - - u8 ct_buf[CT_MAX_LEN_BASE64] = { 0 }; - - base64_encode (int_to_base64, (const u8 *) tmp_buf, (const int) 
metamask->ct_len+16, ct_buf); - - u8 *out_buf = (u8 *) line_buf; - - int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", - SIGNATURE_METAMASK_WALLET, - salt_buf, - iv_buf, - ct_buf); - - return out_len; -} - -void module_init (module_ctx_t *module_ctx) -{ - module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; - module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; - - module_ctx->module_attack_exec = module_attack_exec; - module_ctx->module_benchmark_esalt = MODULE_DEFAULT; - module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; - module_ctx->module_benchmark_mask = MODULE_DEFAULT; - module_ctx->module_benchmark_charset = MODULE_DEFAULT; - module_ctx->module_benchmark_salt = MODULE_DEFAULT; - module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; - module_ctx->module_deprecated_notice = MODULE_DEFAULT; - module_ctx->module_dgst_pos0 = module_dgst_pos0; - module_ctx->module_dgst_pos1 = module_dgst_pos1; - module_ctx->module_dgst_pos2 = module_dgst_pos2; - module_ctx->module_dgst_pos3 = module_dgst_pos3; - module_ctx->module_dgst_size = module_dgst_size; - module_ctx->module_dictstat_disable = MODULE_DEFAULT; - module_ctx->module_esalt_size = module_esalt_size; - module_ctx->module_extra_buffer_size = MODULE_DEFAULT; - module_ctx->module_extra_tmp_size = MODULE_DEFAULT; - module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; - module_ctx->module_forced_outfile_format = MODULE_DEFAULT; - module_ctx->module_hash_binary_count = MODULE_DEFAULT; - module_ctx->module_hash_binary_parse = MODULE_DEFAULT; - module_ctx->module_hash_binary_save = MODULE_DEFAULT; - module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; - module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; - module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; - module_ctx->module_hash_decode = module_hash_decode; - module_ctx->module_hash_encode_status = MODULE_DEFAULT; - 
module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; - module_ctx->module_hash_encode = module_hash_encode; - module_ctx->module_hash_init_selftest = MODULE_DEFAULT; - module_ctx->module_hash_mode = MODULE_DEFAULT; - module_ctx->module_hash_category = module_hash_category; - module_ctx->module_hash_name = module_hash_name; - module_ctx->module_hashes_count_min = MODULE_DEFAULT; - module_ctx->module_hashes_count_max = MODULE_DEFAULT; - module_ctx->module_hlfmt_disable = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; - module_ctx->module_hook12 = MODULE_DEFAULT; - module_ctx->module_hook23 = MODULE_DEFAULT; - module_ctx->module_hook_salt_size = MODULE_DEFAULT; - module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = module_jit_build_options; - module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = MODULE_DEFAULT; - module_ctx->module_kernel_accel_min = MODULE_DEFAULT; - module_ctx->module_kernel_loops_max = MODULE_DEFAULT; - module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = MODULE_DEFAULT; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; - module_ctx->module_kern_type = module_kern_type; - module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; - module_ctx->module_opti_type = module_opti_type; - module_ctx->module_opts_type = module_opts_type; - module_ctx->module_outfile_check_disable = MODULE_DEFAULT; - module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; - module_ctx->module_potfile_custom_check = MODULE_DEFAULT; - module_ctx->module_potfile_disable = MODULE_DEFAULT; - module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; - module_ctx->module_pwdump_column = MODULE_DEFAULT; - module_ctx->module_pw_max = module_pw_max; - module_ctx->module_pw_min = module_pw_min; - 
module_ctx->module_salt_max = MODULE_DEFAULT; - module_ctx->module_salt_min = MODULE_DEFAULT; - module_ctx->module_salt_type = module_salt_type; - module_ctx->module_separator = MODULE_DEFAULT; - module_ctx->module_st_hash = module_st_hash; - module_ctx->module_st_pass = module_st_pass; - module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = MODULE_DEFAULT; - module_ctx->module_warmup_disable = MODULE_DEFAULT; -} diff --git a/src/modules/module_26630.c b/src/modules/module_26630.c deleted file mode 100644 index f6d6f596f..000000000 --- a/src/modules/module_26630.c +++ /dev/null @@ -1,395 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#include "common.h" -#include "types.h" -#include "modules.h" -#include "bitops.h" -#include "convert.h" -#include "shared.h" -#include "memory.h" - -static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; -static const u32 DGST_POS0 = 0; -static const u32 DGST_POS1 = 1; -static const u32 DGST_POS2 = 2; -static const u32 DGST_POS3 = 3; -static const u32 DGST_SIZE = DGST_SIZE_4_4; -static const u32 HASH_CATEGORY = HASH_CATEGORY_CRYPTOCURRENCY_WALLET; -static const char *HASH_NAME = "MetaMask Wallet (short hash, plaintext check, dynamic iterations)"; -static const u64 KERN_TYPE = 26630; -static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE - | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE - | OPTS_TYPE_PT_GENERATE_LE - | OPTS_TYPE_DEEP_COMP_KERNEL; -static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; -static const char *ST_PASS = "hashcat1"; -static const char *ST_HASH = "$metamask-short$600000$MBdUsmTcBHGCASECYr3gmD8XaJROwjhOegSWweCFhco=$N1aDRjt2ZD5x15Q1X9zVUw==$G2Qsp8BtQNz9E1vFlSebykcGkps5iJ53fBjJV8GIE7eolAQpz5m25MTZv0b0t2tpqXvfgEYKYZKsoLvuWWRHqg=="; - -u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { 
return ATTACK_EXEC; } -u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } -u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } -u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } -u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } -u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } -u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } -const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } -u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } -u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } -u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t 
*user_options_extra) { return OPTS_TYPE; } -u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } -const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } -const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } - -typedef struct pbkdf2_sha256_tmp -{ - u32 ipad[8]; - u32 opad[8]; - - u32 dgst[32]; - u32 out[32]; - -} pbkdf2_sha256_tmp_t; - -typedef struct pbkdf2_sha256_aes_gcm -{ - u32 salt_buf[64]; - u32 iv_buf[4]; - u32 iv_len; - u32 ct_buf[16]; - u32 ct_len; - -} pbkdf2_sha256_aes_gcm_t; - -static const char *SIGNATURE_METAMASK_WALLET = "$metamask-short$"; - -char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) -{ - char *jit_build_options = NULL; - - // Extra treatment for Apple systems - if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) - { - return jit_build_options; - } - - // HIP - if (device_param->opencl_device_vendor_id == VENDOR_ID_AMD_USE_HIP) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - // ROCM - if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) - { - hc_asprintf (&jit_build_options, "-D _unroll"); - } - - return jit_build_options; -} - -u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) -{ - return KERN_RUN_3; -} - -u64 module_esalt_size 
(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 esalt_size = (const u64) sizeof (pbkdf2_sha256_aes_gcm_t); - - return esalt_size; -} - -u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u64 tmp_size = (const u64) sizeof (pbkdf2_sha256_tmp_t); - - return tmp_size; -} - -u32 module_pw_min (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - const u32 pw_min = 8; - - return pw_min; -} - -u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) -{ - // this overrides the reductions of PW_MAX in case optimized kernel is selected - // IOW, even in optimized kernel mode it support length 256 - - const u32 pw_max = PW_MAX; - - return pw_max; -} - -int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) -{ - u32 *digest = (u32 *) digest_buf; - - pbkdf2_sha256_aes_gcm_t *metamask = (pbkdf2_sha256_aes_gcm_t *) esalt_buf; - - hc_token_t token; - - memset (&token, 0, sizeof (hc_token_t)); - - token.token_cnt = 5; - - token.signatures_cnt = 1; - token.signatures_buf[0] = SIGNATURE_METAMASK_WALLET; - - token.len[0] = 16; - token.attr[0] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_SIGNATURE; - - token.sep[1] = '$'; - token.len_min[1] = 1; - token.len_max[1] = 6; - token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH - | TOKEN_ATTR_VERIFY_DIGIT; - - token.sep[2] = '$'; - token.len[2] = 44; - 
token.attr[2] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - token.sep[3] = '$'; - token.len[3] = 24; - token.attr[3] = TOKEN_ATTR_FIXED_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - token.sep[4] = '$'; - token.len_min[4] = 88; - token.len_max[4] = 88; - token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH - | TOKEN_ATTR_VERIFY_BASE64A; - - const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); - - if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); - - // iter - - const u8 *iter_pos = token.buf[1]; - - const int iter = strtol ((const char *) iter_pos, NULL, 10); - - if (iter < 1) return (PARSER_SALT_ITERATION); - - salt->salt_iter = iter - 1; - - // salt - - const u8 *salt_pos = token.buf[2]; - const int salt_len = token.len[2]; - - u8 tmp_buf[88+1]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - size_t tmp_len = base64_decode (base64_to_int, salt_pos, salt_len, tmp_buf); - - if (tmp_len != 32) return (PARSER_SALT_LENGTH); - - memcpy (salt->salt_buf, tmp_buf, tmp_len); - - salt->salt_len = tmp_len; - - metamask->salt_buf[0] = salt->salt_buf[0]; - metamask->salt_buf[1] = salt->salt_buf[1]; - metamask->salt_buf[2] = salt->salt_buf[2]; - metamask->salt_buf[3] = salt->salt_buf[3]; - metamask->salt_buf[4] = salt->salt_buf[4]; - metamask->salt_buf[5] = salt->salt_buf[5]; - metamask->salt_buf[6] = salt->salt_buf[6]; - metamask->salt_buf[7] = salt->salt_buf[7]; - - // iv - - const u8 *iv_pos = token.buf[3]; - const int iv_len = token.len[3]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - tmp_len = base64_decode (base64_to_int, iv_pos, iv_len, tmp_buf); - - if (tmp_len != 16) return (PARSER_IV_LENGTH); - - memcpy ((u8 *) metamask->iv_buf, tmp_buf, tmp_len); - - metamask->iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); - metamask->iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); - metamask->iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); - metamask->iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); - - metamask->iv_len = tmp_len; - - // ciphertext 
- - const u8 *ct_pos = token.buf[4]; - const int ct_len = token.len[4]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - tmp_len = base64_decode (base64_to_int, ct_pos, ct_len, tmp_buf); - - if (tmp_len != 64) return (PARSER_CT_LENGTH); - - memcpy ((u8 *) metamask->ct_buf, tmp_buf, tmp_len); - - u32 j = tmp_len / 4; - - for (u32 i = 0; i < j; i++) - { - metamask->ct_buf[i] = byte_swap_32 (metamask->ct_buf[i]); - } - - metamask->ct_len = tmp_len; - - digest[0] = metamask->ct_buf[0]; - digest[1] = metamask->ct_buf[1]; - digest[2] = metamask->ct_buf[2]; - digest[3] = metamask->ct_buf[3]; - - return (PARSER_OK); -} - -int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) -{ - const pbkdf2_sha256_aes_gcm_t *metamask = (const pbkdf2_sha256_aes_gcm_t *) esalt_buf; - - // salt - - u8 salt_buf[44+1]; - - memset (salt_buf, 0, sizeof (salt_buf)); - - base64_encode (int_to_base64, (const u8 *) salt->salt_buf, (const int) salt->salt_len, salt_buf); - - // iv - - u32 tmp_iv_buf[4]; - - tmp_iv_buf[0] = byte_swap_32 (metamask->iv_buf[0]); - tmp_iv_buf[1] = byte_swap_32 (metamask->iv_buf[1]); - tmp_iv_buf[2] = byte_swap_32 (metamask->iv_buf[2]); - tmp_iv_buf[3] = byte_swap_32 (metamask->iv_buf[3]); - - u8 iv_buf[24+1]; - - memset (iv_buf, 0, sizeof (iv_buf)); - - base64_encode (int_to_base64, (const u8 *) tmp_iv_buf, (const int) metamask->iv_len, iv_buf); - - // ct - - u32 tmp_buf[16]; - - memset (tmp_buf, 0, sizeof (tmp_buf)); - - u32 ct_len = metamask->ct_len; - - u32 j = ct_len / 4; - - for (u32 i = 0; i < j; i++) tmp_buf[i] = byte_swap_32 (metamask->ct_buf[i]); - - u8 ct_buf[88+1]; - - memset (ct_buf, 0, sizeof (ct_buf)); - - base64_encode (int_to_base64, (const u8 *) tmp_buf, (const int) metamask->ct_len, ct_buf); - - u8 *out_buf 
= (u8 *) line_buf; - - int out_len = snprintf ((char *) out_buf, line_size, "%s%s$%s$%s", - SIGNATURE_METAMASK_WALLET, - salt_buf, - iv_buf, - ct_buf); - - return out_len; -} - -void module_init (module_ctx_t *module_ctx) -{ - module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; - module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; - - module_ctx->module_attack_exec = module_attack_exec; - module_ctx->module_benchmark_esalt = MODULE_DEFAULT; - module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; - module_ctx->module_benchmark_mask = MODULE_DEFAULT; - module_ctx->module_benchmark_charset = MODULE_DEFAULT; - module_ctx->module_benchmark_salt = MODULE_DEFAULT; - module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; - module_ctx->module_deprecated_notice = MODULE_DEFAULT; - module_ctx->module_dgst_pos0 = module_dgst_pos0; - module_ctx->module_dgst_pos1 = module_dgst_pos1; - module_ctx->module_dgst_pos2 = module_dgst_pos2; - module_ctx->module_dgst_pos3 = module_dgst_pos3; - module_ctx->module_dgst_size = module_dgst_size; - module_ctx->module_dictstat_disable = MODULE_DEFAULT; - module_ctx->module_esalt_size = module_esalt_size; - module_ctx->module_extra_buffer_size = MODULE_DEFAULT; - module_ctx->module_extra_tmp_size = MODULE_DEFAULT; - module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; - module_ctx->module_forced_outfile_format = MODULE_DEFAULT; - module_ctx->module_hash_binary_count = MODULE_DEFAULT; - module_ctx->module_hash_binary_parse = MODULE_DEFAULT; - module_ctx->module_hash_binary_save = MODULE_DEFAULT; - module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; - module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; - module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; - module_ctx->module_hash_decode = module_hash_decode; - module_ctx->module_hash_encode_status = MODULE_DEFAULT; - module_ctx->module_hash_encode_potfile = 
MODULE_DEFAULT; - module_ctx->module_hash_encode = module_hash_encode; - module_ctx->module_hash_init_selftest = MODULE_DEFAULT; - module_ctx->module_hash_mode = MODULE_DEFAULT; - module_ctx->module_hash_category = module_hash_category; - module_ctx->module_hash_name = module_hash_name; - module_ctx->module_hashes_count_min = MODULE_DEFAULT; - module_ctx->module_hashes_count_max = MODULE_DEFAULT; - module_ctx->module_hlfmt_disable = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; - module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; - module_ctx->module_hook12 = MODULE_DEFAULT; - module_ctx->module_hook23 = MODULE_DEFAULT; - module_ctx->module_hook_salt_size = MODULE_DEFAULT; - module_ctx->module_hook_size = MODULE_DEFAULT; - module_ctx->module_jit_build_options = module_jit_build_options; - module_ctx->module_jit_cache_disable = MODULE_DEFAULT; - module_ctx->module_kernel_accel_max = MODULE_DEFAULT; - module_ctx->module_kernel_accel_min = MODULE_DEFAULT; - module_ctx->module_kernel_loops_max = MODULE_DEFAULT; - module_ctx->module_kernel_loops_min = MODULE_DEFAULT; - module_ctx->module_kernel_threads_max = MODULE_DEFAULT; - module_ctx->module_kernel_threads_min = MODULE_DEFAULT; - module_ctx->module_kern_type = module_kern_type; - module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; - module_ctx->module_opti_type = module_opti_type; - module_ctx->module_opts_type = module_opts_type; - module_ctx->module_outfile_check_disable = MODULE_DEFAULT; - module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; - module_ctx->module_potfile_custom_check = MODULE_DEFAULT; - module_ctx->module_potfile_disable = MODULE_DEFAULT; - module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; - module_ctx->module_pwdump_column = MODULE_DEFAULT; - module_ctx->module_pw_max = module_pw_max; - module_ctx->module_pw_min = module_pw_min; - module_ctx->module_salt_max = MODULE_DEFAULT; - 
module_ctx->module_salt_min = MODULE_DEFAULT; - module_ctx->module_salt_type = module_salt_type; - module_ctx->module_separator = MODULE_DEFAULT; - module_ctx->module_st_hash = module_st_hash; - module_ctx->module_st_pass = module_st_pass; - module_ctx->module_tmp_size = module_tmp_size; - module_ctx->module_unstable_warning = MODULE_DEFAULT; - module_ctx->module_warmup_disable = MODULE_DEFAULT; -} diff --git a/tools/metamask2hashcat.py b/tools/metamask2hashcat.py index 1e4de119c..6c32e82c7 100644 --- a/tools/metamask2hashcat.py +++ b/tools/metamask2hashcat.py @@ -94,7 +94,7 @@ def metamask_parser(file, shortdata): if __name__ == "__main__": parser = argparse.ArgumentParser(description="metamask2hashcat.py extraction tool") parser.add_argument('--vault', required=True, help='set metamask vault (json) file from path', type=str) - parser.add_argument('--shortdata', help='force short data, can only be used with m26610/m26630, ', action='store_true') + parser.add_argument('--shortdata', help='force short data, can only be used with m26610, ', action='store_true') args = parser.parse_args() From 5f9caed13db94572a00e1ba7831ad6da05abf2ad Mon Sep 17 00:00:00 2001 From: its5Q Date: Fri, 23 Feb 2024 06:24:24 +1000 Subject: [PATCH 06/57] Remove unused Metamask OpenCL kernels --- OpenCL/m26620-pure.cl | 375 -------------------------------------- OpenCL/m26630-pure.cl | 413 ------------------------------------------ 2 files changed, 788 deletions(-) delete mode 100644 OpenCL/m26620-pure.cl delete mode 100644 OpenCL/m26630-pure.cl diff --git a/OpenCL/m26620-pure.cl b/OpenCL/m26620-pure.cl deleted file mode 100644 index 26ad55595..000000000 --- a/OpenCL/m26620-pure.cl +++ /dev/null @@ -1,375 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#define NEW_SIMD_CODE - -#ifdef KERNEL_STATIC -#include M2S(INCLUDE_PATH/inc_vendor.h) -#include M2S(INCLUDE_PATH/inc_types.h) -#include M2S(INCLUDE_PATH/inc_platform.cl) -#include 
M2S(INCLUDE_PATH/inc_common.cl) -#include M2S(INCLUDE_PATH/inc_simd.cl) -#include M2S(INCLUDE_PATH/inc_hash_sha256.cl) -#include M2S(INCLUDE_PATH/inc_cipher_aes.cl) -#include M2S(INCLUDE_PATH/inc_cipher_aes-gcm.cl) -#endif - -#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) -#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) - -typedef struct pbkdf2_sha256_tmp -{ - u32 ipad[8]; - u32 opad[8]; - - u32 dgst[32]; - u32 out[32]; - -} pbkdf2_sha256_tmp_t; - -typedef struct pbkdf2_sha256_aes_gcm -{ - u32 salt_buf[64]; - u32 iv_buf[4]; - u32 iv_len; - u32 ct_buf[784]; - u32 ct_len; - -} pbkdf2_sha256_aes_gcm_t; - -DECLSPEC void hmac_sha256_run_V (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *ipad, PRIVATE_AS u32x *opad, PRIVATE_AS u32x *digest) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform_vector (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform_vector (w0, w1, w2, w3, digest); -} - -KERNEL_FQ void m26620_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - /** - * base - */ - - const u64 gid = get_global_id (0); - - if (gid >= GID_CNT) return; - - sha256_hmac_ctx_t sha256_hmac_ctx; - - sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); - - tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = 
sha256_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; - tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; - tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; - tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; - - tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; - tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; - tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; - tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); - - sha256_hmac_final (&sha256_hmac_ctx2); - - tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; - tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; - tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; - tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; - - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i 
+ 4]; - tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; - tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; - tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; - } -} - -KERNEL_FQ void m26620_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - const u64 gid = get_global_id (0); - - if ((gid * VECT_SIZE) >= GID_CNT) return; - - u32x ipad[8]; - u32x opad[8]; - - ipad[0] = packv (tmps, ipad, gid, 0); - ipad[1] = packv (tmps, ipad, gid, 1); - ipad[2] = packv (tmps, ipad, gid, 2); - ipad[3] = packv (tmps, ipad, gid, 3); - ipad[4] = packv (tmps, ipad, gid, 4); - ipad[5] = packv (tmps, ipad, gid, 5); - ipad[6] = packv (tmps, ipad, gid, 6); - ipad[7] = packv (tmps, ipad, gid, 7); - - opad[0] = packv (tmps, opad, gid, 0); - opad[1] = packv (tmps, opad, gid, 1); - opad[2] = packv (tmps, opad, gid, 2); - opad[3] = packv (tmps, opad, gid, 3); - opad[4] = packv (tmps, opad, gid, 4); - opad[5] = packv (tmps, opad, gid, 5); - opad[6] = packv (tmps, opad, gid, 6); - opad[7] = packv (tmps, opad, gid, 7); - - for (u32 i = 0; i < 8; i += 8) - { - u32x dgst[8]; - u32x out[8]; - - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); - dgst[5] = packv (tmps, dgst, gid, i + 5); - dgst[6] = packv (tmps, dgst, gid, i + 6); - dgst[7] = packv (tmps, dgst, gid, i + 7); - - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); - out[5] = packv (tmps, out, gid, i + 5); - out[6] = packv (tmps, out, gid, i + 6); - out[7] = packv (tmps, out, gid, i + 7); - - for (u32 j = 0; j < LOOP_CNT; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - 
w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - unpackv (tmps, dgst, gid, i + 5, dgst[5]); - unpackv (tmps, dgst, gid, i + 6, dgst[6]); - unpackv (tmps, dgst, gid, i + 7, dgst[7]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); - unpackv (tmps, out, gid, i + 5, out[5]); - unpackv (tmps, out, gid, i + 6, out[6]); - unpackv (tmps, out, gid, i + 7, out[7]); - } -} - -KERNEL_FQ void m26620_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - const u64 gid = get_global_id (0); - const u64 lid = get_local_id (0); - const u64 lsz = get_local_size (0); - - /** - * aes shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } - - SYNC_THREADS (); - - #else - - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; - - #endif - - if (gid >= GID_CNT) return; - - // keys - - u32 ukey[8]; - - ukey[0] = tmps[gid].out[0]; - ukey[1] = tmps[gid].out[1]; - 
ukey[2] = tmps[gid].out[2]; - ukey[3] = tmps[gid].out[3]; - ukey[4] = tmps[gid].out[4]; - ukey[5] = tmps[gid].out[5]; - ukey[6] = tmps[gid].out[6]; - ukey[7] = tmps[gid].out[7]; - - u32 key_len = 32 * 8; - - u32 key[60] = { 0 }; - u32 subKey[4] = { 0 }; - - AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); - - // iv - - u32 iv[4]; - - iv[0] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[0]; - iv[1] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[1]; - iv[2] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[2]; - iv[3] = esalt_bufs[DIGESTS_OFFSET_HOST].iv_buf[3]; - - const u32 iv_len = esalt_bufs[DIGESTS_OFFSET_HOST].iv_len; - - u32 J0[4] = { 0 }; - - AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); - - // ct - - u32 T[4] = { 0 }; - u32 S[4] = { 0 }; - - u32 S_len = 16; - u32 aad_buf[4] = { 0 }; - u32 aad_len = 0; - - AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET_HOST].ct_buf, esalt_bufs[DIGESTS_OFFSET_HOST].ct_len, S); - - AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); - - /* compare tag */ - - const u32 r0 = T[0]; - const u32 r1 = T[1]; - const u32 r2 = T[2]; - const u32 r3 = T[3]; - - #define il_pos 0 - - #ifdef KERNEL_STATIC - #include COMPARE_M - #endif -} diff --git a/OpenCL/m26630-pure.cl b/OpenCL/m26630-pure.cl deleted file mode 100644 index 39727b730..000000000 --- a/OpenCL/m26630-pure.cl +++ /dev/null @@ -1,413 +0,0 @@ -/** - * Author......: See docs/credits.txt - * License.....: MIT - */ - -#define NEW_SIMD_CODE - -#ifdef KERNEL_STATIC -#include M2S(INCLUDE_PATH/inc_vendor.h) -#include M2S(INCLUDE_PATH/inc_types.h) -#include M2S(INCLUDE_PATH/inc_platform.cl) -#include M2S(INCLUDE_PATH/inc_common.cl) -#include M2S(INCLUDE_PATH/inc_simd.cl) -#include M2S(INCLUDE_PATH/inc_hash_sha256.cl) -#include M2S(INCLUDE_PATH/inc_cipher_aes.cl) -#include M2S(INCLUDE_PATH/inc_cipher_aes-gcm.cl) -#endif - -#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl) -#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl) - 
-typedef struct pbkdf2_sha256_tmp -{ - u32 ipad[8]; - u32 opad[8]; - - u32 dgst[32]; - u32 out[32]; - -} pbkdf2_sha256_tmp_t; - -typedef struct pbkdf2_sha256_aes_gcm -{ - u32 salt_buf[64]; - u32 iv_buf[4]; - u32 iv_len; - u32 ct_buf[16]; - u32 ct_len; - -} pbkdf2_sha256_aes_gcm_t; - -DECLSPEC void hmac_sha256_run_V (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *ipad, PRIVATE_AS u32x *opad, PRIVATE_AS u32x *digest) -{ - digest[0] = ipad[0]; - digest[1] = ipad[1]; - digest[2] = ipad[2]; - digest[3] = ipad[3]; - digest[4] = ipad[4]; - digest[5] = ipad[5]; - digest[6] = ipad[6]; - digest[7] = ipad[7]; - - sha256_transform_vector (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - digest[0] = opad[0]; - digest[1] = opad[1]; - digest[2] = opad[2]; - digest[3] = opad[3]; - digest[4] = opad[4]; - digest[5] = opad[5]; - digest[6] = opad[6]; - digest[7] = opad[7]; - - sha256_transform_vector (w0, w1, w2, w3, digest); -} - -KERNEL_FQ void m26630_init (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - /** - * base - */ - - const u64 gid = get_global_id (0); - - if (gid >= GID_CNT) return; - - sha256_hmac_ctx_t sha256_hmac_ctx; - - sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); - - tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; - tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; - tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; - tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; - tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; - tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; - tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; - tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; - - tmps[gid].opad[0] = 
sha256_hmac_ctx.opad.h[0]; - tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; - tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; - tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; - tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; - tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; - tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; - tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; - - sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, salt_bufs[SALT_POS_HOST].salt_len); - - for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) - { - sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; - - u32 w0[4]; - u32 w1[4]; - u32 w2[4]; - u32 w3[4]; - - w0[0] = j; - w0[1] = 0; - w0[2] = 0; - w0[3] = 0; - w1[0] = 0; - w1[1] = 0; - w1[2] = 0; - w1[3] = 0; - w2[0] = 0; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 0; - - sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4); - - sha256_hmac_final (&sha256_hmac_ctx2); - - tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; - tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; - tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; - tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; - tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; - tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; - tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; - tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; - - tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; - tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; - tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; - tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; - tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; - tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; - tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; - tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; - } -} - -KERNEL_FQ void m26630_loop (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - const u64 gid = get_global_id (0); - - if ((gid * VECT_SIZE) >= GID_CNT) return; - 
- u32x ipad[8]; - u32x opad[8]; - - ipad[0] = packv (tmps, ipad, gid, 0); - ipad[1] = packv (tmps, ipad, gid, 1); - ipad[2] = packv (tmps, ipad, gid, 2); - ipad[3] = packv (tmps, ipad, gid, 3); - ipad[4] = packv (tmps, ipad, gid, 4); - ipad[5] = packv (tmps, ipad, gid, 5); - ipad[6] = packv (tmps, ipad, gid, 6); - ipad[7] = packv (tmps, ipad, gid, 7); - - opad[0] = packv (tmps, opad, gid, 0); - opad[1] = packv (tmps, opad, gid, 1); - opad[2] = packv (tmps, opad, gid, 2); - opad[3] = packv (tmps, opad, gid, 3); - opad[4] = packv (tmps, opad, gid, 4); - opad[5] = packv (tmps, opad, gid, 5); - opad[6] = packv (tmps, opad, gid, 6); - opad[7] = packv (tmps, opad, gid, 7); - - for (u32 i = 0; i < 8; i += 8) - { - u32x dgst[8]; - u32x out[8]; - - dgst[0] = packv (tmps, dgst, gid, i + 0); - dgst[1] = packv (tmps, dgst, gid, i + 1); - dgst[2] = packv (tmps, dgst, gid, i + 2); - dgst[3] = packv (tmps, dgst, gid, i + 3); - dgst[4] = packv (tmps, dgst, gid, i + 4); - dgst[5] = packv (tmps, dgst, gid, i + 5); - dgst[6] = packv (tmps, dgst, gid, i + 6); - dgst[7] = packv (tmps, dgst, gid, i + 7); - - out[0] = packv (tmps, out, gid, i + 0); - out[1] = packv (tmps, out, gid, i + 1); - out[2] = packv (tmps, out, gid, i + 2); - out[3] = packv (tmps, out, gid, i + 3); - out[4] = packv (tmps, out, gid, i + 4); - out[5] = packv (tmps, out, gid, i + 5); - out[6] = packv (tmps, out, gid, i + 6); - out[7] = packv (tmps, out, gid, i + 7); - - for (u32 j = 0; j < LOOP_CNT; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = dgst[0]; - w0[1] = dgst[1]; - w0[2] = dgst[2]; - w0[3] = dgst[3]; - w1[0] = dgst[4]; - w1[1] = dgst[5]; - w1[2] = dgst[6]; - w1[3] = dgst[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = (64 + 32) * 8; - - hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); - - out[0] ^= dgst[0]; - out[1] ^= dgst[1]; - out[2] ^= dgst[2]; - out[3] ^= dgst[3]; - out[4] ^= dgst[4]; - out[5] ^= 
dgst[5]; - out[6] ^= dgst[6]; - out[7] ^= dgst[7]; - } - - unpackv (tmps, dgst, gid, i + 0, dgst[0]); - unpackv (tmps, dgst, gid, i + 1, dgst[1]); - unpackv (tmps, dgst, gid, i + 2, dgst[2]); - unpackv (tmps, dgst, gid, i + 3, dgst[3]); - unpackv (tmps, dgst, gid, i + 4, dgst[4]); - unpackv (tmps, dgst, gid, i + 5, dgst[5]); - unpackv (tmps, dgst, gid, i + 6, dgst[6]); - unpackv (tmps, dgst, gid, i + 7, dgst[7]); - - unpackv (tmps, out, gid, i + 0, out[0]); - unpackv (tmps, out, gid, i + 1, out[1]); - unpackv (tmps, out, gid, i + 2, out[2]); - unpackv (tmps, out, gid, i + 3, out[3]); - unpackv (tmps, out, gid, i + 4, out[4]); - unpackv (tmps, out, gid, i + 5, out[5]); - unpackv (tmps, out, gid, i + 6, out[6]); - unpackv (tmps, out, gid, i + 7, out[7]); - } -} - -KERNEL_FQ void m26630_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sha256_aes_gcm_t)) -{ - const u64 gid = get_global_id (0); - const u64 lid = get_local_id (0); - const u64 lsz = get_local_size (0); - - /** - * aes shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } - - SYNC_THREADS (); - - #else - - CONSTANT_AS u32a *s_te0 = te0; - CONSTANT_AS u32a *s_te1 = te1; - CONSTANT_AS u32a *s_te2 = te2; - CONSTANT_AS u32a *s_te3 = te3; - CONSTANT_AS u32a *s_te4 = te4; - - #endif - - if (gid >= GID_CNT) return; - - const u32 digest_pos = LOOP_POS; - - const u32 digest_cur = DIGESTS_OFFSET_HOST + digest_pos; - - GLOBAL_AS const pbkdf2_sha256_aes_gcm_t *pbkdf2_sha256_aes_gcm = &esalt_bufs[digest_cur]; - - // keys - - u32 ukey[8]; - - ukey[0] = tmps[gid].out[0]; - ukey[1] = tmps[gid].out[1]; - ukey[2] = tmps[gid].out[2]; - ukey[3] = tmps[gid].out[3]; - ukey[4] = tmps[gid].out[4]; - ukey[5] = tmps[gid].out[5]; - ukey[6] = tmps[gid].out[6]; - 
ukey[7] = tmps[gid].out[7]; - - u32 key_len = 32 * 8; - - u32 key[60] = { 0 }; - u32 subKey[4] = { 0 }; - - AES_GCM_Init (ukey, key_len, key, subKey, s_te0, s_te1, s_te2, s_te3, s_te4); - - // iv - - u32 iv[4]; - - iv[0] = pbkdf2_sha256_aes_gcm->iv_buf[0]; - iv[1] = pbkdf2_sha256_aes_gcm->iv_buf[1]; - iv[2] = pbkdf2_sha256_aes_gcm->iv_buf[2]; - iv[3] = pbkdf2_sha256_aes_gcm->iv_buf[3]; - - const u32 iv_len = pbkdf2_sha256_aes_gcm->iv_len; - - u32 J0[4] = { 0 }; - - AES_GCM_Prepare_J0 (iv, iv_len, subKey, J0); - - u32 ct[8]; - - ct[0] = pbkdf2_sha256_aes_gcm->ct_buf[0]; // first block of ciphertext - ct[1] = pbkdf2_sha256_aes_gcm->ct_buf[1]; - ct[2] = pbkdf2_sha256_aes_gcm->ct_buf[2]; - ct[3] = pbkdf2_sha256_aes_gcm->ct_buf[3]; - ct[4] = pbkdf2_sha256_aes_gcm->ct_buf[4]; // second block of ciphertext - ct[5] = pbkdf2_sha256_aes_gcm->ct_buf[5]; - ct[6] = pbkdf2_sha256_aes_gcm->ct_buf[6]; - ct[7] = pbkdf2_sha256_aes_gcm->ct_buf[7]; - - u32 pt[8] = { 0 }; - - AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4); - - const int correct = is_valid_printable_32 (pt[0]) - + is_valid_printable_32 (pt[1]) - + is_valid_printable_32 (pt[2]) - + is_valid_printable_32 (pt[3]) - + is_valid_printable_32 (pt[4]) - + is_valid_printable_32 (pt[5]) - + is_valid_printable_32 (pt[6]) - + is_valid_printable_32 (pt[7]); - - if (correct != 8) return; - - /* - const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte - // only we can't guarantee it is not in the middle of a UTF8 byte stream at that point - - if (hc_enc_scan (pt, pt_len)) - { - hc_enc_t hc_enc; - - hc_enc_init (&hc_enc); - - while (hc_enc_has_next (&hc_enc, pt_len)) - { - u32 enc_buf[16] = { 0 }; - - const int enc_len = hc_enc_next (&hc_enc, pt, pt_len, 32, enc_buf, sizeof (enc_buf)); - - if (enc_len == -1) return; - } - } - */ - - const u32 r0 = ct[0]; - const u32 r1 = ct[1]; - const u32 r2 = ct[2]; - const u32 r3 = ct[3]; - - 
#define il_pos 0 - - #ifdef KERNEL_STATIC - #include COMPARE_M - #endif -} From 9a14c99bb129fb6e67f4b5566b0d47a10c826499 Mon Sep 17 00:00:00 2001 From: its5Q Date: Sat, 24 Feb 2024 16:42:08 +1000 Subject: [PATCH 07/57] Update metamask2hashcat to use new format and add test vault JSON --- tools/2hashcat_tests/metamask2hashcat.withrounds.json | 1 + tools/metamask2hashcat.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 tools/2hashcat_tests/metamask2hashcat.withrounds.json diff --git a/tools/2hashcat_tests/metamask2hashcat.withrounds.json b/tools/2hashcat_tests/metamask2hashcat.withrounds.json new file mode 100644 index 000000000..b865647fd --- /dev/null +++ b/tools/2hashcat_tests/metamask2hashcat.withrounds.json @@ -0,0 +1 @@ +{"data":"G2Qsp8BtQNz9E1vFlSebykcGkps5iJ53fBjJV8GIE7eolAQpz5m25MTZv0b0t2tpqXvfgEYKYZKsoLvuWWRHqj2ommt+U0l6fEGPY6lTn5PYzDtMKNpPWHKCIS/QYhwEA9/X5RtyIwDL6VqcRp6owV+/icmRlQa+TI5buHWZ+99Z1kBbpTDVYLBwhMRsODM1vYizQDg0vFIo3cQDtpRWUqpAKXhFcpgRD+9grS3pP/zdlIUn//87DZ3ue6Sn6WFOe08EvuY8sYZqTiN3GxcfESOltNbZJGcedMubt/jGsk+qIwUWC/f456UgeX9DN7i2pQBBI+L7qkY5v1WT5Y0i8uho1c2M2G8M9miO8HSm/j4bpMN1J6lPtjFhfAzEPS1go1w2vTmOtr1y+2A4M6HEOcxIrPJ8lUUH9pcN7Xpd+u/tQv8BYFxd6RlNYKLyA6OChbF+TD5Zz6oFZQtkprXqzZUFOlxeWJ373kHMISZtXOx44YGaiT2178fXgHFXavw=","iv":"N1aDRjt2ZD5x15Q1X9zVUw==","keyMetadata":{"algorithm":"PBKDF2","params":{"iterations":600000}},"salt":"MBdUsmTcBHGCASECYr3gmD8XaJROwjhOegSWweCFhco="} diff --git a/tools/metamask2hashcat.py b/tools/metamask2hashcat.py index 823f528b6..319889c60 100644 --- a/tools/metamask2hashcat.py +++ b/tools/metamask2hashcat.py @@ -58,6 +58,7 @@ def metamask_parser(file, shortdata): iter_count = j['keyMetadata']['params']['iterations'] except KeyError: iter_count = 10_000 + if((len(j['data']) > 3000) or shortdata): data_bin = base64.b64decode(j['data']) # TODO limit data to 16 bytes, we only check the first block of data, so we don't need more data. 
@@ -66,12 +67,12 @@ def metamask_parser(file, shortdata): j['data'] = base64.b64encode(data_bin[0:64]).decode("ascii") if iter_count != 10000: - print('$metamask-short$' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + print('$metamask-short$rounds=' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) else: print('$metamask-short$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) else: if iter_count != 10000: - print('$metamask$' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) + print('$metamask$rounds=' + str(iter_count) + '$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) else: print('$metamask$' + j['salt'] + '$' + j['iv'] + '$' + j['data']) From 55ca7fb27b3167ca9fe31a3f1b580642960d23a5 Mon Sep 17 00:00:00 2001 From: holly-o <128481388+holly-o@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:19:27 +0100 Subject: [PATCH 08/57] Add bitlocker2hashcat.py For generating hashes for mode 22100. Hashcat supports modes $bitlocker$0$ and $bitlocker$1$ and therefore this script will output hashes that relate to a VMK protected by a user password only. Script created since bitlocker2john takes an extremely long time to run and sometimes fails to output a hash, or outputs incorrect information. Script created using information at https://github.com/libyal/libbde/blob/main/documentation/BitLocker%20Drive%20Encryption%20(BDE)%20format.asciidoc#encryption_methods and tested on Windows hard drives and USB sticks. 
--- tools/bitlocker2hashcat.py | 234 +++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 tools/bitlocker2hashcat.py diff --git a/tools/bitlocker2hashcat.py b/tools/bitlocker2hashcat.py new file mode 100644 index 000000000..56e185311 --- /dev/null +++ b/tools/bitlocker2hashcat.py @@ -0,0 +1,234 @@ +# Construct a hash for use with hashcat mode 22100 +# Usage: python3 bitlocker2hashcat.py -o +# Hashcat supports modes $bitlocker$0$ and $bitlocker$1$ and therefore this script will output hashes that relate to a VMK protected by a user password only. +# It is not possible to create a hash for VMKs protected by a TPM, and is infeasible to attempt to crack a hash of the recovery password. +# Refs: https://github.com/libyal/libbde/blob/main/documentation/BitLocker%20Drive%20Encryption%20(BDE)%20format.asciidoc#encryption_methods + +import argparse + +BITLOCKER_SIGNATURE = '-FVE-FS-' +BITLOCKER_TO_GO_SIGNATURE = 'MSWIN4.1' +BITLOCKER_GUIDS = {'4967D63B-2E29-4AD8-8399-F6A339E3D001' : 'BitLocker', '4967D63B-2E29-4AD8-8399-F6A339E3D01' : 'BitLocker To Go', '92A84D3B-DD80-4D0E-9E4E-B1E3284EAED8' : 'BitLocker Used Disk Space Only'} +PROTECTION_TYPES = {0x0: 'VMK protected with clear key', 0x100: 'VMK protected with TPM', 0x200: 'VMK protected with startup key', 0x500: 'VMK protected with TPM and PIN', 0x800: 'VMK protected with recovery password', 0x2000: 'VMK protected with password'} +FVE_ENTRY_TYPES = {0x0: 'None', 0x2: 'VMK', 0x3: 'FVEK', 0x4: 'Validation', 0x6: 'Startup key', 0x7: 'Computer description', 0xb: 'FVEK backup', 0xf: 'Volume header block'} +FVE_VALUE_TYPES = {0x0: 'Erased', 0x1: 'Key', 0x2: 'UTF-16 string', 0x3: 'Stretch key', 0x4: 'Use key', 0x5: 'AES-CCM encrypted key', 0x6: 'TPM encoded key', 0x7: 'Validation', 0x8: 'VMK', 0x9: 'External key', 0xa: 'Update', 0xb: 'Error', 0xf: 'Offset and size'} +ITERATION_COUNT = 0x100000 +BITLOCKER_HASH_VERSIONS = [0,1] # 0,1 both supported on hashcat +HASHCAT_HASH = [] + +def 
guid_to_hex(guid): + guid_parts = guid.split('-') + + search_target = ''.join([guid_parts[0][i:i+2] for i in range(0, len(guid_parts[0]), 2)][::-1]) + search_target += ''.join([guid_parts[1][i:i+2] for i in range(0, len(guid_parts[1]), 2)][::-1]) + search_target += ''.join([guid_parts[2][i:i+2] for i in range(0, len(guid_parts[2]), 2)][::-1]) + search_target += guid_parts[3] + search_target += guid_parts[4] + + return search_target + +def hex_to_guid(hex_str): + + guid_parts = [0] * 5 + guid_parts[0] = hex_str[0:8] + guid_parts[1] = hex_str[8:12] + guid_parts[2] = hex_str[12:16] + guid_parts[3] = hex_str[16:20] + guid_parts[4] = hex_str[20:] + + guid = ''.join([guid_parts[0][i:i+2] for i in range(0, len(guid_parts[0]), 2)][::-1]) + guid += '-' + guid += ''.join([guid_parts[1][i:i+2] for i in range(0, len(guid_parts[1]), 2)][::-1]) + guid += '-' + guid += ''.join([guid_parts[2][i:i+2] for i in range(0, len(guid_parts[2]), 2)][::-1]) + guid += '-' + guid += guid_parts[3] + guid += '-' + guid += guid_parts[4] + + return guid.upper() + +def uint_to_int(b): + return int(b[::-1].hex(), 16) + +def parse_FVEK(fvek_data): + print("\nParsing FVEK...") + nonce = fvek_data[:12] + mac = fvek_data[12:28] + enc_data = fvek_data[28:] + + print("Mac:", mac.hex()) + print("Nonce:", nonce.hex()) + print("Encrypted data:", enc_data.hex()) + + return nonce, mac, enc_data + +def parse_stretch_key(data): + print("\nParsing stretch key...") + encryption_method = hex(uint_to_int(data[0:4])) + salt = data[4:20] + print("Encryption method:", encryption_method) + print("Salt:", salt.hex()) + current_pos = 0 + aes_ccm_data = data[20:] + current_pos, data, value_type = parse_fve_metadata_entry(current_pos, aes_ccm_data) + nonce, mac, enc_data = parse_aes_ccm_encrypted_key(data) + + return salt, nonce, mac, enc_data + +def generate_hashcat_hash(salt, nonce, mac, enc_data): + print("\nFound hashcat hash!") + for version in BITLOCKER_HASH_VERSIONS: + generated_hash = 
f"$bitlocker${version}${len(salt)}${salt.hex()}${ITERATION_COUNT}${len(nonce)}${nonce.hex()}${len(mac + enc_data)}${(mac + enc_data).hex()}" + print(generated_hash) + HASHCAT_HASH.append(generated_hash) + +def parse_aes_ccm_encrypted_key(data): + print("Parsing AES CCM key...") + nonce, mac, enc_data = parse_FVEK(data) + return nonce, mac, enc_data + +def parse_description(data): + print("\nParsing description...") + print(f"Info: {data.decode('utf-16')}") + return + +def parse_volume_header_block(data): + print("\nParsing volume header block...") + block_offset = uint_to_int(data[0:8]) + block_size = uint_to_int(data[8:16]) + print(f"Block offset: {hex(block_offset)}") + print(f"Block size: {block_size}") + +def parse_VMK(VMK_data): + print("\nParsing VMK...") + guid = hex_to_guid(VMK_data[:16].hex()) + protection_type = uint_to_int(VMK_data[26:28]) + properties = VMK_data[28:] + print("GUID:", guid) + print(f"Protection type: {hex(protection_type)} = {PROTECTION_TYPES.get(protection_type)}") + + # only try parse properties if correct protection type + if protection_type == 0x2000: + current_pos = 28 + while current_pos < len(properties): + current_pos, data, value_type = parse_fve_metadata_entry(current_pos, VMK_data[current_pos:]) + if value_type == 0x3: + salt, strech_nonce, stretch_mac, stretch_enc_data = parse_stretch_key(data) + if value_type == 0x5: + nonce, mac, enc_data = parse_aes_ccm_encrypted_key(data) + generate_hashcat_hash(salt, nonce, mac, enc_data) + + return + +def parse_fve_metadata_block(block): + print('\nParsing FVE block...') + signature = block[0:8] + fve_metadata_header = block[64:64+48] + metadata_size = parse_fve_metadata_header(fve_metadata_header) + + entry_size = uint_to_int(block[112:114]) + current_pos = 112 + while current_pos < metadata_size: + current_pos, data, value_type = parse_fve_metadata_entry(current_pos, block[current_pos:current_pos+entry_size]) + if value_type == 0x2: + parse_description(data) + if value_type == 0x5: + 
parse_aes_ccm_encrypted_key(data) + if value_type == 0x8: + parse_VMK(data) + if value_type == 0xf: + parse_volume_header_block(data) + + try: + entry_size = uint_to_int(block[current_pos:current_pos+2]) + except: + return + +def parse_fve_metadata_entry(current_pos, block): + print("\nParsing FVE metadata entry...") + entry_size = uint_to_int(block[0:2]) + entry_type = uint_to_int(block[2:4]) + value_type = uint_to_int(block[4:6]) + version = hex(uint_to_int(block[6:8])) + data = block[8:entry_size] + + print(f"Entry size: {entry_size}") + print(f"Entry type: {hex(entry_type)} = {FVE_ENTRY_TYPES.get(entry_type)}") + print(f"Value type: {hex(value_type)} = {FVE_VALUE_TYPES.get(value_type)}") + + current_pos = current_pos + entry_size + + return current_pos, data, value_type + +def parse_fve_metadata_header(block): + print("\nParsing FVE metadata header...") + metadata_size = uint_to_int(block[0:4]) + volume_guid = hex_to_guid(block[16:32].hex()) + nonce_counter = uint_to_int(block[32:36]) + encryption_method = hex(uint_to_int(block[36:40])) + + print("Metadata size:", metadata_size) + print("Volume GUID:", volume_guid) + print("Encryption method:", encryption_method) + + return metadata_size + +def main(): + + p = argparse.ArgumentParser() + p.add_argument('image_path', help="Path to encrypted BitLocker image") + p.add_argument('-o', '--offset', help='Offset in image where BitLocker partition starts, default=0') + args = p.parse_args() + bitlocker_partition = args.image_path + + bitlocker_offset = 0 + if args.offset: + bitlocker_offset = int(args.offset) + + with open(bitlocker_partition, 'rb') as fp: + + fp.seek(bitlocker_offset) + boot_entry_point = fp.read(3) + + header = fp.read(8) + if header.decode('latin-1') not in [BITLOCKER_SIGNATURE, BITLOCKER_TO_GO_SIGNATURE]: + print("[!] Supplied image path is not a BitLocker partition. 
Try specifiying the offset of the BitLocker partition with -o") + exit() + print(f'[+] BitLocker signature found: {header.decode()}') + sector_size = uint_to_int(fp.read(2)) + + if header.decode('latin-1') == BITLOCKER_SIGNATURE: + guid_offset = 0xa0 + if header.decode('latin-1') == BITLOCKER_TO_GO_SIGNATURE: + guid_offset = 0x1a8 + + fp.seek(guid_offset + bitlocker_offset) + volume_guid = fp.read(16) + print(f'[+] Identified volume GUID: {hex_to_guid(volume_guid.hex())} = {BITLOCKER_GUIDS.get(hex_to_guid(volume_guid.hex()))}') + + # get FVE metadata block addresses + FVE_metadata_offsets = [hex(uint_to_int(fp.read(8)) + bitlocker_offset) for _ in range(3)] + print(f'[+] FVE metadata info found at offsets {FVE_metadata_offsets}') + + # all metadata blocks should be the same + for f in FVE_metadata_offsets: + + fp.seek(int(f, 16)) + FVE_metadata_block = fp.read(2048) + parse_fve_metadata_block(FVE_metadata_block) + + break + + if HASHCAT_HASH == []: + print("\nNo hashes associated with the user password found. 
Exiting...") + else: + print("\nThe following hashcat hashes were found:") + for bitlocker_hash in HASHCAT_HASH: + print(bitlocker_hash) + + return + + +if __name__ == "__main__": + main() \ No newline at end of file From 7c1688a266909c4987346dd8e9363bfee935aae0 Mon Sep 17 00:00:00 2001 From: holly-o <128481388+holly-o@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:07:01 +0100 Subject: [PATCH 09/57] Update tools/bitlocker2hashcat.py Co-authored-by: kgolawski --- tools/bitlocker2hashcat.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/bitlocker2hashcat.py b/tools/bitlocker2hashcat.py index 56e185311..19d0be2d7 100644 --- a/tools/bitlocker2hashcat.py +++ b/tools/bitlocker2hashcat.py @@ -178,13 +178,11 @@ def main(): p = argparse.ArgumentParser() p.add_argument('image_path', help="Path to encrypted BitLocker image") - p.add_argument('-o', '--offset', help='Offset in image where BitLocker partition starts, default=0') + p.add_argument('-o', '--offset', default=0, type=int, help='Offset in image where BitLocker partition starts') args = p.parse_args() bitlocker_partition = args.image_path - bitlocker_offset = 0 - if args.offset: - bitlocker_offset = int(args.offset) + bitlocker_offset = args.offset with open(bitlocker_partition, 'rb') as fp: From cbc21cf767a30105aef31cfe1e69407249445317 Mon Sep 17 00:00:00 2001 From: holly-o <128481388+holly-o@users.noreply.github.com> Date: Fri, 13 Sep 2024 15:08:31 +0100 Subject: [PATCH 10/57] Update tools/bitlocker2hashcat.py Co-authored-by: kgolawski --- tools/bitlocker2hashcat.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/bitlocker2hashcat.py b/tools/bitlocker2hashcat.py index 19d0be2d7..f7501a37b 100644 --- a/tools/bitlocker2hashcat.py +++ b/tools/bitlocker2hashcat.py @@ -29,12 +29,13 @@ def guid_to_hex(guid): def hex_to_guid(hex_str): - guid_parts = [0] * 5 - guid_parts[0] = hex_str[0:8] - guid_parts[1] = hex_str[8:12] - guid_parts[2] = hex_str[12:16] 
- guid_parts[3] = hex_str[16:20] - guid_parts[4] = hex_str[20:] + guid_parts = [ + hex_str[0:8], + hex_str[8:12], + hex_str[12:16], + hex_str[16:20], + hex_str[20:], + ] guid = ''.join([guid_parts[0][i:i+2] for i in range(0, len(guid_parts[0]), 2)][::-1]) guid += '-' From cb99fcf73b9c51c211a2ab72d47358327d8b8956 Mon Sep 17 00:00:00 2001 From: hashrepublic Date: Tue, 29 Oct 2024 07:27:11 +0100 Subject: [PATCH 11/57] fixed mode m21310 OpenCL types --- OpenCL/m21310_a1-pure.cl | 2 +- OpenCL/m21310_a3-pure.cl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL/m21310_a1-pure.cl b/OpenCL/m21310_a1-pure.cl index be8285e50..67a0eeb2d 100644 --- a/OpenCL/m21310_a1-pure.cl +++ b/OpenCL/m21310_a1-pure.cl @@ -37,7 +37,7 @@ typedef struct md5_double_salt } md5_double_salt_t; -KERNEL_FQ void m21310_mxx (KERN_ATTR_ESALT (md5_double_salt)) +KERNEL_FQ void m21310_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) { /** * modifier diff --git a/OpenCL/m21310_a3-pure.cl b/OpenCL/m21310_a3-pure.cl index 546e7defb..985d67cae 100644 --- a/OpenCL/m21310_a3-pure.cl +++ b/OpenCL/m21310_a3-pure.cl @@ -37,7 +37,7 @@ typedef struct md5_double_salt } md5_double_salt_t; -KERNEL_FQ void m21310_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt)) +KERNEL_FQ void m21310_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) { /** * modifier From bf338e0c12fbae21f78df5749a487723f626b5f3 Mon Sep 17 00:00:00 2001 From: hashrepublic Date: Tue, 29 Oct 2024 14:22:29 +0100 Subject: [PATCH 12/57] fixed m21310_sxx --- OpenCL/m21310_a1-pure.cl | 2 +- OpenCL/m21310_a3-pure.cl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/OpenCL/m21310_a1-pure.cl b/OpenCL/m21310_a1-pure.cl index 67a0eeb2d..006246416 100644 --- a/OpenCL/m21310_a1-pure.cl +++ b/OpenCL/m21310_a1-pure.cl @@ -149,7 +149,7 @@ KERNEL_FQ void m21310_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) } } -KERNEL_FQ void m21310_sxx (KERN_ATTR_ESALT (md5_double_salt)) +KERNEL_FQ void m21310_sxx (KERN_ATTR_ESALT (md5_double_salt_t)) { 
/** * modifier diff --git a/OpenCL/m21310_a3-pure.cl b/OpenCL/m21310_a3-pure.cl index 985d67cae..ce53dbc26 100644 --- a/OpenCL/m21310_a3-pure.cl +++ b/OpenCL/m21310_a3-pure.cl @@ -168,7 +168,7 @@ KERNEL_FQ void m21310_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) } } -KERNEL_FQ void m21310_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt)) +KERNEL_FQ void m21310_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) { /** * modifier From a80d68731ef2b33cc1c4325d2581b26de712429f Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Thu, 31 Oct 2024 18:04:58 +0100 Subject: [PATCH 13/57] Kernel: Renamed multiple defines in CAST cipher to fit expected naming convention of the C++ language standard --- OpenCL/inc_cipher_cast.cl | 80 +++++++++++++++++++-------------------- OpenCL/inc_cipher_cast.h | 22 +++++------ docs/changes.txt | 1 + 3 files changed, 50 insertions(+), 53 deletions(-) diff --git a/OpenCL/inc_cipher_cast.cl b/OpenCL/inc_cipher_cast.cl index e2c5a6fb1..f37b09072 100644 --- a/OpenCL/inc_cipher_cast.cl +++ b/OpenCL/inc_cipher_cast.cl @@ -569,22 +569,22 @@ DECLSPEC void Cast5Encrypt (PRIVATE_AS const u8 *inBlock, PRIVATE_AS u8 *outBloc u32 t; /* Do the work */ - _CAST_F1(l, r, 0, 16); - _CAST_F2(r, l, 1, 16); - _CAST_F3(l, r, 2, 16); - _CAST_F1(r, l, 3, 16); - _CAST_F2(l, r, 4, 16); - _CAST_F3(r, l, 5, 16); - _CAST_F1(l, r, 6, 16); - _CAST_F2(r, l, 7, 16); - _CAST_F3(l, r, 8, 16); - _CAST_F1(r, l, 9, 16); - _CAST_F2(l, r, 10, 16); - _CAST_F3(r, l, 11, 16); - _CAST_F1(l, r, 12, 16); - _CAST_F2(r, l, 13, 16); - _CAST_F3(l, r, 14, 16); - _CAST_F1(r, l, 15, 16); + CAST_F1(l, r, 0, 16); + CAST_F2(r, l, 1, 16); + CAST_F3(l, r, 2, 16); + CAST_F1(r, l, 3, 16); + CAST_F2(l, r, 4, 16); + CAST_F3(r, l, 5, 16); + CAST_F1(l, r, 6, 16); + CAST_F2(r, l, 7, 16); + CAST_F3(l, r, 8, 16); + CAST_F1(r, l, 9, 16); + CAST_F2(l, r, 10, 16); + CAST_F3(r, l, 11, 16); + CAST_F1(l, r, 12, 16); + CAST_F2(r, l, 13, 16); + CAST_F3(l, r, 14, 16); + CAST_F1(r, l, 15, 16); /* Put l,r into 
outblock */ PUT_UINT32BE(r, outBlock, 0); @@ -599,22 +599,22 @@ DECLSPEC void Cast5Decrypt (PRIVATE_AS const u8 *inBlock, PRIVATE_AS u8 *outBloc u32 t; /* Only do full 16 rounds if key length > 80 bits */ - _CAST_F1(r, l, 15, 16); - _CAST_F3(l, r, 14, 16); - _CAST_F2(r, l, 13, 16); - _CAST_F1(l, r, 12, 16); - _CAST_F3(r, l, 11, 16); - _CAST_F2(l, r, 10, 16); - _CAST_F1(r, l, 9, 16); - _CAST_F3(l, r, 8, 16); - _CAST_F2(r, l, 7, 16); - _CAST_F1(l, r, 6, 16); - _CAST_F3(r, l, 5, 16); - _CAST_F2(l, r, 4, 16); - _CAST_F1(r, l, 3, 16); - _CAST_F3(l, r, 2, 16); - _CAST_F2(r, l, 1, 16); - _CAST_F1(l, r, 0, 16); + CAST_F1(r, l, 15, 16); + CAST_F3(l, r, 14, 16); + CAST_F2(r, l, 13, 16); + CAST_F1(l, r, 12, 16); + CAST_F3(r, l, 11, 16); + CAST_F2(l, r, 10, 16); + CAST_F1(r, l, 9, 16); + CAST_F3(l, r, 8, 16); + CAST_F2(r, l, 7, 16); + CAST_F1(l, r, 6, 16); + CAST_F3(r, l, 5, 16); + CAST_F2(l, r, 4, 16); + CAST_F1(r, l, 3, 16); + CAST_F3(l, r, 2, 16); + CAST_F2(r, l, 1, 16); + CAST_F1(l, r, 0, 16); /* Put l,r into outblock */ PUT_UINT32BE(r, outBlock, 0); PUT_UINT32BE(l, outBlock, 4); @@ -633,8 +633,8 @@ DECLSPEC void Cast5SetKey (PRIVATE_AS CAST_KEY *key, u32 keylength, PRIVATE_AS c GET_UINT32BE(X[2], userKey, 8); GET_UINT32BE(X[3], userKey, 12); - #define x(i) GETBYTE(X[i/4], 3-i%4) - #define z(i) GETBYTE(Z[i/4], 3-i%4) + #define x(i) GETBYTE(X[i/4], 3-i%4) + #define z(i) GETBYTE(Z[i/4], 3-i%4) for (i=0; i<=16; i+=16) { // this part is copied directly from RFC 2144 (with some search and replace) by Wei Dai @@ -673,11 +673,11 @@ DECLSPEC void Cast5SetKey (PRIVATE_AS CAST_KEY *key, u32 keylength, PRIVATE_AS c } u32 data[32]; - for (i = 0; i < 16; i++) { - data[i * 2] = K[i]; - data[i * 2 + 1] = ((K[i + 16]) + 16) & 0x1f; // here only the lowest 5 bits are set.. - } - for (i=16; i<32; i++) - K[i] &= 0x1f; + for (i = 0; i < 16; i++) { + data[i * 2] = K[i]; + data[i * 2 + 1] = ((K[i + 16]) + 16) & 0x1f; // here only the lowest 5 bits are set.. 
+ } + + for (i=16; i<32; i++) K[i] &= 0x1f; } diff --git a/OpenCL/inc_cipher_cast.h b/OpenCL/inc_cipher_cast.h index 7b96dc558..46a511ec7 100644 --- a/OpenCL/inc_cipher_cast.h +++ b/OpenCL/inc_cipher_cast.h @@ -1,7 +1,5 @@ - - -#ifndef _OPENCL_CAST_H -#define _OPENCL_CAST_H +#ifndef INC_CIPHER_CAST_H +#define INC_CIPHER_CAST_H // #include "opencl_misc.h" #define GET_UINT32BE(n, b, i) \ @@ -32,28 +30,26 @@ typedef struct { #define U8d(x) GETBYTE(x,0) /* CAST uses three different round functions */ -#define _CAST_f1(l, r, km, kr) \ +#define CAST_f1(l, r, km, kr) \ t = hc_rotl32_S(km + r, kr); \ l ^= ((s_S[0][U8a(t)] ^ s_S[1][U8b(t)]) - \ s_S[2][U8c(t)]) + s_S[3][U8d(t)]; -#define _CAST_f2(l, r, km, kr) \ +#define CAST_f2(l, r, km, kr) \ t = hc_rotl32_S(km ^ r, kr); \ l ^= ((s_S[0][U8a(t)] - s_S[1][U8b(t)]) + \ s_S[2][U8c(t)]) ^ s_S[3][U8d(t)]; -#define _CAST_f3(l, r, km, kr) \ +#define CAST_f3(l, r, km, kr) \ t = hc_rotl32_S(km - r, kr); \ l ^= ((s_S[0][U8a(t)] + s_S[1][U8b(t)]) ^ \ s_S[2][U8c(t)]) - s_S[3][U8d(t)]; -#define _CAST_F1(l, r, i, j) _CAST_f1(l, r, K[i], K[i+j]) -#define _CAST_F2(l, r, i, j) _CAST_f2(l, r, K[i], K[i+j]) -#define _CAST_F3(l, r, i, j) _CAST_f3(l, r, K[i], K[i+j]) - +#define CAST_F1(l, r, i, j) CAST_f1(l, r, K[i], K[i+j]) +#define CAST_F2(l, r, i, j) CAST_f2(l, r, K[i], K[i+j]) +#define CAST_F3(l, r, i, j) CAST_f3(l, r, K[i], K[i+j]) /* OpenSSL API compatibility */ #define CAST_set_key(ckey, len, key) Cast5SetKey(ckey, len, key) #define CAST_ecb_encrypt(in, out, ckey) Cast5Encrypt(in, out, ckey) #define CAST_ecb_decrypt(in, out, ckey) Cast5Decrypt(in, out, ckey) - -#endif /* _OPENCL_CAST_H */ +#endif /* INC_CIPHER_CAST_H */ diff --git a/docs/changes.txt b/docs/changes.txt index 283e3c0d4..aef8969c1 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -134,6 +134,7 @@ - Help: show supported hash-modes only with -hh - Makefile: prevent make failure with Apple Silicon in case of partial rebuild - Rules: Rename best64.rule to best66.rule and 
remove the unknown section from it +- Kernel: Renamed multiple defines in CAST cipher to fit expected naming convention of the C++ language standard * changes v6.2.5 -> v6.2.6 From 3a3453c9dd5300747b980e57e9c5bd38f3afd54f Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sun, 3 Nov 2024 03:23:20 +0100 Subject: [PATCH 14/57] Added hash-mode: md5(md5(md5($pass.$salt1)).$salt2) --- OpenCL/m32800_a0-pure.cl | 335 +++++++++++++++++++++++++++++++++ OpenCL/m32800_a1-pure.cl | 329 ++++++++++++++++++++++++++++++++ OpenCL/m32800_a3-pure.cl | 355 +++++++++++++++++++++++++++++++++++ docs/changes.txt | 1 + docs/readme.txt | 1 + src/modules/module_32800.c | 275 +++++++++++++++++++++++++++ tools/test_modules/m32800.pm | 46 +++++ 7 files changed, 1342 insertions(+) create mode 100644 OpenCL/m32800_a0-pure.cl create mode 100644 OpenCL/m32800_a1-pure.cl create mode 100644 OpenCL/m32800_a3-pure.cl create mode 100644 src/modules/module_32800.c create mode 100644 tools/test_modules/m32800.pm diff --git a/OpenCL/m32800_a0-pure.cl b/OpenCL/m32800_a0-pure.cl new file mode 100644 index 000000000..9df5a7b80 --- /dev/null +++ b/OpenCL/m32800_a0-pure.cl @@ -0,0 +1,335 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_rp.h) +#include M2S(INCLUDE_PATH/inc_rp.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_md5.cl) +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) 
make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +typedef struct md5_double_salt +{ + u32 salt1_buf[64]; + int salt1_len; + + u32 salt2_buf[64]; + int salt2_len; + +} md5_double_salt_t; + +KERNEL_FQ void m32800_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc uppercase table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32 salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32 salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_update (&ctx0, salt1_buf, salt1_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + 
md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update (&ctx, salt2_buf, salt2_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m32800_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc uppercase table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + COPY_PW (pws[gid]); + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32 salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32 salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + pw_t tmp = PASTE_PW; + + tmp.pw_len = apply_rules (rules_buf[il_pos].cmds, tmp.i, tmp.pw_len); + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update (&ctx0, tmp.i, tmp.pw_len); + + md5_update (&ctx0, salt1_buf, salt1_len); + + md5_final (&ctx0); + + u32 a = ctx0.h[0]; + u32 b = ctx0.h[1]; + u32 c = ctx0.h[2]; + u32 d = ctx0.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 
((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update (&ctx, salt2_buf, salt2_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m32800_a1-pure.cl b/OpenCL/m32800_a1-pure.cl new file mode 100644 index 000000000..756df73ab --- /dev/null +++ b/OpenCL/m32800_a1-pure.cl @@ -0,0 +1,329 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +//#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_scalar.cl) +#include M2S(INCLUDE_PATH/inc_hash_md5.cl) +#endif + +#if VECT_SIZE == 1 +#define 
uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +typedef struct md5_double_salt +{ + u32 salt1_buf[64]; + int salt1_len; + + u32 salt2_buf[64]; + int salt2_len; + +} md5_double_salt_t; + +KERNEL_FQ void m32800_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc uppercase array + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32 salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32 salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx1, salt1_buf, salt1_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + 
md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update (&ctx, salt2_buf, salt2_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_M_SCALAR (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m32800_sxx (KERN_ATTR_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /** + * bin2asc uppercase array + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32 salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32 salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * base + */ + + md5_ctx_t ctx0; + + md5_init (&ctx0); + + md5_update_global (&ctx0, pws[gid].i, pws[gid].pw_len); + + /** + * loop + */ + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos++) + { + md5_ctx_t ctx1 = ctx0; + + md5_update_global (&ctx1, combs_buf[il_pos].i, combs_buf[il_pos].pw_len); + + md5_update (&ctx1, salt1_buf, salt1_len); + + md5_final (&ctx1); + + u32 a = ctx1.h[0]; + u32 b = ctx1.h[1]; + u32 c = ctx1.h[2]; + u32 d = ctx1.h[3]; + + md5_ctx_t ctx; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 
((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_init (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update (&ctx, salt2_buf, salt2_len); + + md5_final (&ctx); + + const u32 r0 = ctx.h[DGST_R0]; + const u32 r1 = ctx.h[DGST_R1]; + const u32 r2 = ctx.h[DGST_R2]; + const u32 r3 = ctx.h[DGST_R3]; + + COMPARE_S_SCALAR (r0, r1, r2, r3); + } +} diff --git a/OpenCL/m32800_a3-pure.cl b/OpenCL/m32800_a3-pure.cl new file mode 100644 index 000000000..531785dde --- /dev/null +++ b/OpenCL/m32800_a3-pure.cl @@ -0,0 +1,355 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include M2S(INCLUDE_PATH/inc_vendor.h) +#include M2S(INCLUDE_PATH/inc_types.h) +#include M2S(INCLUDE_PATH/inc_platform.cl) +#include M2S(INCLUDE_PATH/inc_common.cl) +#include M2S(INCLUDE_PATH/inc_simd.cl) +#include M2S(INCLUDE_PATH/inc_hash_md5.cl) +#endif + +#if VECT_SIZE == 1 +#define uint_to_hex_lower8(i) 
make_u32x (l_bin2asc[(i)]) +#elif VECT_SIZE == 2 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1]) +#elif VECT_SIZE == 4 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3]) +#elif VECT_SIZE == 8 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7]) +#elif VECT_SIZE == 16 +#define uint_to_hex_lower8(i) make_u32x (l_bin2asc[(i).s0], l_bin2asc[(i).s1], l_bin2asc[(i).s2], l_bin2asc[(i).s3], l_bin2asc[(i).s4], l_bin2asc[(i).s5], l_bin2asc[(i).s6], l_bin2asc[(i).s7], l_bin2asc[(i).s8], l_bin2asc[(i).s9], l_bin2asc[(i).sa], l_bin2asc[(i).sb], l_bin2asc[(i).sc], l_bin2asc[(i).sd], l_bin2asc[(i).se], l_bin2asc[(i).sf]) +#endif + +typedef struct md5_double_salt +{ + u32 salt1_buf[64]; + int salt1_len; + + u32 salt2_buf[64]; + int salt2_len; + +} md5_double_salt_t; + +KERNEL_FQ void m32800_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /* + * bin2asc uppercase table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32x salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32x salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_update_vector (&ctx0, salt1_buf, salt1_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 
16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update_vector (&ctx, salt2_buf, salt2_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_M_SIMD (r0, r1, r2, r3); + } +} + +KERNEL_FQ void m32800_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) +{ + /** + * modifier + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + + /* + * bin2asc uppercase table + */ + + LOCAL_VK u32 l_bin2asc[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + const u32 i0 = (i >> 0) & 15; + const u32 i1 = (i >> 4) & 15; + + l_bin2asc[i] = ((i0 < 10) ? '0' + i0 : 'a' - 10 + i0) << 8 + | ((i1 < 10) ? 
'0' + i1 : 'a' - 10 + i1) << 0; + } + + SYNC_THREADS (); + + if (gid >= GID_CNT) return; + + /** + * digest + */ + + const u32 search[4] = + { + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R0], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R1], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R2], + digests_buf[DIGESTS_OFFSET_HOST].digest_buf[DGST_R3] + }; + + /** + * base + */ + + const u32 pw_len = pws[gid].pw_len; + + u32x w[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < pw_len; i += 4, idx += 1) + { + w[idx] = pws[gid].i[idx]; + } + + const u32 salt1_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_len; + + u32x salt1_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt1_len; i += 4, idx += 1) + { + salt1_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt1_buf[idx]; + } + + const u32 salt2_len = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_len; + + u32x salt2_buf[64] = { 0 }; + + for (u32 i = 0, idx = 0; i < salt2_len; i += 4, idx += 1) + { + salt2_buf[idx] = esalt_bufs[DIGESTS_OFFSET_HOST].salt2_buf[idx]; + } + + /** + * loop + */ + + u32x w0l = w[0]; + + for (u32 il_pos = 0; il_pos < IL_CNT; il_pos += VECT_SIZE) + { + const u32x w0r = words_buf_r[il_pos / VECT_SIZE]; + + const u32x w0 = w0l | w0r; + + w[0] = w0; + + md5_ctx_vector_t ctx0; + + md5_init_vector (&ctx0); + + md5_update_vector (&ctx0, w, pw_len); + + md5_update_vector (&ctx0, salt1_buf, salt1_len); + + md5_final_vector (&ctx0); + + u32x a = ctx0.h[0]; + u32x b = ctx0.h[1]; + u32x c = ctx0.h[2]; + u32x d = ctx0.h[3]; + + md5_ctx_vector_t ctx; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = 
uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_final_vector (&ctx); + + a = ctx.h[0]; + b = ctx.h[1]; + c = ctx.h[2]; + d = ctx.h[3]; + + md5_init_vector (&ctx); + + ctx.w0[0] = uint_to_hex_lower8 ((a >> 0) & 255) << 0 + | uint_to_hex_lower8 ((a >> 8) & 255) << 16; + ctx.w0[1] = uint_to_hex_lower8 ((a >> 16) & 255) << 0 + | uint_to_hex_lower8 ((a >> 24) & 255) << 16; + ctx.w0[2] = uint_to_hex_lower8 ((b >> 0) & 255) << 0 + | uint_to_hex_lower8 ((b >> 8) & 255) << 16; + ctx.w0[3] = uint_to_hex_lower8 ((b >> 16) & 255) << 0 + | uint_to_hex_lower8 ((b >> 24) & 255) << 16; + ctx.w1[0] = uint_to_hex_lower8 ((c >> 0) & 255) << 0 + | uint_to_hex_lower8 ((c >> 8) & 255) << 16; + ctx.w1[1] = uint_to_hex_lower8 ((c >> 16) & 255) << 0 + | uint_to_hex_lower8 ((c >> 24) & 255) << 16; + ctx.w1[2] = uint_to_hex_lower8 ((d >> 0) & 255) << 0 + | uint_to_hex_lower8 ((d >> 8) & 255) << 16; + ctx.w1[3] = uint_to_hex_lower8 ((d >> 16) & 255) << 0 + | uint_to_hex_lower8 ((d >> 24) & 255) << 16; + + ctx.len = 32; + + md5_update_vector (&ctx, salt2_buf, salt2_len); + + md5_final_vector (&ctx); + + const u32x r0 = ctx.h[DGST_R0]; + const u32x r1 = ctx.h[DGST_R1]; + const u32x r2 = ctx.h[DGST_R2]; + const u32x r3 = ctx.h[DGST_R3]; + + COMPARE_S_SIMD (r0, r1, r2, r3); + } +} diff --git a/docs/changes.txt b/docs/changes.txt index 283e3c0d4..5cf5a62bc 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -49,6 +49,7 @@ - Added hash-mode: md5(md5($salt).md5(md5($pass))) - Added hash-mode: Domain Cached Credentials 2 (DCC2), MS Cache 2, (NT) - Added hash-mode: Domain Cached Credentials (DCC), MS Cache (NT) +- Added 
hash-mode: md5(md5(md5($pass.$salt1)).$salt2) - Added hash-mode: md5(md5(md5($pass).$salt1).$salt2) - Added hash-mode: md5(md5(md5($pass)).$salt) - Added hash-mode: md5(sha1($pass.$salt)) diff --git a/docs/readme.txt b/docs/readme.txt index e5fd69f25..4a5bfa7a7 100644 --- a/docs/readme.txt +++ b/docs/readme.txt @@ -94,6 +94,7 @@ NVIDIA GPUs require "NVIDIA Driver" (440.64 or later) and "CUDA Toolkit" (9.0 or - md5(md5(md5($pass))) - md5(md5(md5($pass)).$salt) - md5(md5(md5($pass).$salt1).$salt2) +- md5(md5(md5($pass.$salt1)).$salt2) - md5(sha1($pass)) - md5(sha1($pass).$salt) - md5(sha1($pass).md5($pass).sha1($pass)) diff --git a/src/modules/module_32800.c b/src/modules/module_32800.c new file mode 100644 index 000000000..0d2469591 --- /dev/null +++ b/src/modules/module_32800.c @@ -0,0 +1,275 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" +#include "emu_inc_hash_md5.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_INSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 3; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 1; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_RAW_HASH_SALTED; +static const char *HASH_NAME = "md5(md5(md5($pass.$salt1)).$salt2)"; +static const u64 KERN_TYPE = 32800; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_PT_ADD80 + | OPTS_TYPE_PT_ADDBITS14; +static const u32 SALT_TYPE = SALT_TYPE_GENERIC; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "2c749af6c65cf3e82e5837e3056727f5:59331674906582121215362940957615121466283616005471:17254656838978443692786064919357750120910718779182716907569266"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const 
user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, 
MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +typedef struct md5_double_salt +{ + u32 salt1_buf[64]; + int salt1_len; + + u32 salt2_buf[64]; + int salt2_len; + +} md5_double_salt_t; + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (md5_double_salt_t); + + return esalt_size; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + md5_double_salt_t *md5_double_salt = (md5_double_salt_t *) esalt_buf; + + hc_token_t token; + + memset (&token, 0, sizeof (hc_token_t)); + + token.token_cnt = 3; + + token.sep[0] = hashconfig->separator; + token.len[0] = 32; + token.attr[0] = TOKEN_ATTR_FIXED_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + token.sep[1] = hashconfig->separator; + token.len_min[1] = SALT_MIN; + token.len_max[1] = SALT_MAX; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH; + + token.sep[2] = hashconfig->separator; + token.len_min[2] = SALT_MIN; + 
token.len_max[2] = SALT_MAX; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH; + + if (hashconfig->opts_type & OPTS_TYPE_ST_HEX) + { + token.len_min[1] *= 2; + token.len_max[1] *= 2; + + token.attr[1] |= TOKEN_ATTR_VERIFY_HEX; + + token.len_min[2] *= 2; + token.len_max[2] *= 2; + + token.attr[2] |= TOKEN_ATTR_VERIFY_HEX; + } + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *hash_pos = token.buf[0]; + + digest[0] = hex_to_u32 (hash_pos + 0); + digest[1] = hex_to_u32 (hash_pos + 8); + digest[2] = hex_to_u32 (hash_pos + 16); + digest[3] = hex_to_u32 (hash_pos + 24); + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + digest[0] -= MD5M_A; + digest[1] -= MD5M_B; + digest[2] -= MD5M_C; + digest[3] -= MD5M_D; + } + + const bool parse_rc1 = generic_salt_decode (hashconfig, token.buf[1], token.len[1], (u8 *) md5_double_salt->salt1_buf, &md5_double_salt->salt1_len); + + if (parse_rc1 == false) return (PARSER_SALT_LENGTH); + + const bool parse_rc2 = generic_salt_decode (hashconfig, token.buf[2], token.len[2], (u8 *) md5_double_salt->salt2_buf, &md5_double_salt->salt2_len); + + if (parse_rc2 == false) return (PARSER_SALT_LENGTH); + + // make salt sorter happy + + md5_ctx_t md5_ctx; + + md5_init (&md5_ctx); + md5_update (&md5_ctx, md5_double_salt->salt1_buf, md5_double_salt->salt1_len); + md5_update (&md5_ctx, md5_double_salt->salt2_buf, md5_double_salt->salt2_len); + md5_final (&md5_ctx); + + salt->salt_buf[0] = md5_ctx.h[0]; + salt->salt_buf[1] = md5_ctx.h[1]; + salt->salt_buf[2] = md5_ctx.h[2]; + salt->salt_buf[3] = md5_ctx.h[3]; + + salt->salt_len = 16; + + return (PARSER_OK); +} + +int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, 
MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + const md5_double_salt_t *md5_double_salt = (const md5_double_salt_t *) esalt_buf; + + // we can not change anything in the original buffer, otherwise destroying sorting + // therefore create some local buffer + + u32 tmp[4]; + + tmp[0] = digest[0]; + tmp[1] = digest[1]; + tmp[2] = digest[2]; + tmp[3] = digest[3]; + + if (hashconfig->opti_type & OPTI_TYPE_OPTIMIZED_KERNEL) + { + tmp[0] += MD5M_A; + tmp[1] += MD5M_B; + tmp[2] += MD5M_C; + tmp[3] += MD5M_D; + } + + u8 *out_buf = (u8 *) line_buf; + + int out_len = 0; + + u32_to_hex (tmp[0], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[1], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[2], out_buf + out_len); out_len += 8; + u32_to_hex (tmp[3], out_buf + out_len); out_len += 8; + + out_buf[out_len] = hashconfig->separator; + + out_len += 1; + + out_len += generic_salt_encode (hashconfig, (const u8 *) md5_double_salt->salt1_buf, md5_double_salt->salt1_len, out_buf + out_len); + + out_buf[out_len] = hashconfig->separator; + + out_len += 1; + + out_len += generic_salt_encode (hashconfig, (const u8 *) md5_double_salt->salt2_buf, md5_double_salt->salt2_len, out_buf + out_len); + + return out_len; +} + +void module_init (module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_charset = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deprecated_notice = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = 
module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = MODULE_DEFAULT; + module_ctx->module_jit_cache_disable 
= MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = MODULE_DEFAULT; + module_ctx->module_pw_min = MODULE_DEFAULT; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = MODULE_DEFAULT; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/test_modules/m32800.pm b/tools/test_modules/m32800.pm new file mode 100644 index 000000000..e283dcafd --- /dev/null +++ b/tools/test_modules/m32800.pm @@ -0,0 +1,46 @@ +#!/usr/bin/env perl + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +use strict; +use warnings; + +use Digest::MD5 qw (md5_hex); + +sub module_constraints { [[0, 256], [0, 256], [-1, -1], [-1, -1], [-1, -1]] } + +sub module_generate_hash +{ + my $word = shift; + my $salt1 = shift; + my $salt2 = shift || 
random_numeric_string (random_number (1, 255)); + + my $digest = md5_hex (md5_hex (md5_hex ($word . $salt1)) . $salt2); + + my $hash = sprintf ("%s:%s:%s", $digest, $salt1, $salt2); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $salt1, $salt2, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $salt1; + return unless defined $salt2; + return unless defined $word; + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, $salt1, $salt2); + + return ($new_hash, $word); +} + +1; From 3e10c363dedbe3d1630c607e79287a2df919b12b Mon Sep 17 00:00:00 2001 From: PenguinKeeper7 Date: Tue, 26 Nov 2024 00:08:26 +0000 Subject: [PATCH 15/57] Check additional blocks for safety --- OpenCL/m26610-pure.cl | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/OpenCL/m26610-pure.cl b/OpenCL/m26610-pure.cl index abd476395..cb02840f6 100644 --- a/OpenCL/m26610-pure.cl +++ b/OpenCL/m26610-pure.cl @@ -368,7 +368,7 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh AES_GCM_decrypt (key, J0, ct, 32, pt, s_te0, s_te1, s_te2, s_te3, s_te4); - const int correct = is_valid_printable_32 (pt[0]) + int correct = is_valid_printable_32 (pt[0]) + is_valid_printable_32 (pt[1]) + is_valid_printable_32 (pt[2]) + is_valid_printable_32 (pt[3]) @@ -379,6 +379,37 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh if (correct != 8) return; + u32 ct2[8]; + + ct2[0] = pbkdf2_sha256_aes_gcm->ct_buf[8]; // third block of ciphertext + ct2[1] = pbkdf2_sha256_aes_gcm->ct_buf[9]; + ct2[2] = pbkdf2_sha256_aes_gcm->ct_buf[10]; + ct2[3] = pbkdf2_sha256_aes_gcm->ct_buf[11]; + ct2[4] = pbkdf2_sha256_aes_gcm->ct_buf[12]; // fourth block of ciphertext + ct2[5] = pbkdf2_sha256_aes_gcm->ct_buf[13]; + ct2[6] = pbkdf2_sha256_aes_gcm->ct_buf[14]; + ct2[7] = pbkdf2_sha256_aes_gcm->ct_buf[15]; + + // 
Only a single increment as the previous AES_GCM_DECRYPT already does one for us + J0[3]++; + + u32 pt2[8] = { 0 }; + + AES_GCM_decrypt (key, J0, ct2, 32, pt2, s_te0, s_te1, s_te2, s_te3, s_te4); + + correct = is_valid_printable_32 (pt2[0]) + + is_valid_printable_32 (pt2[1]) + + is_valid_printable_32 (pt2[2]) + + is_valid_printable_32 (pt2[3]) + + is_valid_printable_32 (pt2[4]) + + is_valid_printable_32 (pt2[5]) + + is_valid_printable_32 (pt2[6]) + + is_valid_printable_32 (pt2[7]); + + // We need to check a second and third block to avoid extremely rare false-positives. See: + // https://github.com/hashcat/hashcat/issues/4121 + if (correct != 8) return; + /* const int pt_len = 28; // not using 32 byte but 28 because our UTF8 allows up to 4 byte per character and since we decrypt 32 byte // only we can't guarantee it is not in the middle of a UTF8 byte stream at that point From dcfa17100ea13b27f5bd18c8ed2bf1b99ce276e4 Mon Sep 17 00:00:00 2001 From: PenguinKeeper7 Date: Tue, 26 Nov 2024 00:09:53 +0000 Subject: [PATCH 16/57] Minor typo fix --- OpenCL/m26610-pure.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL/m26610-pure.cl b/OpenCL/m26610-pure.cl index cb02840f6..9815eb8dd 100644 --- a/OpenCL/m26610-pure.cl +++ b/OpenCL/m26610-pure.cl @@ -406,7 +406,7 @@ KERNEL_FQ void m26610_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh + is_valid_printable_32 (pt2[6]) + is_valid_printable_32 (pt2[7]); - // We need to check a second and third block to avoid extremely rare false-positives. See: + // We need to check a third and fourth block to avoid extremely rare false-positives. See: // https://github.com/hashcat/hashcat/issues/4121 if (correct != 8) return; From 475512a80e81b3c83ffe912a12e342424fd6f81c Mon Sep 17 00:00:00 2001 From: PenguinKeeper7 Date: Fri, 6 Dec 2024 05:43:10 +0000 Subject: [PATCH 17/57] Add [b] [c] and [f] functionality in pause state Add the ability to [b]ypass, [c]heckpoint and [f]inish an attack while paused. 
I'm not too sure why it was limited to only [r]unning states but have not found any bugs in testing --- src/status.c | 16 ++++++++++++++-- src/thread.c | 6 ------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/status.c b/src/status.c index 4e964fd3b..be64a0743 100644 --- a/src/status.c +++ b/src/status.c @@ -35,6 +35,8 @@ static const char *const ST_0013 = "Error"; static const char *const ST_0014 = "Aborted (Finish)"; static const char *const ST_0015 = "Running (Quit after attack requested)"; static const char *const ST_0016 = "Autodetect"; +static const char *const ST_0017 = "Paused (Checkpoint Quit requested)"; +static const char *const ST_0018 = "Paused (Quit after attack requested)"; static const char *const ST_9999 = "Unknown! Bug!"; static const char UNITS[7] = { ' ', 'k', 'M', 'G', 'T', 'P', 'E' }; @@ -262,8 +264,6 @@ const char *status_get_status_string (const hashcat_ctx_t *hashcat_ctx) const int devices_status = status_ctx->devices_status; - // special case: running but checkpoint quit requested - if (devices_status == STATUS_RUNNING) { if (status_ctx->checkpoint_shutdown == true) @@ -276,6 +276,18 @@ const char *status_get_status_string (const hashcat_ctx_t *hashcat_ctx) return ST_0015; } } + else if (devices_status == STATUS_PAUSED) + { + if (status_ctx->checkpoint_shutdown == true) + { + return ST_0017; + } + + if (status_ctx->finish_shutdown == true) + { + return ST_0018; + } + } switch (devices_status) { diff --git a/src/thread.c b/src/thread.c index c33d25b8d..bfad64c38 100644 --- a/src/thread.c +++ b/src/thread.c @@ -214,8 +214,6 @@ int bypass (hashcat_ctx_t *hashcat_ctx) { status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - if (status_ctx->devices_status != STATUS_RUNNING) return -1; - status_ctx->devices_status = STATUS_BYPASS; status_ctx->run_main_level1 = true; @@ -262,8 +260,6 @@ int stop_at_checkpoint (hashcat_ctx_t *hashcat_ctx) { status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - if (status_ctx->devices_status != 
STATUS_RUNNING) return -1; - // this feature only makes sense if --restore-disable was not specified restore_ctx_t *restore_ctx = hashcat_ctx->restore_ctx; @@ -305,8 +301,6 @@ int finish_after_attack (hashcat_ctx_t *hashcat_ctx) { status_ctx_t *status_ctx = hashcat_ctx->status_ctx; - if (status_ctx->devices_status != STATUS_RUNNING) return -1; - // Enable or Disable if (status_ctx->finish_shutdown == false) From 3617df2f2516688617c6267f53fb8d09c491cdf3 Mon Sep 17 00:00:00 2001 From: PenguinKeeper7 Date: Tue, 10 Dec 2024 07:57:41 +0000 Subject: [PATCH 18/57] Gracefully handle corrupt .gz archives --- src/combinator.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++ src/filehandling.c | 11 +++++++++ src/straight.c | 35 ++++++++++++++++++++++++++++ src/wordlist.c | 21 +++++++++++++---- 4 files changed, 121 insertions(+), 4 deletions(-) diff --git a/src/combinator.c b/src/combinator.c index 654ada03f..5ff52d8ac 100644 --- a/src/combinator.c +++ b/src/combinator.c @@ -96,6 +96,16 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc1 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile1); + + hc_fclose (&fp1); + hc_fclose (&fp2); + + return -1; + } + if (words1_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile1); @@ -122,6 +132,13 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc2 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile2); + + return -1; + } + if (words2_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile2); @@ -199,6 +216,16 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc1 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile1); + + hc_fclose (&fp1); + hc_fclose (&fp2); + + return -1; + } + if (words1_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile1); @@ -225,6 +252,13 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) 
return -1; } + if (rc2 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile2); + + return -1; + } + if (words2_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile2); @@ -330,6 +364,16 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc1 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile1); + + hc_fclose (&fp1); + hc_fclose (&fp2); + + return -1; + } + if (words1_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile1); @@ -356,6 +400,13 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc2 == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile2); + + return -1; + } + if (words2_cnt == 0) { event_log_error (hashcat_ctx, "%s: empty file.", dictfile2); @@ -412,6 +463,13 @@ int combinator_ctx_init (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", dictfile); + + return -1; + } + combinator_ctx->combs_cnt = words_cnt; combinator_ctx->combs_mode = COMBINATOR_MODE_BASE_LEFT; } diff --git a/src/filehandling.c b/src/filehandling.c index 9a2d0ea39..712ccdb66 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -410,6 +410,17 @@ size_t hc_fread (void *ptr, size_t size, size_t nmemb, HCFILE *fp) else if (fp->gfp) { n = gzfread (ptr, size, nmemb, fp->gfp); + + // Double check to make sure that it successfully read 0 bytes instead of erroring + if (n == 0) + { + int errnum; + gzerror (fp->gfp, &errnum); + if (errnum != Z_OK) + { + return (size_t) -1; + } + } } else if (fp->ufp) { diff --git a/src/straight.c b/src/straight.c index 36d3a8eb8..7a75a7d66 100644 --- a/src/straight.c +++ b/src/straight.c @@ -91,6 +91,13 @@ int straight_ctx_update_loop (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", straight_ctx->dict); + + return -1; + } + if 
(status_ctx->words_cnt == 0) { logfile_sub_msg ("STOP"); @@ -125,6 +132,13 @@ int straight_ctx_update_loop (hashcat_ctx_t *hashcat_ctx) return -1; } + + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", combinator_ctx->dict1); + + return -1; + } } else if (combinator_ctx->combs_mode == COMBINATOR_MODE_BASE_RIGHT) { @@ -147,6 +161,13 @@ int straight_ctx_update_loop (hashcat_ctx_t *hashcat_ctx) return -1; } + + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", combinator_ctx->dict2); + + return -1; + } } if (status_ctx->words_cnt == 0) @@ -194,6 +215,13 @@ int straight_ctx_update_loop (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", straight_ctx->dict); + + return -1; + } + if (status_ctx->words_cnt == 0) { logfile_sub_msg ("STOP"); @@ -234,6 +262,13 @@ int straight_ctx_update_loop (hashcat_ctx_t *hashcat_ctx) return -1; } + if (rc == -2) + { + event_log_error (hashcat_ctx, "Error reading wordlist: %s", straight_ctx->dict); + + return -1; + } + if ((status_ctx->words_cnt / straight_ctx->kernel_rules_cnt) != hashes->salts_cnt) { event_log_error (hashcat_ctx, "Number of words in wordlist '%s' is not in sync with number of unique salts", straight_ctx->dict); diff --git a/src/wordlist.c b/src/wordlist.c index 1776b4885..0511381b6 100644 --- a/src/wordlist.c +++ b/src/wordlist.c @@ -60,6 +60,11 @@ int load_segment (hashcat_ctx_t *hashcat_ctx, HCFILE *fp) wl_data->cnt = hc_fread (wl_data->buf, 1, wl_data->incr - 1000, fp); + if (wl_data->cnt == (size_t) -1) + { + return -1; + } + wl_data->buf[wl_data->cnt] = 0; if (wl_data->cnt == 0) return 0; @@ -339,7 +344,12 @@ void get_next_word (hashcat_ctx_t *hashcat_ctx, HCFILE *fp, char **out_buf, u32 return; } - load_segment (hashcat_ctx, fp); + if (load_segment (hashcat_ctx, fp) == -1) + { + event_log_error (hashcat_ctx, "Error reading file!\n"); + + return; + } get_next_word (hashcat_ctx, fp, 
out_buf, out_len); } @@ -559,9 +569,12 @@ int count_words (hashcat_ctx_t *hashcat_ctx, HCFILE *fp, const char *dictfile, u u64 cnt2 = 0; while (!hc_feof (fp)) - { - load_segment (hashcat_ctx, fp); - + { + if (load_segment (hashcat_ctx, fp) == -1) + { + return -2; + } + comp += wl_data->cnt; u64 i = 0; From 4395d1467812918a8b0ec0019a7b154d9a47dbfa Mon Sep 17 00:00:00 2001 From: Romke van Dijk Date: Sun, 22 Dec 2024 19:27:19 +0100 Subject: [PATCH 19/57] Adding keybag2hashcat --- tools/keybag2hashcat.py | 171 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 tools/keybag2hashcat.py diff --git a/tools/keybag2hashcat.py b/tools/keybag2hashcat.py new file mode 100644 index 000000000..83da25c5e --- /dev/null +++ b/tools/keybag2hashcat.py @@ -0,0 +1,171 @@ +import argparse +import logging +import sys + +__VERSION__ = '1.0.0' + +# Set up logging +logger = logging.getLogger("keybag_logger") +handler = logging.StreamHandler() +formatter = logging.Formatter('%(message)s') +handler.setFormatter(formatter) +logger.addHandler(handler) + +class Keybag: + def __init__(self, file_obj): + self.size = 0 + self.uuid = '' + self.version = 0 + self.type = 0 + self.hmackey = '' + self.wrap = 0 + self.salt = '' + self.iterations = 0 + + + self._read_header(file_obj) + self.class_keys = self._read_class_keys(file_obj) + + def _read_header(self, file_obj): + while True: + tag = file_obj.read(4).decode('ascii') + if tag == 'DATA': # DATA + self.size = int.from_bytes(file_obj.read(4), byteorder='big') + else: + length = int.from_bytes(file_obj.read(4), byteorder='big') + data = file_obj.read(length) + + if tag == 'VERS': # VERS + self.version = int.from_bytes(data, byteorder='big') + elif tag == 'TYPE': + self.type = int.from_bytes(data, byteorder='big') + elif tag == 'UUID': + if not self.uuid: + self.uuid = data.hex() + else: + file_obj.seek(-length - 8, 1) + break + elif tag == 'HMCK': + self.hmackey = data.hex() + elif tag == 'WRAP': + self.wrap 
= int.from_bytes(data, byteorder='big') + elif tag == 'SALT': + self.salt = data.hex() + elif tag == 'ITER': + self.iterations = int.from_bytes(data, byteorder='big') + + def _read_class_keys(self, file_obj): + class_keys = {} + + for x in range(0, 10): + stop = False + while stop != True: + tag = file_obj.read(4).decode('ascii') + length = int.from_bytes(file_obj.read(4), byteorder='big') + data = file_obj.read(length) + # new class key + if tag == 'UUID': + if class_keys.get(x): + if class_keys[x].get('UUID'): + file_obj.seek(-length - 8, 1) + stop = True + else: + class_keys[x] = {} + else: + class_keys[x] = {} + if tag == 'WRAP' or tag == 'CLAS' or tag == 'KTYP': + class_keys[x][tag] = int.from_bytes(data, byteorder='big') + else: + class_keys[x][tag] = data.hex() + if file_obj.tell() > self.size: + stop = True + return class_keys + + + def print_keybag(self): + logger.debug(f'SIZE: {self.size}') + logger.debug(f'VERSION: {self.version}') + logger.debug(f'TYPE: {self.type}') + logger.debug(f'UUID: {self.uuid}') + logger.debug(f'HMACKEY: {self.hmackey}') + logger.debug(f'SALT: {self.salt}') + logger.debug(f'ITERATIONS: {self.iterations}') + for x, class_key in self.class_keys.items(): + logger.debug(f'{x}:') + for key, value in class_key.items(): + logger.debug(f' {key}: {value}') + +def main(): + # Create the argument parser + parser = argparse.ArgumentParser(description="Process a keybag file with a specified UID.") + + # Add the UID argument + parser.add_argument( + '--uid', + type=str, + required=True, + help="Specify the device UID." + ) + + # Add the keybag file argument + parser.add_argument( + 'keybag', + type=str, + help="Path to the keybag file." + ) + + # Add the debug flag + parser.add_argument( + '--debug', + action='store_true', + help="Enable debug logging." 
+ ) + + # Parse the arguments + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + else: + logger.setLevel(logging.WARNING) + + # Access the arguments + uid = args.uid[0:32] + keybag_path = args.keybag + + logger.debug(f'keybag2hashcat - version {__VERSION__}') + + with open(keybag_path, 'br') as keybag_file: + kb = Keybag(keybag_file) + kb.print_keybag() + if not kb.version: + logger.error('Unable to detect version of keybag, exiting.') + sys.exit(1) + if not kb.salt: + logger.error('Unable to detect salt, exiting.') + sys.exit(1) + if not kb.iterations: + logger.error('Unable to detect iterations, exiting.') + sys.exit(1) + if not kb.version in [3, 4]: + logger.error(f'This script has not been tested with version {kb.version}.') + sys.exit(1) + if not kb.class_keys: + logger.error(f'Unable to parse class keys, exiting.') + sys.exit(1) + classkey1 = 0 + for x, class_key in kb.class_keys.items(): + if class_key.get('WRAP') == 3: + class_type = class_key.get('CLAS') + if class_type == 1 or class_type == 33: + classkey1 = class_key.get('WPKY') + + if not classkey1: + logger.error(f'Unable to find a classkey of class NSFileProtectionComplete.') + logger.error(f'You could try to get another class key, make sure it is ktyp 0 and wrap 3.') + exit(1) + print(f'$uido${uid}${kb.salt}${kb.iterations}${classkey1}') + + +if __name__ == "__main__": + main() From 08514edd22c10d21ba329317b0985753a695396f Mon Sep 17 00:00:00 2001 From: unix-ninja Date: Sat, 15 Feb 2025 22:29:38 -0500 Subject: [PATCH 20/57] Ignore .DS_Store files. 
--- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index d8ff600bb..f7d1967e3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.DS_Store +*/.DS_Store *.exe *.bin *.app From faa680fbab803723d77449b7107c1c985a6b7981 Mon Sep 17 00:00:00 2001 From: unix-ninja Date: Sat, 15 Feb 2025 22:30:05 -0500 Subject: [PATCH 21/57] Add gitea2hashcat.py --- tools/gitea2hashcat.py | 75 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100755 tools/gitea2hashcat.py diff --git a/tools/gitea2hashcat.py b/tools/gitea2hashcat.py new file mode 100755 index 000000000..ac1c539d2 --- /dev/null +++ b/tools/gitea2hashcat.py @@ -0,0 +1,75 @@ +#!/usr/bin/python3 +# Converts gitea PBKDF2-HMAC-SHA256 hashes into a format hashcat can use +# written by unix-ninja + +import argparse +import base64 +import sys + +def convert_hash(hash_string): + """Converts a SALT+HASH string to a hashcat compatible format, + ensuring the smaller input is treated as the salt. + Use : or | as delimeters. + """ + hash_string = hash_string.replace('|', ':') + try: + part1, part2 = hash_string.split(":") + except ValueError: + print(f"[-] Invalid input format: {hash_string}") + return None + + try: + bytes1 = bytes.fromhex(part1) + bytes2 = bytes.fromhex(part2) + except ValueError: + print(f"[-] Invalid hex input: {hash_string}") + return None + + # If lengths are equal, we will maintain the original order + if len(bytes1) > len(bytes2): + salt_bytes = bytes2 + hash_bytes = bytes1 + else: + salt_bytes = bytes1 + hash_bytes = bytes2 + + + salt_b64 = base64.b64encode(salt_bytes).decode('utf-8') + hash_b64 = base64.b64encode(hash_bytes).decode('utf-8') + + return f"sha256:50000:{salt_b64}:{hash_b64}" + + +def main(): + parser = argparse.ArgumentParser(description="Convert Gitea SALT+HASH strings to a hashcat-compatible format.", + formatter_class=argparse.RawTextHelpFormatter, + epilog="""Example: + gitea2hashcat.py : | ... 
or pipe input from stdin. + + You can also dump output straight from sqlite into this script: + sqlite3 gitea.db 'select salt,passwd from user;' | gitea2hashcat.py""") + parser.add_argument('hashes', nargs='*', help='SALT+HASH strings to convert') + args = parser.parse_args() + + # ... (rest of the main function remains the same) + print("[+] Run the output hashes through hashcat mode 10900 (PBKDF2-HMAC-SHA256)") + print() + + if args.hashes: + # Process command-line arguments + for hash_string in args.hashes: + converted_hash = convert_hash(hash_string) + if converted_hash: + print(converted_hash) + + else: + # Process input from stdin + for line in sys.stdin: + hash_string = line.strip() # Remove leading/trailing whitespace + converted_hash = convert_hash(hash_string) + if converted_hash: + print(converted_hash) + + +if __name__ == "__main__": + main() From 7509c6f70abcbf55fa35e17e8608ae2779fd9760 Mon Sep 17 00:00:00 2001 From: holly-o <128481388+holly-o@users.noreply.github.com> Date: Fri, 21 Feb 2025 16:03:42 +0000 Subject: [PATCH 22/57] Add apfs2hashcat.py --- tools/apfs2hashcat.py | 434 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 tools/apfs2hashcat.py diff --git a/tools/apfs2hashcat.py b/tools/apfs2hashcat.py new file mode 100644 index 000000000..d310dd43e --- /dev/null +++ b/tools/apfs2hashcat.py @@ -0,0 +1,434 @@ +#!/usr/bin/env python3 + +# For extracting APFS hashes to be cracked by hashcat modes 18300 ($fvde$2$) or 16700 ($fvde$1$). +# Usage: `python3 apfs2hashcat.py -o <_apfs_container_offset>` +# The argument -o is optional. The script will attempt to read the partition table to find the location of APFS container(s). In the case that the partition table is missing or you want to specify a particular APFS container, use -o to provide the offset to the start of the container. 
+ +import argparse +from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes + +# KNOWN_RECOVERY_HASHES = ['ebc6c064000011aaaa1100306543ecac', 'ec1c2ad9b6184ed6bd8d50f361c27507'] +KNOWN_RECOVERY_HASHES = ['64C0C6EB-0000-AA11-AA11-00306543ECAC', 'D92A1CEC-18B6-D64E-BD8D-50F361C27507'] +TAG_DICT = {'unk_80' : {'tag' : b'\x80', 'expected_len' : 1}, + 'uuid' : {'tag' : b'\x81', 'expected_len' : 0x10}, + 'unk_82' : {'tag' : b'\x82'}, + 'wrapped_kek' : {'tag' : b'\x83', 'expected_len' : 0x28}, + 'iterations' : {'tag' : b'\x84'}, + 'salt' : {'tag' : b'\x85', 'expected_len' : 0x10}} +# HEX_APFS_CONTAINER_GUID = 'ef57347c0000aa11aa1100306543ecac' +HEX_APFS_CONTAINER_GUID = '7C3457EF-0000-11AA-AA11-00306543ECAC' +AES_XTS_SECTOR_SIZE = 512 +EFI_PARTITION_HEADER = b'EFI PART' + +def uint_to_int(b): + return int(b[::-1].hex(), 16) + + +def findall(p, s): + i = s.find(p) + while i != -1: + yield i + i = s.find(p, i+1) + + +def hex_to_guid(hex_str): + + guid_parts = [0] * 5 + guid_parts[0] = hex_str[0:8] + guid_parts[1] = hex_str[8:12] + guid_parts[2] = hex_str[12:16] + guid_parts[3] = hex_str[16:20] + guid_parts[4] = hex_str[20:] + + guid = ''.join([guid_parts[0][i:i+2] for i in range(0, len(guid_parts[0]), 2)][::-1]) + guid += '-' + guid += ''.join([guid_parts[1][i:i+2] for i in range(0, len(guid_parts[1]), 2)][::-1]) + guid += '-' + guid += ''.join([guid_parts[2][i:i+2] for i in range(0, len(guid_parts[2]), 2)][::-1]) + guid += '-' + guid += guid_parts[3] + guid += '-' + guid += guid_parts[4] + + return guid.upper() + + +def parse_partition_entry(partition_entry): + type_GUID = partition_entry[0:0x10] + part_GUID = partition_entry[0x10:0x20] + start_LBA = partition_entry[0x20:0x28] + # end_LBA = partition_entry[0x28:0x30] + return part_GUID, type_GUID, start_LBA + + +# get main_start by multiplying apfs partition start lba by block size +def parse_partition_table(fp): + + # determine whether sector size is 0x200 or 0x1000 + sector_size = 0x0 + + # look for 
EFI PART at start of sector 1 + fp.seek(0x200) + signature = fp.read(0x8) + if signature == EFI_PARTITION_HEADER: + sector_size = 0x200 + + else: + fp.seek(0x1000) + signature = fp.read(0x8) + if signature == EFI_PARTITION_HEADER: + sector_size = 0x1000 + + print("[+] Identified sector size:", sector_size) + + if not sector_size: + print(f"[!] Invalid sector size {sector_size} (not 512 or 4096 bytes). Exiting.") + + fp.seek(2 * sector_size) # go to sector 2 + partitions = [] + partition_entry = b'1' + while any(partition_entry): + partition_entry = fp.read(0x80) + if any(partition_entry): + partitions.append(partition_entry) + + partition_dict = {} + for p in partitions: + part_GUID, type_GUID, start = parse_partition_entry(p) + starting_pos = uint_to_int(start) * sector_size + partition_dict[part_GUID.hex()] = {'start':starting_pos, 'partition_type':type_GUID.hex()} + + return partition_dict + + +def AES_XTS_decrypt_sector(uuid, tweak, ct): + + decryptor = Cipher( + algorithms.AES(key=uuid+uuid), + modes.XTS(tweak=tweak), + ).decryptor() + pt = decryptor.update(ct) + decryptor.finalize() + + return pt + + +def AES_decrypt(data, start_offset, block_size, uuid): + cs_factor = block_size // 0x200 # = 8 for block_size=4096 + uno = start_offset * cs_factor + pt = b'' + for offset in range(0, block_size, AES_XTS_SECTOR_SIZE): + ct = data[offset:offset + AES_XTS_SECTOR_SIZE] + tweak = hex(uno)[2:].zfill(32) # 32 so that the key is the correct length (16 bytes) + tweak = bytearray.fromhex(tweak)[::-1] + pt += AES_XTS_decrypt_sector(uuid, tweak, ct) + uno += 1 + + return pt + + +def TLV(full_kek_blob, tag, starting_index): + # expected tag should follow if this is the correct TLV) + if full_kek_blob[starting_index:starting_index+1] != TAG_DICT[tag]['tag']: + return -1, starting_index + # check for expected len for further confirmation + length = uint_to_int(full_kek_blob[starting_index+1:starting_index+2]) + expected_len = TAG_DICT[tag].get('expected_len') # use .get() 
since not all tags have an expected len + if expected_len: + if length != expected_len: + return -1, starting_index + next_starting_index = starting_index+2+length + value = full_kek_blob[starting_index+2:next_starting_index] + + return value, next_starting_index + + +def TLV_iterate(starting_index, pt, hash_set, volume_uuid): + for tag in TAG_DICT: + value, starting_index = TLV(pt, tag, starting_index) + + # i.e. if fails length check + if value == -1: + return starting_index + 1, hash_set + TAG_DICT[tag]['value'] = value + + aes_type = TAG_DICT['unk_82']['value'] + wrapped_kek = TAG_DICT['wrapped_kek']['value'] + iterations = TAG_DICT['iterations']['value'] + salt = TAG_DICT['salt']['value'] + + aes_type = uint_to_int(aes_type[0:4]) + + # FVDE - AES128 + if aes_type == 2: + aes_hash_value = 1 + wrapped_kek = wrapped_kek[:0x18] # shorter kek value, this removes zeros + + # APFS - AES256 + elif aes_type == 16 or aes_type == 0: + aes_hash_value = 2 + + else: + print("[!] AES type not recognised, continuing...") + return + + password_hash = f"$fvde${aes_hash_value}${len(salt)}${salt.hex()}${int(iterations.hex(),16)}${wrapped_kek.hex()}" + hash_set.add(password_hash) + print(f"\nFound password hash: {password_hash} (vol uuid: {volume_uuid.hex()})") + + kek_uuid = hex_to_guid(TAG_DICT['uuid']['value'].hex()) + if kek_uuid in KNOWN_RECOVERY_HASHES: + print(f"[!] Warning! Recognised UUID {kek_uuid}... 
possible recovery hash\n") + + return starting_index, hash_set + + +def parse_block(block): + nx_xid = uint_to_int(block[16:24]) + obj_type = uint_to_int(block[24:26]) + magic = block[0x20:0x24] + + return nx_xid, obj_type, magic + + +def parse_apsb_block(block): + obj_type = uint_to_int(block[24:26]) + magic = block[0x20:0x24] + uuid = block[240:256] + encryption = uint_to_int(block[264:272]) + name = block[704:960] + + return obj_type, magic, uuid, encryption, name + + +def parse_keybag_entry(uuid, pt): + uuid_iterator = findall(uuid, pt) + for starting_pos in uuid_iterator: + ke_uuid, ke_tag, ke_keylen = pt[starting_pos:starting_pos+16], uint_to_int(pt[starting_pos + 16:starting_pos + 18]), uint_to_int(pt[starting_pos + 18:starting_pos + 20]) + padding = pt[starting_pos + 20:starting_pos + 24] + keydata = pt[starting_pos + 24: starting_pos + 24 + ke_keylen] + + # only tag 3 is needed for constructing the hash + if ke_tag == 3: + assert padding == b'\x00\x00\x00\x00' + volume_unlock_record = keydata + return volume_unlock_record + + return None + + +def get_fs_oids(csb_body): + max_file_systems = uint_to_int(csb_body[0x94:0x98]) + fs_oids = set() + for fs_entry in range(max_file_systems): + oid_start = 0x98 + 8 * fs_entry + fs_oid = uint_to_int(csb_body[oid_start:oid_start + 8]) + if not fs_oid: + continue + fs_oids.add(fs_oid) + + return fs_oids + + +def parse_csb(csb): + csb_body = csb[0x20:0x568] + + header = csb_body[:4] # 'NXSB' + assert header == b'NXSB' + block_size = uint_to_int(csb_body[4:8]) # default is 4096 + uuid = csb_body[0x28:0x38] # used as key for unwrapping + omap_oid = uint_to_int(csb_body[0x80:0x88]) # omap_oid to locate the omap to find volume offsets + fs_oids = get_fs_oids(csb_body) + + # locate container's keybag using nx_keylocker field + keylocker_paddr = uint_to_int(csb_body[0x4f0:0x4f8]) + + # block info for iterating to find most recent csb + xp_desc_blocks = uint_to_int(csb_body[0x48:0x4b]) + xp_desc_base = 
uint_to_int(csb_body[0x50:0x54]) + + return block_size, uuid, keylocker_paddr, omap_oid, fs_oids, xp_desc_base, xp_desc_blocks + + +def get_offset_from_oid(oid, apfs_start, block_size): + return apfs_start + oid * block_size + + +def parse_tree(tree, fs_oids, block_size): + + volume_addresses = [] + + # get key data from TOC: + table_space_offset = uint_to_int(tree[0x28:0x2a]) + table_space_len = uint_to_int(tree[0x2a:0x2c]) + start_of_key_area = table_space_offset + table_space_len + 0x38 # 0x38 = header + entries + + # b-tree structure is header (0x20 bytes) -> ToC -> keys -> free space -> values -> btree_info (0x28 bytes) + end_of_value_area = block_size - 0x28 + + tree_data = tree[0x38:] + for m in range(len(fs_oids)): + data_start = m * 4 + key_offset = uint_to_int(tree_data[data_start:data_start + 2]) # key offset is from the start of the key area downwards + data_offset = uint_to_int(tree_data[data_start + 2:data_start + 4]) # data offset is from the end of the data area upwards + + # get to key area + key_start = key_offset + start_of_key_area + key_oid = uint_to_int(tree[key_start:key_start + 0x8]) + + if key_oid not in fs_oids: + print(f"Found key_oid {key_oid} in omap but not present in fs map. 
Skipping this volume") + + else: + val_end = end_of_value_area - data_offset + data_paddr = uint_to_int(tree[val_end + 0x8:val_end + 0x10]) + volume_addresses.append(data_paddr) + + return volume_addresses + + +def get_volumes(fp, block_size, apfs_start, tree, fs_oids): + volume_addresses = parse_tree(tree, fs_oids, block_size) + volumes_dict = dict() + for v in volume_addresses: + fp.seek(apfs_start + block_size * v) + block_start = fp.read(block_size) + obj_type, magic, uuid, encryption, name = parse_apsb_block(block_start) + if obj_type == 13 and magic == b'APSB': + volumes_dict[uuid] = {'start':v, 'name':name} + print() + print("[+] The following volumes are present:") + for u in volumes_dict: + print(f"{u.hex()} ({volumes_dict[u]['name'].decode()}) at {hex(volumes_dict[u]['start'] * block_size + apfs_start)}") + + return volumes_dict + + +def decrypt_volume_keybag(fp, volume_keybag_addr, block_size, apfs_struct_start, volume_uuid): + volume_keybag_addr = volume_keybag_addr[:4].hex().zfill(8) + volume_keybag_addr = bytearray.fromhex(volume_keybag_addr)[::-1] + volume_keybag_addr = int(volume_keybag_addr.hex(),16) + + offset = block_size * volume_keybag_addr + apfs_struct_start + fp.seek(offset) + encrypted_keybag = fp.read(block_size) + pt = AES_decrypt(encrypted_keybag, volume_keybag_addr, block_size, volume_uuid) + + return pt + + +def get_apfs_containers(fp): + partition_dict = parse_partition_table(fp) + apfs_containers = [] + for d in partition_dict: + if hex_to_guid(partition_dict[d]['partition_type']) == HEX_APFS_CONTAINER_GUID: + apfs_containers.append(partition_dict[d]['start']) + + return apfs_containers + + +def get_tree(fp, omap_oid, apfs_struct_start, block_size): + omap_offset = get_offset_from_oid(omap_oid, apfs_struct_start, block_size) + fp.seek(omap_offset + 0x30) # location for tree_oid + tree_oid = fp.read(0x10) + tree_oid = uint_to_int(tree_oid) + tree_offset = get_offset_from_oid(tree_oid, apfs_struct_start, block_size) + + 
fp.seek(tree_offset) + tree = fp.read(0x1000) + + return tree + + +def get_container_keybag(fp, apfs_struct_start, block_size, keylocker_paddr): + # calculate offset to read from + offs = block_size * keylocker_paddr + apfs_struct_start + fp.seek(offs) + data = fp.read(block_size) + + return data + + +def find_valid_csb(fp, block_size, xp_desc_base, xp_desc_blocks, apfs_start): + max_xid = 0 + max_xid_paddr = 0 + + for paddr in range(xp_desc_base, xp_desc_base + xp_desc_blocks): + offs = block_size * paddr + apfs_start + fp.seek(offs + 0x10) + csb_xid = uint_to_int(fp.read(0x8)) + if csb_xid >= max_xid: + max_xid = csb_xid + max_xid_paddr = paddr + + print(f"[+] Found valid csb with xid {max_xid} at {hex(max_xid_paddr)}") + return max_xid_paddr + + +def main(): + + p = argparse.ArgumentParser() + p.add_argument('filename') + p.add_argument('-o', '--offset', help='[OPTIONAL] offset for APFS volume - may be necessary if partition table is not present') + args = p.parse_args() + + filename = args.filename + with open(filename, 'rb') as fp: + + if args.offset: + apfs_offset = int(args.offset) + apfs_containers = [apfs_offset] + + else: + apfs_containers = get_apfs_containers(fp) + + if apfs_containers == []: + print("[!] 
APFS volume GUID not found, exiting.") + exit() + + for apfs_struct_start in apfs_containers: + print(f"[+] APFS container starts at {hex(apfs_struct_start)}") + fp.seek(apfs_struct_start) + csb = fp.read(0x568) + + # read the first csb for initial info - then use this to iterate through all csbs and find the most recent one + block_size, uuid, keylocker_paddr, omap_oid, fs_oids, xp_desc_base, xp_desc_blocks = parse_csb(csb) + valid_csb_paddr = find_valid_csb(fp, block_size, xp_desc_base, xp_desc_blocks, apfs_struct_start) + + fp.seek(valid_csb_paddr * block_size + apfs_struct_start) + valid_csb = fp.read(block_size) + block_size, uuid, keylocker_paddr, omap_oid, fs_oids, xp_desc_base, xp_desc_blocks = parse_csb(valid_csb) + + encrypted_keybag = get_container_keybag(fp, apfs_struct_start, block_size, keylocker_paddr) + # Unwrap container keybag using AES-XTS with container UUID as key + starting_pt = AES_decrypt(encrypted_keybag, keylocker_paddr, block_size, uuid) + + # find all volumes to iterate through + tree = get_tree(fp, omap_oid, apfs_struct_start, block_size) + + volumes_dict = get_volumes(fp, block_size, apfs_struct_start, tree, fs_oids) + + hash_set = set() + for volume_uuid in volumes_dict: + + # find entry in container's keybag matching volume UUID and has KB_TAG_VOLUME_UNLOCK_RECORDS = 3. Its keydata is location of volume keybag. 
+ volume_keybag_addr = parse_keybag_entry(volume_uuid, starting_pt) + + # continue if encrypted keybag not found + if not volume_keybag_addr: + continue + + # unwrap volume keybag using volume uuid AES-XTS + pt = decrypt_volume_keybag(fp, volume_keybag_addr, block_size, apfs_struct_start, volume_uuid) + + # parse TLV for 80 first + index_iterator = findall(TAG_DICT['unk_80']['tag'], pt) + for starting_index in index_iterator: + starting_index, hash_set = TLV_iterate(starting_index, pt, hash_set, volume_uuid) + + print() + print("[+] All hashes found.") + + return + +if __name__ == "__main__": + main() From a8e0c8b213aea5211aa459d90af6922f40831428 Mon Sep 17 00:00:00 2001 From: holly-o <128481388+holly-o@users.noreply.github.com> Date: Fri, 21 Feb 2025 16:06:29 +0000 Subject: [PATCH 23/57] Update apfs2hashcat.py Remove hex version of known UUIDs --- tools/apfs2hashcat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/apfs2hashcat.py b/tools/apfs2hashcat.py index d310dd43e..a0cc6a5da 100644 --- a/tools/apfs2hashcat.py +++ b/tools/apfs2hashcat.py @@ -7,7 +7,6 @@ import argparse from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes -# KNOWN_RECOVERY_HASHES = ['ebc6c064000011aaaa1100306543ecac', 'ec1c2ad9b6184ed6bd8d50f361c27507'] KNOWN_RECOVERY_HASHES = ['64C0C6EB-0000-AA11-AA11-00306543ECAC', 'D92A1CEC-18B6-D64E-BD8D-50F361C27507'] TAG_DICT = {'unk_80' : {'tag' : b'\x80', 'expected_len' : 1}, 'uuid' : {'tag' : b'\x81', 'expected_len' : 0x10}, @@ -15,7 +14,6 @@ TAG_DICT = {'unk_80' : {'tag' : b'\x80', 'expected_len' : 1}, 'wrapped_kek' : {'tag' : b'\x83', 'expected_len' : 0x28}, 'iterations' : {'tag' : b'\x84'}, 'salt' : {'tag' : b'\x85', 'expected_len' : 0x10}} -# HEX_APFS_CONTAINER_GUID = 'ef57347c0000aa11aa1100306543ecac' HEX_APFS_CONTAINER_GUID = '7C3457EF-0000-11AA-AA11-00306543ECAC' AES_XTS_SECTOR_SIZE = 512 EFI_PARTITION_HEADER = b'EFI PART' From 2c43290684e6315a4d0e671d855a7365cdbf66b7 Mon Sep 17 00:00:00 2001 From: Gabriele 
Gristina Date: Fri, 30 May 2025 08:08:22 +0200 Subject: [PATCH 24/57] porting module 32800 to v7 --- src/modules/module_32800.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/modules/module_32800.c b/src/modules/module_32800.c index 0d2469591..67e0d3509 100644 --- a/src/modules/module_32800.c +++ b/src/modules/module_32800.c @@ -205,6 +205,8 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_benchmark_mask = MODULE_DEFAULT; module_ctx->module_benchmark_charset = MODULE_DEFAULT; module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_bridge_name = MODULE_DEFAULT; + module_ctx->module_bridge_type = MODULE_DEFAULT; module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; module_ctx->module_deprecated_notice = MODULE_DEFAULT; From ad58af988300a6171e8f699bf664d311bef8b434 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Thu, 26 Jun 2025 22:06:39 +0200 Subject: [PATCH 25/57] change KERNEL_FQ to KERNEL_FQ KERNEL_FA statements --- OpenCL/m32800_a0-pure.cl | 4 ++-- OpenCL/m32800_a1-pure.cl | 4 ++-- OpenCL/m32800_a3-pure.cl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/OpenCL/m32800_a0-pure.cl b/OpenCL/m32800_a0-pure.cl index 9df5a7b80..7745b43fe 100644 --- a/OpenCL/m32800_a0-pure.cl +++ b/OpenCL/m32800_a0-pure.cl @@ -38,7 +38,7 @@ typedef struct md5_double_salt } md5_double_salt_t; -KERNEL_FQ void m32800_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) { /** * modifier @@ -180,7 +180,7 @@ KERNEL_FQ void m32800_mxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) } } -KERNEL_FQ void m32800_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_sxx (KERN_ATTR_RULES_ESALT (md5_double_salt_t)) { /** * modifier diff --git a/OpenCL/m32800_a1-pure.cl b/OpenCL/m32800_a1-pure.cl index 756df73ab..49f753e91 100644 --- a/OpenCL/m32800_a1-pure.cl +++ 
b/OpenCL/m32800_a1-pure.cl @@ -36,7 +36,7 @@ typedef struct md5_double_salt } md5_double_salt_t; -KERNEL_FQ void m32800_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) { /** * modifier @@ -176,7 +176,7 @@ KERNEL_FQ void m32800_mxx (KERN_ATTR_ESALT (md5_double_salt_t)) } } -KERNEL_FQ void m32800_sxx (KERN_ATTR_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_sxx (KERN_ATTR_ESALT (md5_double_salt_t)) { /** * modifier diff --git a/OpenCL/m32800_a3-pure.cl b/OpenCL/m32800_a3-pure.cl index 531785dde..07a51c51b 100644 --- a/OpenCL/m32800_a3-pure.cl +++ b/OpenCL/m32800_a3-pure.cl @@ -36,7 +36,7 @@ typedef struct md5_double_salt } md5_double_salt_t; -KERNEL_FQ void m32800_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) { /** * modifier @@ -189,7 +189,7 @@ KERNEL_FQ void m32800_mxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) } } -KERNEL_FQ void m32800_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) +KERNEL_FQ KERNEL_FA void m32800_sxx (KERN_ATTR_VECTOR_ESALT (md5_double_salt_t)) { /** * modifier From 02a439ce02c722dcb798e2b2869a5d94606f21e5 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 9 Jul 2025 15:54:03 +0200 Subject: [PATCH 26/57] Fix compile error on MSYS2 console --- src/terminal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/terminal.c b/src/terminal.c index 0b6e00f41..c36cf4c20 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -17,6 +17,13 @@ #include "timer.h" #include "terminal.h" +#if defined (_POSIX) +#include +#if !defined (__APPLE__) +#include +#endif +#endif + static const size_t MAXIMUM_EXAMPLE_HASH_LENGTH = 200; static const size_t TERMINAL_LINE_LENGTH = 79; @@ -1253,7 +1260,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) printf ("\"SystemInfo\": { "); } - #if defined (_WIN) || defined (__CYGWIN__) || defined (__MSYS__) + #if defined (_WIN) // 
Get Windows system information SYSTEM_INFO sysinfo; OSVERSIONINFO osvi; @@ -1311,6 +1318,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) printf ("\"Model\": \"%s\" } ", "N/A"); printf ("}, "); } + #else struct utsname utsbuf; @@ -1320,7 +1328,7 @@ void backend_info (hashcat_ctx_t *hashcat_ctx) char *hw_model_buf = NULL; - #if !defined (__linux__) + #if !defined (__linux__) && !defined (__CYGWIN__) && !defined (__MSYS__) size_t hw_model_len = 0; From 3d28985f6bda2daf52dafd8fdca3b71be104d31d Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 17:41:14 +0200 Subject: [PATCH 27/57] move includes from terminal.c to terminal.h, hide build warnings on unrar when using clang --- include/terminal.h | 11 ++++++----- src/Makefile | 2 ++ src/terminal.c | 7 ------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/include/terminal.h b/include/terminal.h index 1946e0c6e..083bb49a8 100644 --- a/include/terminal.h +++ b/include/terminal.h @@ -18,15 +18,16 @@ #include #if defined (__APPLE__) #include +#include #endif // __APPLE__ #endif // _WIN -#if !defined (_WIN) && !defined (__CYGWIN__) && !defined (__MSYS__) +#if defined (_POSIX) #include -#if !defined (__linux__) -#include -#endif // ! __linux__ -#endif // ! _WIN && | __CYGWIN__ && ! __MSYS__ +#if !defined (__APPLE__) +#include +#endif // ! 
__APPLE__ +#endif // _POSIX void welcome_screen (hashcat_ctx_t *hashcat_ctx, const char *version_tag); void goodbye_screen (hashcat_ctx_t *hashcat_ctx, const time_t proc_start, const time_t proc_stop); diff --git a/src/Makefile b/src/Makefile index 2cfbf9406..cf85746aa 100644 --- a/src/Makefile +++ b/src/Makefile @@ -236,6 +236,8 @@ ifneq ($(CC),clang) CFLAGS_UNRAR += -Wno-class-memaccess CFLAGS_UNRAR += -Wno-misleading-indentation CFLAGS_UNRAR += -Wno-format-overflow +else +CFLAGS_UNRAR += -Wno-nontrivial-memcall endif CFLAGS_UNRAR += -Wno-missing-braces CFLAGS_UNRAR += -Wno-unused-variable diff --git a/src/terminal.c b/src/terminal.c index c36cf4c20..8d5562b15 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -17,13 +17,6 @@ #include "timer.h" #include "terminal.h" -#if defined (_POSIX) -#include -#if !defined (__APPLE__) -#include -#endif -#endif - static const size_t MAXIMUM_EXAMPLE_HASH_LENGTH = 200; static const size_t TERMINAL_LINE_LENGTH = 79; From 0f51dc6d4b7c814aa107974589c3ba6a4b9fd69d Mon Sep 17 00:00:00 2001 From: oblivionsage Date: Wed, 9 Jul 2025 20:08:01 +0200 Subject: [PATCH 28/57] Fix XZ file seek operation in hc_fseek() - Implement missing XZ file seeking functionality - Support SEEK_SET with offset 0 (rewind operation) - Return error for unsupported arbitrary seek operations - Follows existing gfp/ufp implementation pattern - Resolves TODO comment on line 582 in src/filehandling.c - Code complies with all hashcat style requirements --- src/filehandling.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/filehandling.c b/src/filehandling.c index 63fc4f908..92af1f1e7 100644 --- a/src/filehandling.c +++ b/src/filehandling.c @@ -579,7 +579,18 @@ int hc_fseek (HCFILE *fp, off_t offset, int whence) } else if (fp->xfp) { - /* TODO */ + /* XZ files are compressed streams, seeking is limited */ + if (offset == 0 && whence == SEEK_SET) + { + /* Rewind to beginning */ + hc_rewind(fp); + r = 0; + } + else + { + /* 
Arbitrary seeking not supported for compressed XZ files */ + r = -1; + } } return r; From a590ce8351dd158e2aa6c392ebb0edd97f6040c8 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 22:19:02 +0200 Subject: [PATCH 29/57] fix build errors with 33800 --- OpenCL/m33800-pure.cl | 257 ++++++++++++++++++++++++++++++++---------- 1 file changed, 199 insertions(+), 58 deletions(-) diff --git a/OpenCL/m33800-pure.cl b/OpenCL/m33800-pure.cl index 646a68c7b..7746f7f01 100644 --- a/OpenCL/m33800-pure.cl +++ b/OpenCL/m33800-pure.cl @@ -1155,64 +1155,205 @@ KERNEL_FQ KERNEL_FA void m33800_init2 (KERN_ATTR_TMPS (bcrypt_tmp_t)) KERNEL_FQ KERNEL_FA void m33800_loop2 (KERN_ATTR_TMPS (bcrypt_tmp_t)) { - // the second loop is the same as the first one, only different "password" in init2 -#if defined IS_CUDA || defined IS_HIP - m33800_loop( - pws, - g_rules_buf, - combs_buf, - g_bfs_buf, - tmps, - hooks, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - plains_buf, - digests_buf, - hashes_shown, - salt_bufs, - esalt_bufs, - d_return_buf, - d_extra0_buf, - d_extra1_buf, - d_extra2_buf, - d_extra3_buf, - kernel_param - ); -#else - m33800_loop( - pws, - rules_buf, - combs_buf, - bfs_buf, - tmps, - hooks, - bitmaps_buf_s1_a, - bitmaps_buf_s1_b, - bitmaps_buf_s1_c, - bitmaps_buf_s1_d, - bitmaps_buf_s2_a, - bitmaps_buf_s2_b, - bitmaps_buf_s2_c, - bitmaps_buf_s2_d, - plains_buf, - digests_buf, - hashes_shown, - salt_bufs, - esalt_bufs, - d_return_buf, - d_extra0_buf, - d_extra1_buf, - d_extra2_buf, - d_extra3_buf, - kernel_param - ); -#endif + /** + * base + */ + + const u64 gid = get_global_id (0); + const u64 lid = get_local_id (0); + + if (gid >= GID_CNT) return; + + // load + + u32 E[18]; + + for (u32 i = 0; i < 18; i++) + { + E[i] = tmps[gid].E[i]; + } + + u32 P[18]; + + for (u32 i = 0; i < 18; i++) + { + P[i] = tmps[gid].P[i]; + } + + #ifdef DYNAMIC_LOCAL + 
// from host + #else + LOCAL_VK u32 S0_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S1_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S2_all[FIXED_LOCAL_SIZE][256]; + LOCAL_VK u32 S3_all[FIXED_LOCAL_SIZE][256]; + #endif + + #ifdef BCRYPT_AVOID_BANK_CONFLICTS + LOCAL_AS u32 *S0 = S + (FIXED_LOCAL_SIZE * 256 * 0); + LOCAL_AS u32 *S1 = S + (FIXED_LOCAL_SIZE * 256 * 1); + LOCAL_AS u32 *S2 = S + (FIXED_LOCAL_SIZE * 256 * 2); + LOCAL_AS u32 *S3 = S + (FIXED_LOCAL_SIZE * 256 * 3); + #else + LOCAL_AS u32 *S0 = S0_all[lid]; + LOCAL_AS u32 *S1 = S1_all[lid]; + LOCAL_AS u32 *S2 = S2_all[lid]; + LOCAL_AS u32 *S3 = S3_all[lid]; + #endif + + for (u32 i = 0; i < 256; i++) + { + SET_KEY32 (S0, i, tmps[gid].S0[i]); + SET_KEY32 (S1, i, tmps[gid].S1[i]); + SET_KEY32 (S2, i, tmps[gid].S2[i]); + SET_KEY32 (S3, i, tmps[gid].S3[i]); + } + + /** + * salt + */ + + u32 salt_buf[4]; + + salt_buf[0] = salt_bufs[SALT_POS_HOST].salt_buf[0]; + salt_buf[1] = salt_bufs[SALT_POS_HOST].salt_buf[1]; + salt_buf[2] = salt_bufs[SALT_POS_HOST].salt_buf[2]; + salt_buf[3] = salt_bufs[SALT_POS_HOST].salt_buf[3]; + + /** + * main loop + */ + + u32 L0; + u32 R0; + + for (u32 i = 0; i < LOOP_CNT; i++) + { + for (u32 i = 0; i < 18; i++) + { + P[i] ^= E[i]; + } + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + + P[ 0] ^= salt_buf[0]; + P[ 1] ^= salt_buf[1]; + P[ 2] ^= salt_buf[2]; + P[ 3] ^= salt_buf[3]; + P[ 4] ^= salt_buf[0]; + P[ 5] 
^= salt_buf[1]; + P[ 6] ^= salt_buf[2]; + P[ 7] ^= salt_buf[3]; + P[ 8] ^= salt_buf[0]; + P[ 9] ^= salt_buf[1]; + P[10] ^= salt_buf[2]; + P[11] ^= salt_buf[3]; + P[12] ^= salt_buf[0]; + P[13] ^= salt_buf[1]; + P[14] ^= salt_buf[2]; + P[15] ^= salt_buf[3]; + P[16] ^= salt_buf[0]; + P[17] ^= salt_buf[1]; + + L0 = 0; + R0 = 0; + + for (u32 i = 0; i < 9; i++) + { + BF_ENCRYPT (L0, R0); + + P[i * 2 + 0] = L0; + P[i * 2 + 1] = R0; + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S0, i + 0, L0); + SET_KEY32 (S0, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S1, i + 0, L0); + SET_KEY32 (S1, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S2, i + 0, L0); + SET_KEY32 (S2, i + 1, R0); + } + + for (u32 i = 0; i < 256; i += 2) + { + BF_ENCRYPT (L0, R0); + + SET_KEY32 (S3, i + 0, L0); + SET_KEY32 (S3, i + 1, R0); + } + } + + // store + + for (u32 i = 0; i < 18; i++) + { + tmps[gid].P[i] = P[i]; + } + + for (u32 i = 0; i < 256; i++) + { + tmps[gid].S0[i] = GET_KEY32 (S0, i); + tmps[gid].S1[i] = GET_KEY32 (S1, i); + tmps[gid].S2[i] = GET_KEY32 (S2, i); + tmps[gid].S3[i] = GET_KEY32 (S3, i); + } } KERNEL_FQ KERNEL_FA void m33800_comp (KERN_ATTR_TMPS (bcrypt_tmp_t)) From 3d6ebd00b7a240496747814fe051c7f9f22ac4d2 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 22:34:05 +0200 Subject: [PATCH 30/57] fix build errors with 32500 on Metal --- OpenCL/m32500-pure.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OpenCL/m32500-pure.cl b/OpenCL/m32500-pure.cl index 9a0a5a08d..338fd3725 100644 --- a/OpenCL/m32500-pure.cl +++ b/OpenCL/m32500-pure.cl @@ -107,7 +107,7 @@ DECLSPEC u32 base64_encode_three_bytes_better (u32 in) return out; } -DECLSPEC void base64_encode_sha256 (u32 *out, const u32 *in) +DECLSPEC void base64_encode_sha256 (PRIVATE_AS u32 *out, PRIVATE_AS const u32 *in) { out[0] = base64_encode_three_bytes_better( 
(in[0] >> 8)); out[1] = base64_encode_three_bytes_better((in[0] << 16) | (in[1] >> 16)); From 3ed45d4e83ae5d9c8006043e5ed8d35f81fd766d Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 22:37:59 +0200 Subject: [PATCH 31/57] fix build errors with 32700 on Metal --- OpenCL/m32700-pure.cl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/OpenCL/m32700-pure.cl b/OpenCL/m32700-pure.cl index 1779c1be6..f1afe3e75 100644 --- a/OpenCL/m32700-pure.cl +++ b/OpenCL/m32700-pure.cl @@ -41,7 +41,7 @@ CONSTANT_VK u32 newdes_rotor[256] = 0x3a, 0x37, 0x03, 0xf4, 0x61, 0xc5, 0xee, 0xe3, 0x76, 0x31, 0x4f, 0xe6, 0xdf, 0xa5, 0x99, 0x3b, }; -DECLSPEC void new_des (u32 *block, u32 *newdes_key) +DECLSPEC void new_des (PRIVATE_AS u32 *block, PRIVATE_AS u32 *newdes_key) { #define B0 (*(block+0)) #define B1 (*(block+1)) @@ -71,7 +71,7 @@ DECLSPEC void new_des (u32 *block, u32 *newdes_key) B7 = B7 ^ newdes_rotor[B3 ^ *(newdes_key++)]; } -DECLSPEC void key_expansion (const u8 *sha1sum, u32 *result) +DECLSPEC void key_expansion (PRIVATE_AS const u8 *sha1sum, PRIVATE_AS u32 *result) { for (int count = 0; count < 15; count++) { @@ -143,7 +143,7 @@ KERNEL_FQ KERNEL_FA void m32700_init (KERN_ATTR_TMPS (sha1_tmp_t)) // Crate a NewDES key u32 newdes_key32[60]; - key_expansion ((const u8 *) ctx.h, newdes_key32); + key_expansion ((PRIVATE_AS const u8 *) ctx.h, newdes_key32); for (int i = 0; i < 60; i++) { @@ -182,7 +182,7 @@ KERNEL_FQ KERNEL_FA void m32700_loop (KERN_ATTR_TMPS (sha1_tmp_t)) } // Run 1000 iterations of NewDES on the derived salt - for (int i = 0; i < LOOP_CNT; i++) + for (u32 i = 0; i < LOOP_CNT; i++) { new_des (salt32, newdes_key32); } From 5a5824ea59abd86ef6595e05e0ee9fb14fd5827c Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 23:19:49 +0200 Subject: [PATCH 32/57] Introduced the 'edge' test type in test.pl along with its corresponding tool, test_edge.sh Hashcat is evolving, both in its core and in the supported 
algorithms. To uncover bugs in the code, I implemented edge case testing to verify the settings defined in the specific algorithm test modules (e.g., m00000.pm), as well as the behavior of the kernels (pure and optimized) in relation to the different attack modes (-a0, -a1, etc.). --- docs/changes.txt | 1 + tools/test.pl | 327 ++++++++++++++++++- tools/test_edge.sh | 761 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1087 insertions(+), 2 deletions(-) create mode 100755 tools/test_edge.sh diff --git a/docs/changes.txt b/docs/changes.txt index f14019f60..5d0c93eba 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -75,6 +75,7 @@ - Added option --total-candidates to provide the total candidate count for an attack insteda of the internal "--keyspace" value - Added option --backend-devices-keepfree to configure X percentage of device memory available to keep free - Added display of password length mininum and maximum in the Kernel.Feature status line +- Added the 'edge' test type to test.pl and the corresponding tool test_edge.sh ## ## Performance diff --git a/tools/test.pl b/tools/test.pl index 2ea8b094d..8f8b5abe3 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -12,6 +12,8 @@ use Data::Types qw (is_count is_whole); use File::Basename; use FindBin; use List::Util 'shuffle'; +use Text::Iconv; +use Digest::MD4 qw (md4_hex); # allows require by filename use lib "$FindBin::Bin/test_modules"; @@ -23,7 +25,7 @@ if (exists $ENV{"IS_OPTIMIZED"} && defined $ENV{"IS_OPTIMIZED"}) $IS_OPTIMIZED = $ENV{"IS_OPTIMIZED"}; } -my $TYPES = [ 'single', 'passthrough', 'potthrough', 'verify' ]; +my $TYPES = [ 'edge', 'single', 'passthrough', 'potthrough', 'verify' ]; my $TYPE = shift @ARGV; my $MODE = shift @ARGV; @@ -45,7 +47,13 @@ my $single_outputs = 8; my $constraints = get_module_constraints (); -if ($TYPE eq 'single') +if ($TYPE eq 'edge') +{ + usage_exit () if scalar @ARGV > 2; + + edge (@ARGV); +} +elsif ($TYPE eq 'single') { single (@ARGV); } @@ -68,6 
+76,307 @@ else usage_exit (); } +sub edge_format +{ + my $word_len = shift; + my $salt_len = shift; + my $attack_type = shift; + my $optimized = shift; + + my $hash = ""; + my $word = ""; + my $salt = ""; + + my $cond = 0; + + do + { + $word = random_numeric_string ($word_len) // ""; + $salt = random_numeric_string ($salt_len) // ""; + + $hash = module_generate_hash ($word, $salt); + + $cond = 1; + + if ($MODE == 30901 && length ($hash) != 34) + { + $cond = 0; + } + + } while ($cond != 1); + + if (defined $hash) + { + my $format = "%d,%d,%d,%d,%d,'%s','%s','%s'\n"; + + printf ($format, $MODE, $attack_type, $optimized, $word_len, $salt_len, $word, $salt, $hash); + } +} + +sub edge +{ + my $attack_type = shift // 0; + my $optimized = shift // 0; + + my @attack_types = (0, 1, 3, 6, 7); + + if (not grep $_ == $attack_type, @attack_types) + { + return -1; + } + + if ($optimized != 0 && $optimized != 1) + { + return -1; + } + + my $idx_max = 0; + my $idx = 0; + + my $word_min = ($optimized == 1) ? $constraints->[2]->[0] : $constraints->[0]->[0]; + my $word_max = ($optimized == 1) ? $constraints->[2]->[1] : $constraints->[0]->[1]; + + my $salt_min = ($optimized == 1) ? $constraints->[3]->[0] : $constraints->[1]->[0]; + my $salt_max = ($optimized == 1) ? $constraints->[3]->[1] : $constraints->[1]->[1]; + + my $comb_min = ($optimized == 1) ? $constraints->[4]->[0] : -1; + my $comb_max = ($optimized == 1) ? 
$constraints->[4]->[1] : -1; + + if ($attack_type != 3) + { + if ($optimized == 1) + { + if ($word_min != $word_max && $word_max > 31) + { + $word_max = 31; + } + } + } + + if ($attack_type != 0) + { + if ($word_min < 2) + { + $word_min = 2; + } + } + + my $word_len = 0; + my $salt_len = 0; + + # word_min, salt_min + # word_min, salt_max + # word_max, salt_min + # word_max, salt_max + + if ($word_min != -1) + { + if ($salt_min != $salt_max) + { + if ($salt_min != -1) # word_min, salt_min + { + $word_len = $word_min; + $salt_len = $salt_min; + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + + if ($salt_max != -1) # word_min, salt_max + { + my $salt_max_tmp = $salt_max; + + if ($optimized == 1) + { + if ($salt_max_tmp > 51) + { + $salt_max_tmp = 51; + } + + if ($comb_max != -1) + { + if (($word_len + $salt_max_tmp) > $comb_max) + { + my $off = $word_len + $salt_max_tmp - $comb_max; + + if ($salt_max_tmp > $off) + { + $salt_max_tmp -= $off; + } + } + } + } + + $word_len = $word_min; + $salt_len = $salt_max_tmp; + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + } + else + { + if ($salt_min != -1) # word_min, salt_min/salt_max (are the same) + { + $word_len = $word_min; + $salt_len = $salt_min; + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + else + { + # no salt + + $word_len = $word_min; + $salt_len = 0; + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + } + } + + if ($word_max != -1) + { + if ($salt_min != $salt_max) + { + my $last_word_len = -1; + my $last_salt_len = -1; + + if ($salt_min != -1) # word_max, salt_min + { + $word_len = $word_max; + $salt_len = $salt_min; + + if ($optimized == 1) + { + my $comb_max_cur = 55; + + if ($comb_max != -1) + { + $comb_max_cur = $comb_max; + } + + if (($word_len + $salt_len) > $comb_max_cur) + { + my $off = $word_len + $salt_len - $comb_max_cur; + + if ($word_len > $off) + { + $word_len -= $off; + } + else + { + print ("ERROR with MODE 
$MODE, WORD $word_len, SALT $salt_len, MAX $comb_max_cur"); + exit (1); + } + } + } + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + + # save last + $last_word_len = $word_len; + $last_salt_len = $salt_len; + } + + if ($salt_max != -1) # word_max, salt_max + { + $word_len = $word_max; + $salt_len = $salt_max; + + if ($optimized == 1) + { + # limit comb_max to 55 if is not set + my $comb_max_cur = 55; + + if ($comb_max != -1) + { + $comb_max_cur = $comb_max; + } + + # limit salt_max to 51 + my $salt_max_tmp = $salt_len; + + if ($salt_max_tmp > 51) + { + $salt_max_tmp = 51; + } + + if (($word_len + $salt_max_tmp) > $comb_max_cur) + { + my $off = $word_len + $salt_max_tmp - $comb_max_cur; + + if ($last_word_len == $word_len) + { + $word_len -= $off; + if ($word_len < $word_min) + { + $off = $word_min - $word_len; + $word_len = $word_min; + $salt_max_tmp -= $off; + } + } + else + { + $salt_max_tmp -= $off; + if ($salt_max_tmp < $salt_min) + { + $off = $salt_min - $salt_max_tmp; + $salt_max_tmp = $salt_min; + $word_len -= $off; + } + } + } + + $salt_len = $salt_max_tmp; + } + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + + # reset last + $last_word_len = -1; + $last_salt_len = -1; + } + } + else + { + if ($salt_min != -1) # word_max, salt_min/salt_max (are the same) + { + $word_len = $word_max; + $salt_len = $salt_max; + + if ($optimized == 1) + { + if ($comb_max != -1) + { + if (($word_len + $salt_len) > $comb_max) + { + my $off = $word_len + $salt_len - $comb_max; + + if ($word_len > $off) + { + $word_len -= $off; + + if ($word_len < $word_min) + { + $word_len = $word_min; + } + } + } + } + } + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + else + { + $word_len = $word_max; + $salt_len = 0; + + edge_format ($word_len, $salt_len, $attack_type, $optimized); + } + } + } +} + sub single { my $len = shift; @@ -159,6 +468,13 @@ sub single { for my $salt (sort { length $a <=> length $b } keys %{$db_prev->{$word}}) 
{ + if ($MODE == 31600 || $MODE == 31500) + { + my $converter = Text::Iconv->new('utf8', 'UTF-16LE'); + + $word = md4_hex ($converter->convert ($word)); + } + my $hash = module_generate_hash ($word, $salt); # possible if the requested length is not supported by algorithm @@ -592,11 +908,18 @@ sub usage_exit print "\n" . "Usage:\n" + . " $f edge [attack-type] [optimized]\n" . " $f single [length]\n" . " $f passthrough \n" . " $f potthrough \n" . " $f verify \n" . "\n" + . "Edge:\n" + . " Generates edge case for selected .\n" + . " Will be generated a list of value separated by comma to stdout:\n" + . " ,,,,,,,\n" + . " The output can be processed by the test_edge.sh script.\n" + . "\n" . "Single:\n" . " Generates up to 32 hashes of random numbers of incrementing length, or up to 32\n" . " hashes of random numbers of exact [length]. Writes shell commands to stdout that\n" diff --git a/tools/test_edge.sh b/tools/test_edge.sh new file mode 100755 index 000000000..a6960e00e --- /dev/null +++ b/tools/test_edge.sh @@ -0,0 +1,761 @@ +#!/usr/bin/env bash + +## +## Author......: See docs/credits.txt +## License.....: MIT +## + +function usage() +{ + echo "> Usage: $0 []" + echo "" + echo ":" + echo "" + echo "-m / --hash-type : set Hash Type (default: all)" + echo " --hash-type-min : set min hash-type (default: 0)" + echo " --hash-type-max : set max hash-type (default: 99999)" + echo "" + echo "-a / --attack-type : set Attack Type (default: all. supported: 0 (Straight), 1 (Combination), 3 (Brute-force), 6 (Hybrid Wordlist + Mask), 7 (Hybrid Mask + Wordlist))" + echo "-K / --kernel-type : set Kernel-Type (default: all. supported: 0 (Pure), 1 (Optimized))" + echo "-t / --target-type : set Target Type (default: all. supported: single, multi)" + echo "" + echo "-V / --vector-width : set Vector Width (default: all. 
supported: 1, 2, 4, 8, 16)" + echo " --vector-width-min : set min vector-width (default: 1)" + echo " --vector-width-max : set max vector-width (default: 16)" + echo "" + echo "-d : set Device ID" + echo "-D : set Device-Type ID" + echo "" + echo "-r : set max runtime, in seconds, for each kernel execution (default: 270)" + echo " --metal-compiler-runtime : set max runtime, in seconds, for each kernel build using Apple Metal (default: 120)" + echo "" + echo " --metal-backend : exclude all hash types that do not work with Metal, exclude vector-width > 4, set --metal-compiler-runtime argument" + echo "" + echo "-f / --force : run hashcat using --force" + echo "-v / --verbose : show debug messages" + echo "-h / --help : show this help, then exit" + echo "" + + exit 1 +} + +function is_in_array() +{ + for e in "${@:2}"; do + [ "$e" = "$1" ] && return 0 + done + + return 1 +} + +export LC_CTYPE=C +export LANG=C + +OUTD="test_edge_$(date +%s)" + +HASH_TYPE=all +HASH_TYPE_MIN=0 +HASH_TYPE_MAX=99999 +ATTACK_TYPE=all +ATTACK_TYPES="0 1 3 6 7" +KERNEL_TYPE=all +TARGET_TYPE=all +VECTOR_WIDTH=all +VECTOR_WIDTHS="1 2 4 8 16" +VECTOR_WIDTH_MIN=1 +VECTOR_WIDTH_MAX=16 + +FORCE=0 +VERBOSE=0 +RUNTIME_MAX=270 # 4.5 min +METAL_BACKEND=0 +METAL_COMPILER_RUNTIME=120 + +OPTS="--quiet --potfile-disable --hwmon-disable --self-test-disable --machine-readable --logfile-disable" + +SKIP_HASH_TYPES="2000 2500 2501 16800 16801 99999 32000" +SKIP_HASH_TYPES_METAL="1800 10700 11700 11750 11760 11800 11850 11860 19200 21600" +SKIP_METAL_SCRYPT="8900 15700 9300 22700 27700 28200 29800" + +SKIP_OUT_MATCH_HASH_TYPES="14000 14100 18100 22000" +SKIP_SAME_SALT_HASH_TYPES="6600 7100 7200 8200 13200 13400 15300 15310 15900 15910 16900 18300 18900 20200 20300 20400 27000 27100 29700 29930 29940" +#SKIP_SAME_SALT_HASH_TYPES="400 3200 5800 6400 6500 6600 6700 7100 7200 7401 7900 8200 9100 9200 9400 10500 10901 12001 12200 12300 12400 12500 12700 12800 12900 13000 13200 13400 13600 14700 14800 15100 15200 
15300 15310 15400 15600 15900 15910 16200 16300 16700 16900 18300 18400 18800 18900 19000 19100 19600 19700 19800 19900 20011 20012 20013 20200 20300 20400 21501 22100 22400 22600 23100 23300 23500 23600 23700 23900 24100 24200 24410 24420 24500 25300 25400 25500 25600 25800 26100 26500 26600 27000 27100 27400 27500 27600 28100 28400 28600 28800 28900 29600 29700 29910 29920 29930 29940 30600 31200 31900" + +while [ $# -gt 0 ]; do + case $1 in + --metal-backend) METAL_BACKEND=1 ;; + --metal-compiler-runtime) METAL_COMPILER_RUNTIME=${2}; shift ;; + -r) RUNTIME_MAX=${2}; shift ;; + -h|--help) usage; break ;; + -v|--verbose) VERBOSE=1 ;; + -f|--force) FORCE=1 ;; + -V|--vector-width) + if [ "${2}" != "all" ]; then + if [[ ${2} =~ ^-?[0-9]+$ ]]; then + if [ "${2}" == "1" ]; then + VECTOR_WIDTH=1 + elif [ "${2}" == "2" ]; then + VECTOR_WIDTH=2 + elif [ "${2}" == "4" ]; then + VECTOR_WIDTH=4 + elif [ "${2}" == "8" ]; then + VECTOR_WIDTH=8 + elif [ "${2}" == "16" ]; then + VECTOR_WIDTH=16 + else + usage + fi + else + usage + fi + fi + shift + ;; + --vector-width-min) VECTOR_WIDTH_MIN=${2}; shift ;; + --vector-width-max) VECTOR_WIDTH_MAX=${2}; shift ;; + -t|--target-type) + if [ "${2}" != "all" ]; then + if [ "${2}" == "single" ]; then + TARGET_TYPE=0 + elif [ "${2}" == "multi" ]; then + TARGET_TYPE=1 + else + usage + fi + fi + shift + ;; + -m|--hash-type) + if [ "${2}" != "all" ]; then + if [[ ${2} =~ ^-?[0-9]+$ ]]; then + HASH_TYPE=${2} + else + usage + fi + fi + shift + ;; + --hash-type-min) HASH_TYPE_MIN=${2}; shift ;; + --hash-type-max) HASH_TYPE_MAX=${2}; shift ;; + -a|--attack-type) + if [ "${2}" != "all" ]; then + if [[ ${2} =~ ^-?[0-9]+$ ]]; then + if [ "${2}" == "0" ]; then + ATTACK_TYPE=0 + elif [ "${2}" == "1" ]; then + ATTACK_TYPE=1 + elif [ "${2}" == "3" ]; then + ATTACK_TYPE=3 + elif [ "${2}" == "6" ]; then + ATTACK_TYPE=6 + elif [ "${2}" == "7" ]; then + ATTACK_TYPE=7 + else + usage + fi + else + usage + fi + fi + shift + ;; + -K|--kernel-type) + if [
"${2}" != "all" ]; then + if [[ ${2} =~ ^-?[0-9]+$ ]]; then + if [ "${2}" == "0" ]; then + KERNEL_TYPE=0 #pure + elif [ "${2}" == "1" ]; then + KERNEL_TYPE=1 #optimized + else + usage + fi + else + usage + fi + fi + shift + ;; + -d) OPTS="${OPTS} -d ${2}"; shift ;; + -D) + if [ "${2}" == "1" ]; then + OPTS="${OPTS} -D 1" + DEVICE_TYPE="Cpu" + elif [ "${2}" == "2" ]; then + OPTS="${OPTS} -D 2" + DEVICE_TYPE="Gpu" + else + OPTS="${OPTS} -D ${2}" + DEVICE_TYPE="Cpu + Gpu" + fi + shift + ;; + *) echo "Unknown parameter passed: $1"; usage; break ;; + esac + shift +done + +OPTS="${OPTS} --runtime ${RUNTIME_MAX}" + +if [ ${FORCE} -eq 1 ]; then + OPTS="${OPTS} --force" +fi + +if [ $METAL_BACKEND -eq 1 ]; then + VECTOR_WIDTHS="1 2 4" + + if [ $VECTOR_WIDTH_MAX -gt 4 ]; then + VECTOR_WIDTH_MAX=4 + fi + + if [ $METAL_COMPILER_RUNTIME -ne 120 ]; then + OPTS="${OPTS} --metal-compiler-runtime ${METAL_COMPILER_RUNTIME}" + fi +fi + +mkdir -p ${OUTD} &> /dev/null + +for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' 
-f1 | awk '{print $1+=0}'); do + + if [ $HASH_TYPE != "all" ]; then + if [ $HASH_TYPE -ne $hash_type ]; then continue; fi + else + if [ $hash_type -lt ${HASH_TYPE_MIN} ]; then continue; fi + if [ $hash_type -gt ${HASH_TYPE_MAX} ]; then continue; fi + fi + + if is_in_array "${hash_type}" ${SKIP_HASH_TYPES}; then + echo "[ ${OUTD} ] > Skip processing Hash-Type ${hash_type} (common)" | tee -a ${OUTD}/test_edge.details.log + continue + fi + + if [ $METAL_BACKEND -eq 1 ]; then + if is_in_array "${hash_type}" ${SKIP_HASH_TYPES_METAL}; then + echo "[ ${OUTD} ] > Skip processing Hash-Type ${hash_type} (due to metal kernel build failed)" | tee -a ${OUTD}/test_edge.details.log + continue + fi + + if is_in_array "${hash_type}" ${SKIP_METAL_SCRYPT}; then + echo "[ ${OUTD} ] > Skip processing Hash-Type ${hash_type} (due to metal scrypt is broken)" | tee -a ${OUTD}/test_edge.details.log + continue + fi + fi + + build_failed_err=0 + test_vectors_err=0 + + for attack_type in ${ATTACK_TYPES}; do + + if [ $ATTACK_TYPE != "all" ] && [ $ATTACK_TYPE -ne $attack_type ]; then continue; fi + + kernel_types=$(./hashcat -m ${hash_type} -HH | grep 'Kernel.Type(s' | cut -d: -f2 | xargs | sed -e 's/,//g') + + for kernel_type in ${kernel_types}; do + + kernel_type_pad=$(printf "%9s\n" ${kernel_type}) + + CUR_OPTS="${OPTS}" + + optimized=0 + if [ "${kernel_type}" == "optimized" ]; then + optimized=1 + CUR_OPTS="${CUR_OPTS} -O" + fi + + if [ $KERNEL_TYPE != "all" ] && [ $KERNEL_TYPE -ne $optimized ]; then continue; fi + + tmp_salt=$(./hashcat -m ${hash_type} -HH | grep Salt\\.Type) + have_salt=$? 
+ + if [ $have_salt -eq 0 ]; then + salt_type=$(echo $tmp_salt | awk '{print $2}') + + if [ $salt_type == "Virtual" ]; then + have_salt=1 + fi + fi + + slow_hash=0 + tmp_slow_hash=$(./hashcat -m ${hash_type} -HH | grep Slow\\.Hash | awk '{print $2}') + if [ "${tmp_slow_hash}" == "Yes" ]; then + slow_hash=1 + fi + + pt_hex=0 + tmp_pw_type=$(./hashcat -m ${hash_type} -HH | grep Password\\.Type | awk '{print $2}') + if [ "${tmp_pw_type}" == "HEX" ]; then + pt_hex=1 + fi + + echo "[ ${OUTD} ] # Export tests for Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}" >> ${OUTD}/test_edge.details.log + + edge_out="${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}.out" + + ./tools/test.pl edge ${hash_type} ${attack_type} ${optimized} 2>/dev/null > ${edge_out} + + if [ ${VERBOSE} -eq 1 ]; then + cat ${edge_out} + fi + + if [ $? -eq 0 ]; then + + check_hash=$(cat ${edge_out} | cut -d, -f8- | head -1) + if [ ${#check_hash} -eq 2 ] || [ ${#check_hash} -eq 3 ]; then + echo "[ ${OUTD} ] !> error detected with Hash-Type ${hash_type}: empty test vectors" | tee -a ${OUTD}/test_edge.details.log + break + fi + + for vector_width in ${VECTOR_WIDTHS}; do + + if [ $VECTOR_WIDTH != "all" ]; then + if [ $VECTOR_WIDTH -ne $vector_width ]; then continue; fi + else + if [ ${vector_width} -lt ${VECTOR_WIDTH_MIN} ]; then continue; fi + if [ ${vector_width} -gt ${VECTOR_WIDTH_MAX} ]; then continue; fi + fi + + CUR_OPTS_V="${CUR_OPTS} --backend-vector-width ${vector_width}" + + # single hash + if [ $TARGET_TYPE == all ] || [ $TARGET_TYPE == 0 ]; then + + echo "[ ${OUTD} ] # Processing Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Target-Type single" | tee -a ${OUTD}/test_edge.details.log + + cnt=$(wc -l ${edge_out} | awk '{print $1}') + + for ((i = 1; i <= cnt; i++)); do + word_compare=None + word_len=$(cat ${edge_out} | cut -d, -f4 | head -${i} | tail -1) + salt_len=$(cat ${edge_out} | cut -d, -f5 | 
head -${i} | tail -1) + word=$(cat ${edge_out} | cut -d, -f6 | head -${i} | tail -1) + salt=$(cat ${edge_out} | cut -d, -f7 | head -${i} | tail -1) + hash=$(cat ${edge_out} | cut -d, -f8- | head -${i} | tail -1) + + x="echo -n '${word}'" + + if [ "${hash_type}" == "20510" ]; then + word_compare="echo -n '${word}'" + x="echo -n '${word}' | cut -b7-" + fi + + if [ ${have_salt} -eq 1 ]; then + salt_len="None" + salt= + else + z="echo -n '${salt}'" + salt=$(eval $z) + fi + + word=$(eval $x) + + if [ ${VERBOSE} -eq 1 ]; then + echo "[ ${OUTD} ] > Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Salt '${salt}', Hash ${hash}" | tee -a ${OUTD}/test_edge.details.log + else + echo "[ ${OUTD} ] > Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Salt '${salt}', Hash ${hash}" >> ${OUTD}/test_edge.details.log + fi + + CMD="" + + if [ "${attack_type}" -eq 0 ]; then + #echo ${word} > test_${hash_type}_${kernel_type}_${attack_type}_${i}.word + + CMD="echo ${word} | ./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash} -a 0" + elif [ "${attack_type}" -eq 1 ]; then + word=$(eval $x) + + if [ "${word_len}" -eq 2 ]; then + word_1=$(echo $word | cut -c -1) + word_2=$(echo $word | cut -c 2-) + elif [ "${word_len}" -gt 2 ]; then + word_1_cnt=$((word_len/2)) + + word_1=$(echo $word | cut -c -${word_1_cnt}) + + ((word_1_cnt++)) + + word_2=$(echo $word | cut -c ${word_1_cnt}-) + fi + + echo ${word_1} > ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}.1.word + echo ${word_2} > ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}.2.word + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash} -a 1 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}.1.word ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}.2.word" + elif [ "${attack_type}" -eq 3 ]; then + 
+ if [ $pt_hex -eq 1 ]; then + word_1="${word%??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ]; then + word_1="${word%?}" + mask_1="?d" + elif [ "${slow_hash}" -eq 1 ]; then + word_1="${word%??}" + mask_1="?d?d" + else + word_1="${word%???}" + mask_1="?d?d?d" + fi + fi + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash} -a 3 ${word_1}${mask_1}" + elif [ "${attack_type}" -eq 6 ]; then + + if [ $pt_hex -eq 1 ]; then + word_1="${word%??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ] || [ "${slow_hash}" -eq 1 ]; then + word_1="${word%?}" + mask_1="?d" + else + word_1="${word%??}" + mask_1="?d?d" + fi + fi + + echo -n ${word_1} > ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}_1.word + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash} -a 6 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}_1.word ${mask_1}" + elif [ "${attack_type}" -eq 7 ]; then + + if [ $pt_hex -eq 1 ]; then + word_1="${word#??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ] || [ "${slow_hash}" -eq 1 ]; then + word_1="${word#?}" + mask_1="?d" + else + word_1="${word#??}" + mask_1="?d?d" + fi + fi + + echo -n ${word_1} > ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}_2.word + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash} -a 7 ${mask_1} ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}_${i}_2.word" + fi + + cmd_out="${OUTD}/cmd_${hash_type}_${kernel_type}_${attack_type}_${i}.single.log" + + eval ${CMD} &> ${cmd_out} + retVal=$? 
+ + #echo "RET: $retVal" + + cat ${cmd_out} >> ${OUTD}/test_edge.details.log + + if [ "${retVal}" -ne 0 ]; then + echo "[ ${OUTD} ] !> error ($retVal) detected with CMD: ${CMD}" | tee -a ${OUTD}/test_edge.details.log + echo "[ ${OUTD} ] !> Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Hash ${hash}" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + cat ${cmd_out} | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + + if [ "${retVal}" -eq 250 ]; then + echo "[ ${OUTD} ] > Skipping current tests due to build error ..." | tee -a ${OUTD}/test_edge.details.log + break + fi + else + if is_in_array "${hash_type}" ${SKIP_OUT_MATCH_HASH_TYPES}; then + echo "[ ${OUTD} ] > Skip output check for Hash-Type ${hash_type} (due to collisions)" >> ${OUTD}/test_edge.details.log + continue + fi + + ./hashcat -m ${hash_type} -HH | grep 'Keep.Guessing.......: Yes' &> /dev/null + if [ $? 
-eq 0 ]; then + echo "[ ${OUTD} ] > Skip output check for Hash-Type ${hash_type} (due to keep guessing)" >> ${OUTD}/test_edge.details.log + continue + fi + + out=$(grep -v "Unsupported\|STATUS\|^$" ${cmd_out} | sed -e 's/ (user password.*$//g') + + x="echo -n ${hash}" + hash=$(eval $x) + + md5_1=$(echo ${out} | md5sum | cut -d' ' -f1) + + hc_out="${hash}:${word}" + + if [ "${word_compare}" != "None" ]; then + word_tmp=$(eval $word_compare) + hc_out="${hash}:${word_tmp}" + fi + + md5_2=$(echo ${hc_out} | md5sum | cut -d' ' -f1) + + if [ $md5_1 != $md5_2 ]; then + echo "[ ${OUTD} ] !> error detected with CMD: ${CMD}" | tee -a ${OUTD}/test_edge.details.log + echo "[ ${OUTD} ] !> Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Salt '${salt}', Hash ${hash}" | tee -a ${OUTD}/test_edge.details.log + echo "[ ${OUTD} ] !> output don't match" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + echo ${out} | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + echo "! 
expected output" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + echo ${hc_out} | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + fi + fi + done + fi + + # multi hash + if [ $TARGET_TYPE == all ] || [ $TARGET_TYPE == 1 ]; then + + cnt_max=-1 + tmp_cnt_max=$(./hashcat -m ${hash_type} -HH | grep Hashes\\.Count\\.Max | awk '{print $2}') + if [[ $tmp_cnt_max =~ ^-?[0-9]+$ ]]; then + cnt_max=$tmp_cnt_max + fi + + if [ $hash_type -eq 20510 ]; then + cnt_max=1 + fi + + if [ $cnt_max -eq 1 ]; then + # cannot exec multi-hash because this hash_type allow max 1 hash at time + echo "[ ${OUTD} ] > Skipping Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Target-Type multi (max 1 hash at time allowed)" | tee -a ${OUTD}/test_edge.details.log + cnt=0 + continue + fi + + # check if hash_type cannot crack multiple hashes with the same salt + same_salt=1 + + is_in_array "${hash_type}" ${SKIP_SAME_SALT_HASH_TYPES} + if [ ${?} -eq 1 ]; then + multi_hashes_same_salt_allowed=$(./hashcat -m ${hash_type} -HH | grep Hashes\\.w/\\.Same\\.Salt | awk '{print $2}') + if [ "${multi_hashes_same_salt_allowed}" == "Not" ]; then + same_salt=0 + fi + fi + + cnt=$(wc -l ${edge_out} | awk '{print $1}') + + if [ $cnt -eq 0 ]; then + echo "[ ${OUTD} ] > Skipping Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Target-Type multi (due to no valid test vectors)" | tee -a ${OUTD}/test_edge.details.log + continue + fi + + echo "[ ${OUTD} ] # Processing Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Target-Type multi" | tee -a ${OUTD}/test_edge.details.log + + CMD="" + SALTS_VAL="" + + hash_cnt=0 + + hash_in="${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.hashes" + + for ((i = 1; i <= cnt; i++)); do + + # limit to 
cnt_max if is set + if [ ${cnt_max} -gt 1 ] && [ ${hash_cnt} -ge ${cnt_max} ]; then continue; fi + + word_compare=None + word_len=$(cat ${edge_out} | cut -d, -f4 | head -${i} | tail -1) + salt_len=$(cat ${edge_out} | cut -d, -f5 | head -${i} | tail -1) + word=$(cat ${edge_out} | cut -d, -f6 | head -${i} | tail -1) + salt=$(cat ${edge_out} | cut -d, -f7 | head -${i} | tail -1) + hash=$(cat ${edge_out} | cut -d, -f8- | head -${i} | tail -1) + + x="echo -n '${word}'" + y="echo -n ${hash}" + + if [ "${hash_type}" == "20510" ]; then + word_compare="echo -n '${word}'" + x="echo -n '${word}' | cut -b7-" + fi + + if [ ${have_salt} -eq 1 ]; then + salt_len="None" + salt= + else + z="echo -n '${salt}'" + salt=$(eval $z) + + # skip hashes with same salt if are not allowed + if [ ${same_salt} -eq 0 ]; then + if is_in_array "${salt_len}:${salt}" ${SALTS_VAL}; then + continue + fi + if [ ${#SALTS_VAL} -eq 0 ]; then + SALTS_VAL="${salt_len}:${salt}" + else + SALTS_VAL="${SALTS_VAL} ${salt_len}:${salt}" + fi + fi + fi + + word=$(eval $x) + hash=$(eval $y) + + echo $hash >> ${hash_in} + + if [ "${word_compare}" != "None" ]; then + w=$(eval $word_compare) + echo $w >> ${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.words_compare + else + echo ${word} >> ${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.words + fi + + if [ "${attack_type}" -eq 0 ]; then + ((hash_cnt++)) + + echo ${word} >> ${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}.1.words + + CMD="cat ${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}.1.words | ./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash_in} -a 0" + elif [ "${attack_type}" -eq 1 ]; then + ((hash_cnt++)) + + if [ "${word_len}" -eq 2 ]; then + word_1=$(echo $word | cut -c -1) + word_2=$(echo $word | cut -c 2-) + elif [ "${word_len}" -gt 2 ]; then + word_1_cnt=$((word_len/2)) + word_1=$(echo $word | cut -c -${word_1_cnt}) + ((word_1_cnt++)) + word_2=$(echo $word | cut -c ${word_1_cnt}-) + fi + + echo
${word_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words + echo ${word_2} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.words + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash_in} -a 1 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.words" + elif [ "${attack_type}" -eq 3 ]; then + ((hash_cnt++)) + + if [ $pt_hex -eq 1 ]; then + word_1="${word%??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ]; then + word_1="${word%?}" + mask_1="?d" + elif [ "${slow_hash}" -eq 1 ]; then + word_1="${word%??}" + mask_1="?d?d" + else + word_1="${word%???}" + mask_1="?d?d?d" + fi + fi + + echo -n ${word_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words.masks + echo ${mask_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words.masks + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash_in} -a 3 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words.masks" + elif [ "${attack_type}" -eq 6 ]; then + ((hash_cnt++)) + + if [ $pt_hex -eq 1 ]; then + word_1="${word%??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ] || [ "${slow_hash}" -eq 1 ]; then + word_1="${word%?}" + mask_1="?d" + else + word_1="${word%??}" + mask_1="?d?d" + fi + fi + + echo ${word_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words + echo ${mask_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.masks + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash_in} -a 6 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.words ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.1.masks" + elif [ "${attack_type}" -eq 7 ]; then + ((hash_cnt++)) + + if [ $pt_hex -eq 1 ]; then + word_1="${word#??}" + mask_1="?b" + else + if [ "${word_len}" -eq 2 ] || [ "${slow_hash}" -eq 1 ]; then + word_1="${word#?}" + mask_1="?d" + else + word_1="${word#??}" + mask_1="?d?d" + fi + fi + + echo ${word_1} >> 
${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.words + echo ${mask_1} >> ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.masks + + CMD="./hashcat ${CUR_OPTS_V} -m ${hash_type} ${hash_in} -a 7 ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.masks ${OUTD}/test_${hash_type}_${kernel_type}_${attack_type}.2.words" + fi + done + +# echo "hash_cnt: $hash_cnt" +# cat ${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}.hashes + + if [ $hash_cnt -gt 1 ]; then + cmd_out="${OUTD}/cmd_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.multi.log" + eval ${CMD} &> ${cmd_out} + retVal=$? + + cat ${cmd_out} >> ${OUTD}/test_edge.details.log + + hc_out="${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.hashes.words" + + if [ "${word_compare}" != "None" ]; then + word_in="${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.words_compare" + else + word_in="${OUTD}/edge_${hash_type}_${kernel_type}_${attack_type}_${vector_width}.words" + fi + + paste -d ":" ${hash_in} ${word_in} > ${hc_out} + + if [ "${retVal}" -ne 0 ]; then + echo "[ ${OUTD} ] !> error ($retVal) detected with CMD: ${CMD}" | tee -a ${OUTD}/test_edge.details.log + echo "[ ${OUTD} ] !> Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Words ${word_in}, Hashes ${hash_in}" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + cat ${cmd_out} | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + + if [ "${retVal}" -eq 250 ]; then + echo "[ ${OUTD} ] > Skipping current tests due to build error ..." 
| tee -a ${OUTD}/test_edge.details.log + break + fi + else + if is_in_array "${hash_type}" ${SKIP_OUT_MATCH_HASH_TYPES}; then + echo "[ ${OUTD} ] > Skip output check for Hash-Type ${hash_type} (due to collisions)" >> ${OUTD}/test_edge.details.log + continue + fi + + ./hashcat -m ${hash_type} -HH | grep 'Keep.Guessing.......: Yes' &> /dev/null + if [ $? -eq 0 ]; then + echo "[ ${OUTD} ] > Skip output check for Hash-Type ${hash_type} (due to keep guessing)" >> ${OUTD}/test_edge.details.log + continue + fi + + out=$(grep -v "Unsupported\|STATUS\|^$" ${cmd_out} | sed -e 's/ (user password.*$//g') + + md5_1=$(echo "${out}" | sort -s | md5sum | cut -d' ' -f1) + md5_2=$(cat ${hc_out} | sort -s | md5sum | cut -d' ' -f1) + + if [ $md5_1 != $md5_2 ]; then + echo "[ ${OUTD} ] !> error detected (output don't match) with CMD: ${CMD}" | tee -a ${OUTD}/test_edge.details.log + echo "[ ${OUTD} ] !> Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Words ${word_in}, Hashes ${hash_in}" | tee -a ${OUTD}/test_edge.details.log + echo "! output" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + echo "${out}" | sort -s | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + echo "! 
expected output" | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + cat ${hc_out} | sort -s | tee -a ${OUTD}/test_edge.details.log + echo '```' | tee -a ${OUTD}/test_edge.details.log + fi + fi + else + echo "[ ${OUTD} ] > Skipping Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Vector-Width ${vector_width}, Target-Type multi, Hashes ${hash_in} (hashes < 2)" | tee -a ${OUTD}/test_edge.details.log + echo "hash_cnt: ${hash_cnt}" + fi + fi + done + fi + done + done +done From c780d36d4bde24244fff222e86fdbf20f44c2a46 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Wed, 9 Jul 2025 23:31:17 +0200 Subject: [PATCH 33/57] updated credits.txt --- docs/credits.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/credits.txt b/docs/credits.txt index f126840c8..842e4d986 100644 --- a/docs/credits.txt +++ b/docs/credits.txt @@ -27,7 +27,8 @@ Gabriele "matrix" Gristina (@gm4tr1x) * Universal binary on Apple Silicon * Hardware monitor initial code base and maintenance * Test suite initial code base and maintenance -* Makefile initial code base +* Edge case testing suite +* Makefile initial code base and maintenance * Multithreading initial code base * MultiGPU initial code base * Benchmarks initial code base From 238110cd045fcd42ea6e56883e588b0092140353 Mon Sep 17 00:00:00 2001 From: Chick3nman Date: Thu, 10 Jul 2025 05:39:10 -0500 Subject: [PATCH 34/57] Fix JSON Integer formatting --- src/terminal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/terminal.c b/src/terminal.c index 6a54eb142..3f7b9354f 100644 --- a/src/terminal.c +++ b/src/terminal.c @@ -2632,7 +2632,7 @@ void status_display_status_json (hashcat_ctx_t *hashcat_ctx) printf (","); } - printf (" { \"device_id\": %02u,", device_id + 1); + printf (" { \"device_id\": %u,", device_id + 1); char *device_name_json_encoded = (char *) hcmalloc (strlen (device_info->device_name) * 2); From 
e4daa498a3ae1a11cc24b7300845fb0e8ff90468 Mon Sep 17 00:00:00 2001 From: Chick3nman Date: Thu, 10 Jul 2025 05:40:47 -0500 Subject: [PATCH 35/57] Fix typo in changes.txt --- docs/changes.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changes.txt b/docs/changes.txt index de2d26bcd..a22ca8f13 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -70,7 +70,7 @@ - Added new feature (-Y) that creates N virtual instances for each device in your system at the cost of N times the device memory consumption - Added options --benchmark-min and --benchmark-max to set a hash-mode range to be used during the benchmark -- Added option --total-candidates to provide the total candidate count for an attack insteda of the internal "--keyspace" value +- Added option --total-candidates to provide the total candidate count for an attack instead of the internal "--keyspace" value - Added option --backend-devices-keepfree to configure X percentage of device memory available to keep free ## From 06344910a48db8d477336810392f17bcf096e7ed Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 10 Jul 2025 13:31:00 +0200 Subject: [PATCH 36/57] Refactored HIP kernel code for improved performance and cleanup - Replaced inline asm in hc_byte_perm() with __builtin_amdgcn_perm() - Replaced inline asm in hc_bytealign() with __builtin_amdgcn_alignbyte() - Defined HC_INLINE as default for HIP, significantly boosting kernel performance of pure kernels - Removed IS_ROCM from inc_vendor.h as it's no longer needed - Removed backend-specific code from several hash-modes and inc_rp_optimized.cl, as hc_bytealign_S() is now available on all backends --- OpenCL/inc_common.cl | 31862 +------------------------------- OpenCL/inc_hash_blake2b.cl | 4 +- OpenCL/inc_hash_blake2s.cl | 2 +- OpenCL/inc_rp_optimized.cl | 118 - OpenCL/inc_vendor.h | 11 +- OpenCL/m00500-optimized.cl | 104 +- OpenCL/m01600-optimized.cl | 106 +- OpenCL/m05800-optimized.cl | 41 +- OpenCL/m06300-optimized.cl | 107 +- 
OpenCL/m07400-optimized.cl | 180 +- OpenCL/m10700-optimized.cl | 56 +- OpenCL/m11600-pure.cl | 20 +- OpenCL/m12500-pure.cl | 20 +- OpenCL/m13800_a0-optimized.cl | 31 - OpenCL/m13800_a1-optimized.cl | 31 - OpenCL/m13800_a3-optimized.cl | 31 - OpenCL/m17010-pure.cl | 39 +- OpenCL/m17020-pure.cl | 39 +- OpenCL/m17030-pure.cl | 39 +- OpenCL/m17040-pure.cl | 39 +- OpenCL/m23700-pure.cl | 20 +- OpenCL/m23800-pure.cl | 20 +- OpenCL/m31400_a0-optimized.cl | 51 - OpenCL/m31400_a0-pure.cl | 51 - OpenCL/m31400_a1-optimized.cl | 51 - OpenCL/m31400_a1-pure.cl | 51 - OpenCL/m31400_a3-optimized.cl | 51 - OpenCL/m31400_a3-pure.cl | 51 - 28 files changed, 324 insertions(+), 32902 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index b13dc4d5f..0bc1063a4 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -1425,121 +1425,174 @@ DECLSPEC u64 hc_swap64_S (const u64 v) DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c) { - #define BIT(x) (make_u32x (1u) << (x)) - #define BIT_MASK(x) (BIT (x) - 1) - #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) + u32x r; - return BFE (a, b, c); + #if VECT_SIZE == 1 + r = __builtin_amdgcn_ubfe (a, b, c); + #endif - #undef BIT - #undef BIT_MASK - #undef BFE + #if VECT_SIZE >= 2 + r.s0 = __builtin_amdgcn_ubfe (a.s0, b.s0, c.s0); + r.s1 = __builtin_amdgcn_ubfe (a.s1, b.s1, c.s1); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __builtin_amdgcn_ubfe (a.s2, b.s2, c.s2); + r.s3 = __builtin_amdgcn_ubfe (a.s3, b.s3, c.s3); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __builtin_amdgcn_ubfe (a.s4, b.s4, c.s4); + r.s5 = __builtin_amdgcn_ubfe (a.s5, b.s5, c.s5); + r.s6 = __builtin_amdgcn_ubfe (a.s6, b.s6, c.s6); + r.s7 = __builtin_amdgcn_ubfe (a.s7, b.s7, c.s7); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __builtin_amdgcn_ubfe (a.s8, b.s8, c.s8); + r.s9 = __builtin_amdgcn_ubfe (a.s9, b.s9, c.s9); + r.sa = __builtin_amdgcn_ubfe (a.sa, b.sa, c.sa); + r.sb = __builtin_amdgcn_ubfe (a.sb, b.sb, c.sb); + r.sc = __builtin_amdgcn_ubfe 
(a.sc, b.sc, c.sc); + r.sd = __builtin_amdgcn_ubfe (a.sd, b.sd, c.sd); + r.se = __builtin_amdgcn_ubfe (a.se, b.se, c.se); + r.sf = __builtin_amdgcn_ubfe (a.sf, b.sf, c.sf); + #endif + + return r; } DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) { - #define BIT(x) (1u << (x)) - #define BIT_MASK(x) (BIT (x) - 1) - #define BFE(x,y,z) (((x) >> (y)) & BIT_MASK (z)) - - return BFE (a, b, c); - - #undef BIT - #undef BIT_MASK - #undef BFE + return __builtin_amdgcn_ubfe (a, b, c); } DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) { - u32x r = 0; + u32x r; - const int cm = c & 3; + #if VECT_SIZE == 1 + r = __builtin_amdgcn_alignbyte (a, b, c); + #endif - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a << 24) | (b >> 8); } - else if (cm == 2) { r = (a << 16) | (b >> 16); } - else if (cm == 3) { r = (a << 8) | (b >> 24); } + #if VECT_SIZE >= 2 + r.s0 = __builtin_amdgcn_alignbyte (a.s0, b.s0, c); + r.s1 = __builtin_amdgcn_alignbyte (a.s1, b.s1, c); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __builtin_amdgcn_alignbyte (a.s2, b.s2, c); + r.s3 = __builtin_amdgcn_alignbyte (a.s3, b.s3, c); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __builtin_amdgcn_alignbyte (a.s4, b.s4, c); + r.s5 = __builtin_amdgcn_alignbyte (a.s5, b.s5, c); + r.s6 = __builtin_amdgcn_alignbyte (a.s6, b.s6, c); + r.s7 = __builtin_amdgcn_alignbyte (a.s7, b.s7, c); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __builtin_amdgcn_alignbyte (a.s8, b.s8, c); + r.s9 = __builtin_amdgcn_alignbyte (a.s9, b.s9, c); + r.sa = __builtin_amdgcn_alignbyte (a.sa, b.sa, c); + r.sb = __builtin_amdgcn_alignbyte (a.sb, b.sb, c); + r.sc = __builtin_amdgcn_alignbyte (a.sc, b.sc, c); + r.sd = __builtin_amdgcn_alignbyte (a.sd, b.sd, c); + r.se = __builtin_amdgcn_alignbyte (a.se, b.se, c); + r.sf = __builtin_amdgcn_alignbyte (a.sf, b.sf, c); + #endif return r; } DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { - u32 r = 0; - - const int cm = c & 3; - - if (cm == 0) { r = b; } - 
else if (cm == 1) { r = (a << 24) | (b >> 8); } - else if (cm == 2) { r = (a << 16) | (b >> 16); } - else if (cm == 3) { r = (a << 8) | (b >> 24); } - - return r; + return __builtin_amdgcn_alignbyte (a, b, c); } DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { - u32x r = 0; + const int c_mod_4 = c & 3; - const int cm = c & 3; + u32x r; - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a >> 24) | (b << 8); } - else if (cm == 2) { r = (a >> 16) | (b << 16); } - else if (cm == 3) { r = (a >> 8) | (b << 24); } + #if VECT_SIZE == 1 + r = (c_mod_4 == 0) ? b : __builtin_amdgcn_alignbyte (b, a, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 2 + r.s0 = (c_mod_4 == 0) ? b.s0 : __builtin_amdgcn_alignbyte (b.s0, a.s0, 4 - c_mod_4); + r.s1 = (c_mod_4 == 0) ? b.s1 : __builtin_amdgcn_alignbyte (b.s1, a.s1, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 4 + r.s2 = (c_mod_4 == 0) ? b.s2 : __builtin_amdgcn_alignbyte (b.s2, a.s2, 4 - c_mod_4); + r.s3 = (c_mod_4 == 0) ? b.s3 : __builtin_amdgcn_alignbyte (b.s3, a.s3, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 8 + r.s4 = (c_mod_4 == 0) ? b.s4 : __builtin_amdgcn_alignbyte (b.s4, a.s4, 4 - c_mod_4); + r.s5 = (c_mod_4 == 0) ? b.s5 : __builtin_amdgcn_alignbyte (b.s5, a.s5, 4 - c_mod_4); + r.s6 = (c_mod_4 == 0) ? b.s6 : __builtin_amdgcn_alignbyte (b.s6, a.s6, 4 - c_mod_4); + r.s7 = (c_mod_4 == 0) ? b.s7 : __builtin_amdgcn_alignbyte (b.s7, a.s7, 4 - c_mod_4); + #endif + + #if VECT_SIZE >= 16 + r.s8 = (c_mod_4 == 0) ? b.s8 : __builtin_amdgcn_alignbyte (b.s8, a.s8, 4 - c_mod_4); + r.s9 = (c_mod_4 == 0) ? b.s9 : __builtin_amdgcn_alignbyte (b.s9, a.s9, 4 - c_mod_4); + r.sa = (c_mod_4 == 0) ? b.sa : __builtin_amdgcn_alignbyte (b.sa, a.sa, 4 - c_mod_4); + r.sb = (c_mod_4 == 0) ? b.sb : __builtin_amdgcn_alignbyte (b.sb, a.sb, 4 - c_mod_4); + r.sc = (c_mod_4 == 0) ? b.sc : __builtin_amdgcn_alignbyte (b.sc, a.sc, 4 - c_mod_4); + r.sd = (c_mod_4 == 0) ? 
b.sd : __builtin_amdgcn_alignbyte (b.sd, a.sd, 4 - c_mod_4); + r.se = (c_mod_4 == 0) ? b.se : __builtin_amdgcn_alignbyte (b.se, a.se, 4 - c_mod_4); + r.sf = (c_mod_4 == 0) ? b.sf : __builtin_amdgcn_alignbyte (b.sf, a.sf, 4 - c_mod_4); + #endif return r; } DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { - u32 r = 0; + const int c_mod_4 = c & 3; - const int cm = c & 3; - - if (cm == 0) { r = b; } - else if (cm == 1) { r = (a >> 24) | (b << 8); } - else if (cm == 2) { r = (a >> 16) | (b << 16); } - else if (cm == 3) { r = (a >> 8) | (b << 24); } - - return r; + return (c_mod_4 == 0) ? b : __builtin_amdgcn_alignbyte (b, a, 4 - c_mod_4); } -#if HAS_VPERM == 1 DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) { u32x r = 0; #if VECT_SIZE == 1 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); + r = __builtin_amdgcn_perm (b, a, c); #endif #if VECT_SIZE >= 2 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s0) : "v"(b.s0), "v"(a.s0), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s1) : "v"(b.s1), "v"(a.s1), "v"(c)); + r.s0 = __builtin_amdgcn_perm (b.s0, a.s0, c); + r.s1 = __builtin_amdgcn_perm (b.s1, a.s1, c); #endif #if VECT_SIZE >= 4 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s2) : "v"(b.s2), "v"(a.s2), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s3) : "v"(b.s3), "v"(a.s3), "v"(c)); + r.s2 = __builtin_amdgcn_perm (b.s2, a.s2, c); + r.s3 = __builtin_amdgcn_perm (b.s3, a.s3, c); #endif #if VECT_SIZE >= 8 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s4) : "v"(b.s4), "v"(a.s4), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s5) : "v"(b.s5), "v"(a.s5), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s6) : "v"(b.s6), "v"(a.s6), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s7) : "v"(b.s7), "v"(a.s7), "v"(c)); + r.s4 = 
__builtin_amdgcn_perm (b.s4, a.s4, c); + r.s5 = __builtin_amdgcn_perm (b.s5, a.s5, c); + r.s6 = __builtin_amdgcn_perm (b.s6, a.s6, c); + r.s7 = __builtin_amdgcn_perm (b.s7, a.s7, c); #endif #if VECT_SIZE >= 16 - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s8) : "v"(b.s8), "v"(a.s8), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.s9) : "v"(b.s9), "v"(a.s9), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sa) : "v"(b.sa), "v"(a.sa), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sb) : "v"(b.sb), "v"(a.sb), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sc) : "v"(b.sc), "v"(a.sc), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sd) : "v"(b.sd), "v"(a.sd), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.se) : "v"(b.se), "v"(a.se), "v"(c)); - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r.sf) : "v"(b.sf), "v"(a.sf), "v"(c)); + r.s8 = __builtin_amdgcn_perm (b.s8, a.s8, c); + r.s9 = __builtin_amdgcn_perm (b.s9, a.s9, c); + r.sa = __builtin_amdgcn_perm (b.sa, a.sa, c); + r.sb = __builtin_amdgcn_perm (b.sb, a.sb, c); + r.sc = __builtin_amdgcn_perm (b.sc, a.sc, c); + r.sd = __builtin_amdgcn_perm (b.sd, a.sd, c); + r.se = __builtin_amdgcn_perm (b.se, a.se, c); + r.sf = __builtin_amdgcn_perm (b.sf, a.sf, c); #endif return r; @@ -1547,13 +1600,8 @@ DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const int c) DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const int c) { - u32 r = 0; - - __asm__ __volatile__ ("V_PERM_B32 %0, %1, %2, %3;" : "=v"(r) : "v"(b), "v"(a), "v"(c)); - - return r; + return __builtin_amdgcn_perm (b, a, c); } -#endif #if HAS_VADD3 == 1 DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c) @@ -1733,11 +1781,52 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c) return r; } +DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const int c) +{ + 
const int c_mod_4 = c & 3; + + u32x r; + + #if VECT_SIZE == 1 + r = __funnelshift_r (b, a, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 2 + r.s0 = __funnelshift_r (b.s0, a.s0, c_mod_4 * 8); + r.s1 = __funnelshift_r (b.s1, a.s1, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __funnelshift_r (b.s2, a.s2, c_mod_4 * 8); + r.s3 = __funnelshift_r (b.s3, a.s3, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __funnelshift_r (b.s4, a.s4, c_mod_4 * 8); + r.s5 = __funnelshift_r (b.s5, a.s5, c_mod_4 * 8); + r.s6 = __funnelshift_r (b.s6, a.s6, c_mod_4 * 8); + r.s7 = __funnelshift_r (b.s7, a.s7, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __funnelshift_r (b.s8, a.s8, c_mod_4 * 8); + r.s9 = __funnelshift_r (b.s9, a.s9, c_mod_4 * 8); + r.sa = __funnelshift_r (b.sa, a.sa, c_mod_4 * 8); + r.sb = __funnelshift_r (b.sb, a.sb, c_mod_4 * 8); + r.sc = __funnelshift_r (b.sc, a.sc, c_mod_4 * 8); + r.sd = __funnelshift_r (b.sd, a.sd, c_mod_4 * 8); + r.se = __funnelshift_r (b.se, a.se, c_mod_4 * 8); + r.sf = __funnelshift_r (b.sf, a.sf, c_mod_4 * 8); + #endif + + return r; +} + DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; - const u32 r = hc_byte_perm_S (b, a, (0x76543210 >> (c_mod_4 * 4)) & 0xffff); + const u32 r = __funnelshift_r (b, a, c_mod_4 * 8); return r; } @@ -1746,9 +1835,39 @@ DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const int c) { const int c_mod_4 = c & 3; - const int c_minus_4 = 4 - c_mod_4; + u32x r; - const u32x r = hc_byte_perm (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); + #if VECT_SIZE == 1 + r = __funnelshift_l (a, b, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 2 + r.s0 = __funnelshift_l (a.s0, b.s0, c_mod_4 * 8); + r.s1 = __funnelshift_l (a.s1, b.s1, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 4 + r.s2 = __funnelshift_l (a.s2, b.s2, c_mod_4 * 8); + r.s3 = __funnelshift_l (a.s3, b.s3, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 8 + r.s4 = __funnelshift_l (a.s4, b.s4, c_mod_4 
* 8); + r.s5 = __funnelshift_l (a.s5, b.s5, c_mod_4 * 8); + r.s6 = __funnelshift_l (a.s6, b.s6, c_mod_4 * 8); + r.s7 = __funnelshift_l (a.s7, b.s7, c_mod_4 * 8); + #endif + + #if VECT_SIZE >= 16 + r.s8 = __funnelshift_l (a.s8, b.s8, c_mod_4 * 8); + r.s9 = __funnelshift_l (a.s9, b.s9, c_mod_4 * 8); + r.sa = __funnelshift_l (a.sa, b.sa, c_mod_4 * 8); + r.sb = __funnelshift_l (a.sb, b.sb, c_mod_4 * 8); + r.sc = __funnelshift_l (a.sc, b.sc, c_mod_4 * 8); + r.sd = __funnelshift_l (a.sd, b.sd, c_mod_4 * 8); + r.se = __funnelshift_l (a.se, b.se, c_mod_4 * 8); + r.sf = __funnelshift_l (a.sf, b.sf, c_mod_4 * 8); + #endif return r; } @@ -1757,9 +1876,7 @@ DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const int c) { const int c_mod_4 = c & 3; - const int c_minus_4 = 4 - c_mod_4; - - const u32 r = hc_byte_perm_S (a, b, (0x76543210 >> (c_minus_4 * 4)) & 0xffff); + const u32 r = __funnelshift_l (a, b, c_mod_4 * 8); return r; } @@ -3095,7 +3212,7 @@ DECLSPEC int count_bits_32 (const u32 v0, const u32 v1) DECLSPEC void make_utf16be (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PRIVATE_AS u32x *out2) { #if defined IS_NV - + out2[3] = hc_byte_perm (in[3], 0, 0x3727); out2[2] = hc_byte_perm (in[3], 0, 0x1707); out2[1] = hc_byte_perm (in[2], 0, 0x3727); @@ -3105,7 +3222,7 @@ DECLSPEC void make_utf16be (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PR out1[1] = hc_byte_perm (in[0], 0, 0x3727); out1[0] = hc_byte_perm (in[0], 0, 0x1707); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x03070207); out2[2] = hc_byte_perm (in[3], 0, 0x01070007); @@ -3143,7 +3260,7 @@ DECLSPEC void make_utf16beN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, P out1[1] = hc_byte_perm (in[0], 0, 0x1707); out1[0] = hc_byte_perm (in[0], 0, 0x3727); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x01070007); out2[2] 
= hc_byte_perm (in[3], 0, 0x03070207); @@ -3181,7 +3298,7 @@ DECLSPEC void make_utf16le (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, PR out1[1] = hc_byte_perm (in[0], 0, 0x7372); out1[0] = hc_byte_perm (in[0], 0, 0x7170); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x07030702); out2[2] = hc_byte_perm (in[3], 0, 0x07010700); @@ -3219,7 +3336,7 @@ DECLSPEC void make_utf16leN (PRIVATE_AS const u32x *in, PRIVATE_AS u32x *out1, P out1[1] = hc_byte_perm (in[0], 0, 0x7170); out1[0] = hc_byte_perm (in[0], 0, 0x7372); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm (in[3], 0, 0x07010700); out2[2] = hc_byte_perm (in[3], 0, 0x07030702); @@ -3253,7 +3370,7 @@ DECLSPEC void undo_utf16be (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *i out[2] = hc_byte_perm (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm (in2[2], in2[3], 0x4602); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm (in1[2], in1[3], 0x04060002); @@ -3283,7 +3400,7 @@ DECLSPEC void undo_utf16le (PRIVATE_AS const u32x *in1, PRIVATE_AS const u32x *i out[2] = hc_byte_perm (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm (in2[2], in2[3], 0x6420); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm (in1[2], in1[3], 0x06040200); @@ -3407,7 +3524,6 @@ DECLSPEC void switch_buffer_by_offset_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -3730,352 +3846,12 @@ DECLSPEC void switch_buffer_by_offset_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * break; } - 
#endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - - break; - - case 1: - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm (w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] 
= 0; - - break; - - case 2: - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - 
w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = 
hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 
0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -4534,476 +4310,12 @@ DECLSPEC void switch_buffer_by_offset_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS break; } - #endif - - #ifdef IS_NV - // atm only same code as for AMD, but could be improved - switch (offset_switch) - { - case 0: - c0[0] = hc_bytealign (w3[3], 0, offset); - w3[3] = hc_bytealign (w3[2], w3[3], offset); - w3[2] = hc_bytealign (w3[1], w3[2], offset); - w3[1] = hc_bytealign (w3[0], w3[1], offset); - w3[0] = hc_bytealign (w2[3], w3[0], offset); - w2[3] = hc_bytealign (w2[2], w2[3], offset); - w2[2] = hc_bytealign (w2[1], w2[2], offset); - w2[1] = hc_bytealign (w2[0], w2[1], 
offset); - w2[0] = hc_bytealign (w1[3], w2[0], offset); - w1[3] = hc_bytealign (w1[2], w1[3], offset); - w1[2] = hc_bytealign (w1[1], w1[2], offset); - w1[1] = hc_bytealign (w1[0], w1[1], offset); - w1[0] = hc_bytealign (w0[3], w1[0], offset); - w0[3] = hc_bytealign (w0[2], w0[3], offset); - w0[2] = hc_bytealign (w0[1], w0[2], offset); - w0[1] = hc_bytealign (w0[0], w0[1], offset); - w0[0] = hc_bytealign ( 0, w0[0], offset); - - break; - - case 1: - c0[1] = hc_bytealign (w3[3], 0, offset); - c0[0] = hc_bytealign (w3[2], w3[3], offset); - w3[3] = hc_bytealign (w3[1], w3[2], offset); - w3[2] = hc_bytealign (w3[0], w3[1], offset); - w3[1] = hc_bytealign (w2[3], w3[0], offset); - w3[0] = hc_bytealign (w2[2], w2[3], offset); - w2[3] = hc_bytealign (w2[1], w2[2], offset); - w2[2] = hc_bytealign (w2[0], w2[1], offset); - w2[1] = hc_bytealign (w1[3], w2[0], offset); - w2[0] = hc_bytealign (w1[2], w1[3], offset); - w1[3] = hc_bytealign (w1[1], w1[2], offset); - w1[2] = hc_bytealign (w1[0], w1[1], offset); - w1[1] = hc_bytealign (w0[3], w1[0], offset); - w1[0] = hc_bytealign (w0[2], w0[3], offset); - w0[3] = hc_bytealign (w0[1], w0[2], offset); - w0[2] = hc_bytealign (w0[0], w0[1], offset); - w0[1] = hc_bytealign ( 0, w0[0], offset); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_bytealign (w3[3], 0, offset); - c0[1] = hc_bytealign (w3[2], w3[3], offset); - c0[0] = hc_bytealign (w3[1], w3[2], offset); - w3[3] = hc_bytealign (w3[0], w3[1], offset); - w3[2] = hc_bytealign (w2[3], w3[0], offset); - w3[1] = hc_bytealign (w2[2], w2[3], offset); - w3[0] = hc_bytealign (w2[1], w2[2], offset); - w2[3] = hc_bytealign (w2[0], w2[1], offset); - w2[2] = hc_bytealign (w1[3], w2[0], offset); - w2[1] = hc_bytealign (w1[2], w1[3], offset); - w2[0] = hc_bytealign (w1[1], w1[2], offset); - w1[3] = hc_bytealign (w1[0], w1[1], offset); - w1[2] = hc_bytealign (w0[3], w1[0], offset); - w1[1] = hc_bytealign (w0[2], w0[3], offset); - w1[0] = hc_bytealign (w0[1], w0[2], offset); - w0[3] = 
hc_bytealign (w0[0], w0[1], offset); - w0[2] = hc_bytealign ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_bytealign (w3[3], 0, offset); - c0[2] = hc_bytealign (w3[2], w3[3], offset); - c0[1] = hc_bytealign (w3[1], w3[2], offset); - c0[0] = hc_bytealign (w3[0], w3[1], offset); - w3[3] = hc_bytealign (w2[3], w3[0], offset); - w3[2] = hc_bytealign (w2[2], w2[3], offset); - w3[1] = hc_bytealign (w2[1], w2[2], offset); - w3[0] = hc_bytealign (w2[0], w2[1], offset); - w2[3] = hc_bytealign (w1[3], w2[0], offset); - w2[2] = hc_bytealign (w1[2], w1[3], offset); - w2[1] = hc_bytealign (w1[1], w1[2], offset); - w2[0] = hc_bytealign (w1[0], w1[1], offset); - w1[3] = hc_bytealign (w0[3], w1[0], offset); - w1[2] = hc_bytealign (w0[2], w0[3], offset); - w1[1] = hc_bytealign (w0[1], w0[2], offset); - w1[0] = hc_bytealign (w0[0], w0[1], offset); - w0[3] = hc_bytealign ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_bytealign (w3[3], 0, offset); - c0[3] = hc_bytealign (w3[2], w3[3], offset); - c0[2] = hc_bytealign (w3[1], w3[2], offset); - c0[1] = hc_bytealign (w3[0], w3[1], offset); - c0[0] = hc_bytealign (w2[3], w3[0], offset); - w3[3] = hc_bytealign (w2[2], w2[3], offset); - w3[2] = hc_bytealign (w2[1], w2[2], offset); - w3[1] = hc_bytealign (w2[0], w2[1], offset); - w3[0] = hc_bytealign (w1[3], w2[0], offset); - w2[3] = hc_bytealign (w1[2], w1[3], offset); - w2[2] = hc_bytealign (w1[1], w1[2], offset); - w2[1] = hc_bytealign (w1[0], w1[1], offset); - w2[0] = hc_bytealign (w0[3], w1[0], offset); - w1[3] = hc_bytealign (w0[2], w0[3], offset); - w1[2] = hc_bytealign (w0[1], w0[2], offset); - w1[1] = hc_bytealign (w0[0], w0[1], offset); - w1[0] = hc_bytealign ( 0, w0[0], offset); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_bytealign (w3[3], 0, offset); - c1[0] = hc_bytealign (w3[2], w3[3], offset); - c0[3] = hc_bytealign (w3[1], w3[2], offset); - c0[2] = 
hc_bytealign (w3[0], w3[1], offset); - c0[1] = hc_bytealign (w2[3], w3[0], offset); - c0[0] = hc_bytealign (w2[2], w2[3], offset); - w3[3] = hc_bytealign (w2[1], w2[2], offset); - w3[2] = hc_bytealign (w2[0], w2[1], offset); - w3[1] = hc_bytealign (w1[3], w2[0], offset); - w3[0] = hc_bytealign (w1[2], w1[3], offset); - w2[3] = hc_bytealign (w1[1], w1[2], offset); - w2[2] = hc_bytealign (w1[0], w1[1], offset); - w2[1] = hc_bytealign (w0[3], w1[0], offset); - w2[0] = hc_bytealign (w0[2], w0[3], offset); - w1[3] = hc_bytealign (w0[1], w0[2], offset); - w1[2] = hc_bytealign (w0[0], w0[1], offset); - w1[1] = hc_bytealign ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_bytealign (w3[3], 0, offset); - c1[1] = hc_bytealign (w3[2], w3[3], offset); - c1[0] = hc_bytealign (w3[1], w3[2], offset); - c0[3] = hc_bytealign (w3[0], w3[1], offset); - c0[2] = hc_bytealign (w2[3], w3[0], offset); - c0[1] = hc_bytealign (w2[2], w2[3], offset); - c0[0] = hc_bytealign (w2[1], w2[2], offset); - w3[3] = hc_bytealign (w2[0], w2[1], offset); - w3[2] = hc_bytealign (w1[3], w2[0], offset); - w3[1] = hc_bytealign (w1[2], w1[3], offset); - w3[0] = hc_bytealign (w1[1], w1[2], offset); - w2[3] = hc_bytealign (w1[0], w1[1], offset); - w2[2] = hc_bytealign (w0[3], w1[0], offset); - w2[1] = hc_bytealign (w0[2], w0[3], offset); - w2[0] = hc_bytealign (w0[1], w0[2], offset); - w1[3] = hc_bytealign (w0[0], w0[1], offset); - w1[2] = hc_bytealign ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_bytealign (w3[3], 0, offset); - c1[2] = hc_bytealign (w3[2], w3[3], offset); - c1[1] = hc_bytealign (w3[1], w3[2], offset); - c1[0] = hc_bytealign (w3[0], w3[1], offset); - c0[3] = hc_bytealign (w2[3], w3[0], offset); - c0[2] = hc_bytealign (w2[2], w2[3], offset); - c0[1] = hc_bytealign (w2[1], w2[2], offset); - c0[0] = hc_bytealign (w2[0], w2[1], offset); - 
w3[3] = hc_bytealign (w1[3], w2[0], offset); - w3[2] = hc_bytealign (w1[2], w1[3], offset); - w3[1] = hc_bytealign (w1[1], w1[2], offset); - w3[0] = hc_bytealign (w1[0], w1[1], offset); - w2[3] = hc_bytealign (w0[3], w1[0], offset); - w2[2] = hc_bytealign (w0[2], w0[3], offset); - w2[1] = hc_bytealign (w0[1], w0[2], offset); - w2[0] = hc_bytealign (w0[0], w0[1], offset); - w1[3] = hc_bytealign ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_bytealign (w3[3], 0, offset); - c1[3] = hc_bytealign (w3[2], w3[3], offset); - c1[2] = hc_bytealign (w3[1], w3[2], offset); - c1[1] = hc_bytealign (w3[0], w3[1], offset); - c1[0] = hc_bytealign (w2[3], w3[0], offset); - c0[3] = hc_bytealign (w2[2], w2[3], offset); - c0[2] = hc_bytealign (w2[1], w2[2], offset); - c0[1] = hc_bytealign (w2[0], w2[1], offset); - c0[0] = hc_bytealign (w1[3], w2[0], offset); - w3[3] = hc_bytealign (w1[2], w1[3], offset); - w3[2] = hc_bytealign (w1[1], w1[2], offset); - w3[1] = hc_bytealign (w1[0], w1[1], offset); - w3[0] = hc_bytealign (w0[3], w1[0], offset); - w2[3] = hc_bytealign (w0[2], w0[3], offset); - w2[2] = hc_bytealign (w0[1], w0[2], offset); - w2[1] = hc_bytealign (w0[0], w0[1], offset); - w2[0] = hc_bytealign ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_bytealign (w3[3], 0, offset); - c2[0] = hc_bytealign (w3[2], w3[3], offset); - c1[3] = hc_bytealign (w3[1], w3[2], offset); - c1[2] = hc_bytealign (w3[0], w3[1], offset); - c1[1] = hc_bytealign (w2[3], w3[0], offset); - c1[0] = hc_bytealign (w2[2], w2[3], offset); - c0[3] = hc_bytealign (w2[1], w2[2], offset); - c0[2] = hc_bytealign (w2[0], w2[1], offset); - c0[1] = hc_bytealign (w1[3], w2[0], offset); - c0[0] = hc_bytealign (w1[2], w1[3], offset); - w3[3] = hc_bytealign (w1[1], w1[2], offset); - w3[2] = hc_bytealign (w1[0], 
w1[1], offset); - w3[1] = hc_bytealign (w0[3], w1[0], offset); - w3[0] = hc_bytealign (w0[2], w0[3], offset); - w2[3] = hc_bytealign (w0[1], w0[2], offset); - w2[2] = hc_bytealign (w0[0], w0[1], offset); - w2[1] = hc_bytealign ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_bytealign (w3[3], 0, offset); - c2[1] = hc_bytealign (w3[2], w3[3], offset); - c2[0] = hc_bytealign (w3[1], w3[2], offset); - c1[3] = hc_bytealign (w3[0], w3[1], offset); - c1[2] = hc_bytealign (w2[3], w3[0], offset); - c1[1] = hc_bytealign (w2[2], w2[3], offset); - c1[0] = hc_bytealign (w2[1], w2[2], offset); - c0[3] = hc_bytealign (w2[0], w2[1], offset); - c0[2] = hc_bytealign (w1[3], w2[0], offset); - c0[1] = hc_bytealign (w1[2], w1[3], offset); - c0[0] = hc_bytealign (w1[1], w1[2], offset); - w3[3] = hc_bytealign (w1[0], w1[1], offset); - w3[2] = hc_bytealign (w0[3], w1[0], offset); - w3[1] = hc_bytealign (w0[2], w0[3], offset); - w3[0] = hc_bytealign (w0[1], w0[2], offset); - w2[3] = hc_bytealign (w0[0], w0[1], offset); - w2[2] = hc_bytealign ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_bytealign (w3[3], 0, offset); - c2[2] = hc_bytealign (w3[2], w3[3], offset); - c2[1] = hc_bytealign (w3[1], w3[2], offset); - c2[0] = hc_bytealign (w3[0], w3[1], offset); - c1[3] = hc_bytealign (w2[3], w3[0], offset); - c1[2] = hc_bytealign (w2[2], w2[3], offset); - c1[1] = hc_bytealign (w2[1], w2[2], offset); - c1[0] = hc_bytealign (w2[0], w2[1], offset); - c0[3] = hc_bytealign (w1[3], w2[0], offset); - c0[2] = hc_bytealign (w1[2], w1[3], offset); - c0[1] = hc_bytealign (w1[1], w1[2], offset); - c0[0] = hc_bytealign (w1[0], w1[1], offset); - w3[3] = hc_bytealign (w0[3], w1[0], offset); - w3[2] = hc_bytealign (w0[2], w0[3], offset); - w3[1] = 
hc_bytealign (w0[1], w0[2], offset); - w3[0] = hc_bytealign (w0[0], w0[1], offset); - w2[3] = hc_bytealign ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_bytealign (w3[3], 0, offset); - c2[3] = hc_bytealign (w3[2], w3[3], offset); - c2[2] = hc_bytealign (w3[1], w3[2], offset); - c2[1] = hc_bytealign (w3[0], w3[1], offset); - c2[0] = hc_bytealign (w2[3], w3[0], offset); - c1[3] = hc_bytealign (w2[2], w2[3], offset); - c1[2] = hc_bytealign (w2[1], w2[2], offset); - c1[1] = hc_bytealign (w2[0], w2[1], offset); - c1[0] = hc_bytealign (w1[3], w2[0], offset); - c0[3] = hc_bytealign (w1[2], w1[3], offset); - c0[2] = hc_bytealign (w1[1], w1[2], offset); - c0[1] = hc_bytealign (w1[0], w1[1], offset); - c0[0] = hc_bytealign (w0[3], w1[0], offset); - w3[3] = hc_bytealign (w0[2], w0[3], offset); - w3[2] = hc_bytealign (w0[1], w0[2], offset); - w3[1] = hc_bytealign (w0[0], w0[1], offset); - w3[0] = hc_bytealign ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_bytealign (w3[3], 0, offset); - c3[0] = hc_bytealign (w3[2], w3[3], offset); - c2[3] = hc_bytealign (w3[1], w3[2], offset); - c2[2] = hc_bytealign (w3[0], w3[1], offset); - c2[1] = hc_bytealign (w2[3], w3[0], offset); - c2[0] = hc_bytealign (w2[2], w2[3], offset); - c1[3] = hc_bytealign (w2[1], w2[2], offset); - c1[2] = hc_bytealign (w2[0], w2[1], offset); - c1[1] = hc_bytealign (w1[3], w2[0], offset); - c1[0] = hc_bytealign (w1[2], w1[3], offset); - c0[3] = hc_bytealign (w1[1], w1[2], offset); - c0[2] = hc_bytealign (w1[0], w1[1], offset); - c0[1] = hc_bytealign (w0[3], w1[0], offset); - c0[0] = hc_bytealign (w0[2], w0[3], offset); - w3[3] = hc_bytealign (w0[1], w0[2], offset); - w3[2] = hc_bytealign (w0[0], 
w0[1], offset); - w3[1] = hc_bytealign ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_bytealign (w3[3], 0, offset); - c3[1] = hc_bytealign (w3[2], w3[3], offset); - c3[0] = hc_bytealign (w3[1], w3[2], offset); - c2[3] = hc_bytealign (w3[0], w3[1], offset); - c2[2] = hc_bytealign (w2[3], w3[0], offset); - c2[1] = hc_bytealign (w2[2], w2[3], offset); - c2[0] = hc_bytealign (w2[1], w2[2], offset); - c1[3] = hc_bytealign (w2[0], w2[1], offset); - c1[2] = hc_bytealign (w1[3], w2[0], offset); - c1[1] = hc_bytealign (w1[2], w1[3], offset); - c1[0] = hc_bytealign (w1[1], w1[2], offset); - c0[3] = hc_bytealign (w1[0], w1[1], offset); - c0[2] = hc_bytealign (w0[3], w1[0], offset); - c0[1] = hc_bytealign (w0[2], w0[3], offset); - c0[0] = hc_bytealign (w0[1], w0[2], offset); - w3[3] = hc_bytealign (w0[0], w0[1], offset); - w3[2] = hc_bytealign ( 0, w0[0], offset); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_bytealign (w3[3], 0, offset); - c3[2] = hc_bytealign (w3[2], w3[3], offset); - c3[1] = hc_bytealign (w3[1], w3[2], offset); - c3[0] = hc_bytealign (w3[0], w3[1], offset); - c2[3] = hc_bytealign (w2[3], w3[0], offset); - c2[2] = hc_bytealign (w2[2], w2[3], offset); - c2[1] = hc_bytealign (w2[1], w2[2], offset); - c2[0] = hc_bytealign (w2[0], w2[1], offset); - c1[3] = hc_bytealign (w1[3], w2[0], offset); - c1[2] = hc_bytealign (w1[2], w1[3], offset); - c1[1] = hc_bytealign (w1[1], w1[2], offset); - c1[0] = hc_bytealign (w1[0], w1[1], offset); - c0[3] = hc_bytealign (w0[3], w1[0], offset); - c0[2] = hc_bytealign (w0[2], w0[3], offset); - c0[1] = hc_bytealign (w0[1], w0[2], offset); - c0[0] = hc_bytealign (w0[0], w0[1], offset); 
- w3[3] = hc_bytealign ( 0, w0[0], offset); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -5326,348 +4638,12 @@ DECLSPEC void switch_buffer_by_offset_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - 
w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - 
w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], 
w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], 
selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -6126,484 
+5102,12 @@ DECLSPEC void switch_buffer_by_offset_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm ( 0, w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm ( 0, w3[3], selector); - c0[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - 
w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm ( 0, w3[3], selector); - c0[1] = hc_byte_perm (w3[3], w3[2], selector); - c0[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm ( 0, w3[3], selector); - c0[2] = hc_byte_perm (w3[3], w3[2], selector); - c0[1] = hc_byte_perm (w3[2], w3[1], selector); - c0[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm ( 0, w3[3], selector); - c0[3] = 
hc_byte_perm (w3[3], w3[2], selector); - c0[2] = hc_byte_perm (w3[2], w3[1], selector); - c0[1] = hc_byte_perm (w3[1], w3[0], selector); - c0[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm ( 0, w3[3], selector); - c1[0] = hc_byte_perm (w3[3], w3[2], selector); - c0[3] = hc_byte_perm (w3[2], w3[1], selector); - c0[2] = hc_byte_perm (w3[1], w3[0], selector); - c0[1] = hc_byte_perm (w3[0], w2[3], selector); - c0[0] = hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm ( 0, w3[3], selector); - c1[1] = hc_byte_perm (w3[3], w3[2], selector); - c1[0] = hc_byte_perm (w3[2], w3[1], selector); - c0[3] = hc_byte_perm (w3[1], w3[0], selector); - c0[2] = hc_byte_perm (w3[0], w2[3], 
selector); - c0[1] = hc_byte_perm (w2[3], w2[2], selector); - c0[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm ( 0, w3[3], selector); - c1[2] = hc_byte_perm (w3[3], w3[2], selector); - c1[1] = hc_byte_perm (w3[2], w3[1], selector); - c1[0] = hc_byte_perm (w3[1], w3[0], selector); - c0[3] = hc_byte_perm (w3[0], w2[3], selector); - c0[2] = hc_byte_perm (w2[3], w2[2], selector); - c0[1] = hc_byte_perm (w2[2], w2[1], selector); - c0[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm ( 0, w3[3], selector); - c1[3] = hc_byte_perm (w3[3], w3[2], selector); - c1[2] = hc_byte_perm (w3[2], w3[1], selector); - c1[1] = hc_byte_perm (w3[1], w3[0], selector); - c1[0] = hc_byte_perm (w3[0], w2[3], selector); - c0[3] = hc_byte_perm (w2[3], w2[2], selector); - c0[2] = hc_byte_perm (w2[2], w2[1], selector); - c0[1] = hc_byte_perm 
(w2[1], w2[0], selector); - c0[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm ( 0, w3[3], selector); - c2[0] = hc_byte_perm (w3[3], w3[2], selector); - c1[3] = hc_byte_perm (w3[2], w3[1], selector); - c1[2] = hc_byte_perm (w3[1], w3[0], selector); - c1[1] = hc_byte_perm (w3[0], w2[3], selector); - c1[0] = hc_byte_perm (w2[3], w2[2], selector); - c0[3] = hc_byte_perm (w2[2], w2[1], selector); - c0[2] = hc_byte_perm (w2[1], w2[0], selector); - c0[1] = hc_byte_perm (w2[0], w1[3], selector); - c0[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm ( 0, w3[3], selector); - c2[1] = hc_byte_perm (w3[3], w3[2], selector); - c2[0] = hc_byte_perm (w3[2], w3[1], selector); - c1[3] = hc_byte_perm (w3[1], w3[0], selector); - c1[2] = hc_byte_perm (w3[0], w2[3], selector); - c1[1] = hc_byte_perm (w2[3], w2[2], selector); - c1[0] = hc_byte_perm (w2[2], w2[1], selector); - c0[3] = hc_byte_perm (w2[1], w2[0], selector); - c0[2] = hc_byte_perm (w2[0], w1[3], selector); - 
c0[1] = hc_byte_perm (w1[3], w1[2], selector); - c0[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm ( 0, w3[3], selector); - c2[2] = hc_byte_perm (w3[3], w3[2], selector); - c2[1] = hc_byte_perm (w3[2], w3[1], selector); - c2[0] = hc_byte_perm (w3[1], w3[0], selector); - c1[3] = hc_byte_perm (w3[0], w2[3], selector); - c1[2] = hc_byte_perm (w2[3], w2[2], selector); - c1[1] = hc_byte_perm (w2[2], w2[1], selector); - c1[0] = hc_byte_perm (w2[1], w2[0], selector); - c0[3] = hc_byte_perm (w2[0], w1[3], selector); - c0[2] = hc_byte_perm (w1[3], w1[2], selector); - c0[1] = hc_byte_perm (w1[2], w1[1], selector); - c0[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm ( 0, w3[3], selector); - c2[3] = hc_byte_perm (w3[3], w3[2], selector); - c2[2] = hc_byte_perm (w3[2], w3[1], selector); - c2[1] = hc_byte_perm (w3[1], w3[0], selector); - c2[0] = hc_byte_perm (w3[0], w2[3], selector); - c1[3] = hc_byte_perm (w2[3], w2[2], selector); - c1[2] = hc_byte_perm (w2[2], w2[1], selector); - c1[1] = hc_byte_perm (w2[1], w2[0], selector); - c1[0] = hc_byte_perm (w2[0], w1[3], selector); - c0[3] = hc_byte_perm 
(w1[3], w1[2], selector); - c0[2] = hc_byte_perm (w1[2], w1[1], selector); - c0[1] = hc_byte_perm (w1[1], w1[0], selector); - c0[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm ( 0, w3[3], selector); - c3[0] = hc_byte_perm (w3[3], w3[2], selector); - c2[3] = hc_byte_perm (w3[2], w3[1], selector); - c2[2] = hc_byte_perm (w3[1], w3[0], selector); - c2[1] = hc_byte_perm (w3[0], w2[3], selector); - c2[0] = hc_byte_perm (w2[3], w2[2], selector); - c1[3] = hc_byte_perm (w2[2], w2[1], selector); - c1[2] = hc_byte_perm (w2[1], w2[0], selector); - c1[1] = hc_byte_perm (w2[0], w1[3], selector); - c1[0] = hc_byte_perm (w1[3], w1[2], selector); - c0[3] = hc_byte_perm (w1[2], w1[1], selector); - c0[2] = hc_byte_perm (w1[1], w1[0], selector); - c0[1] = hc_byte_perm (w1[0], w0[3], selector); - c0[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm ( 0, w3[3], selector); - c3[1] = hc_byte_perm (w3[3], w3[2], selector); - c3[0] = hc_byte_perm (w3[2], w3[1], selector); - c2[3] = hc_byte_perm (w3[1], w3[0], selector); - c2[2] = hc_byte_perm (w3[0], w2[3], selector); - c2[1] = hc_byte_perm (w2[3], w2[2], selector); - c2[0] = hc_byte_perm (w2[2], w2[1], selector); - c1[3] = hc_byte_perm (w2[1], w2[0], selector); - c1[2] = hc_byte_perm (w2[0], w1[3], 
selector); - c1[1] = hc_byte_perm (w1[3], w1[2], selector); - c1[0] = hc_byte_perm (w1[2], w1[1], selector); - c0[3] = hc_byte_perm (w1[1], w1[0], selector); - c0[2] = hc_byte_perm (w1[0], w0[3], selector); - c0[1] = hc_byte_perm (w0[3], w0[2], selector); - c0[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm ( 0, w3[3], selector); - c3[2] = hc_byte_perm (w3[3], w3[2], selector); - c3[1] = hc_byte_perm (w3[2], w3[1], selector); - c3[0] = hc_byte_perm (w3[1], w3[0], selector); - c2[3] = hc_byte_perm (w3[0], w2[3], selector); - c2[2] = hc_byte_perm (w2[3], w2[2], selector); - c2[1] = hc_byte_perm (w2[2], w2[1], selector); - c2[0] = hc_byte_perm (w2[1], w2[0], selector); - c1[3] = hc_byte_perm (w2[0], w1[3], selector); - c1[2] = hc_byte_perm (w1[3], w1[2], selector); - c1[1] = hc_byte_perm (w1[2], w1[1], selector); - c1[0] = hc_byte_perm (w1[1], w1[0], selector); - c0[3] = hc_byte_perm (w1[0], w0[3], selector); - c0[2] = hc_byte_perm (w0[3], w0[2], selector); - c0[1] = hc_byte_perm (w0[2], w0[1], selector); - c0[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined 
IS_GENERIC switch (offset_switch) { case 0: @@ -7758,592 +6262,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_le (PRIVATE_AS u32x *w0, PRIVATE_AS u3 break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm (w7[2], w7[3], selector); - w7[2] = hc_byte_perm (w7[1], w7[2], selector); - w7[1] = hc_byte_perm (w7[0], w7[1], selector); - w7[0] = hc_byte_perm (w6[3], w7[0], selector); - w6[3] = hc_byte_perm (w6[2], w6[3], selector); - w6[2] = hc_byte_perm (w6[1], w6[2], selector); - w6[1] = hc_byte_perm (w6[0], w6[1], selector); - w6[0] = hc_byte_perm (w5[3], w6[0], selector); - w5[3] = hc_byte_perm (w5[2], w5[3], selector); - w5[2] = hc_byte_perm (w5[1], w5[2], selector); - w5[1] = hc_byte_perm (w5[0], w5[1], selector); - w5[0] = hc_byte_perm (w4[3], w5[0], selector); - w4[3] = hc_byte_perm (w4[2], w4[3], selector); - w4[2] = hc_byte_perm (w4[1], w4[2], selector); - w4[1] = hc_byte_perm (w4[0], w4[1], selector); - w4[0] = hc_byte_perm (w3[3], w4[0], selector); - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = 
hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - break; - - case 1: - w7[3] = hc_byte_perm (w7[1], w7[2], selector); - w7[2] = hc_byte_perm (w7[0], w7[1], selector); - w7[1] = hc_byte_perm (w6[3], w7[0], selector); - w7[0] = hc_byte_perm (w6[2], w6[3], selector); - w6[3] = hc_byte_perm (w6[1], w6[2], selector); - w6[2] = hc_byte_perm (w6[0], w6[1], selector); - w6[1] = hc_byte_perm (w5[3], w6[0], selector); - w6[0] = hc_byte_perm (w5[2], w5[3], selector); - w5[3] = hc_byte_perm (w5[1], w5[2], selector); - w5[2] = hc_byte_perm (w5[0], w5[1], selector); - w5[1] = hc_byte_perm (w4[3], w5[0], selector); - w5[0] = hc_byte_perm (w4[2], w4[3], selector); - w4[3] = hc_byte_perm (w4[1], w4[2], selector); - w4[2] = hc_byte_perm (w4[0], w4[1], selector); - w4[1] = hc_byte_perm (w3[3], w4[0], selector); - w4[0] = hc_byte_perm (w3[2], w3[3], selector); - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm (w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] = 0; - break; - - case 2: - w7[3] = hc_byte_perm (w7[0], w7[1], selector); - w7[2] = hc_byte_perm (w6[3], w7[0], selector); - w7[1] = hc_byte_perm (w6[2], w6[3], selector); - w7[0] = hc_byte_perm (w6[1], w6[2], selector); - w6[3] = hc_byte_perm (w6[0], w6[1], selector); 
- w6[2] = hc_byte_perm (w5[3], w6[0], selector); - w6[1] = hc_byte_perm (w5[2], w5[3], selector); - w6[0] = hc_byte_perm (w5[1], w5[2], selector); - w5[3] = hc_byte_perm (w5[0], w5[1], selector); - w5[2] = hc_byte_perm (w4[3], w5[0], selector); - w5[1] = hc_byte_perm (w4[2], w4[3], selector); - w5[0] = hc_byte_perm (w4[1], w4[2], selector); - w4[3] = hc_byte_perm (w4[0], w4[1], selector); - w4[2] = hc_byte_perm (w3[3], w4[0], selector); - w4[1] = hc_byte_perm (w3[2], w3[3], selector); - w4[0] = hc_byte_perm (w3[1], w3[2], selector); - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w7[3] = hc_byte_perm (w6[3], w7[0], selector); - w7[2] = hc_byte_perm (w6[2], w6[3], selector); - w7[1] = hc_byte_perm (w6[1], w6[2], selector); - w7[0] = hc_byte_perm (w6[0], w6[1], selector); - w6[3] = hc_byte_perm (w5[3], w6[0], selector); - w6[2] = hc_byte_perm (w5[2], w5[3], selector); - w6[1] = hc_byte_perm (w5[1], w5[2], selector); - w6[0] = hc_byte_perm (w5[0], w5[1], selector); - w5[3] = hc_byte_perm (w4[3], w5[0], selector); - w5[2] = hc_byte_perm (w4[2], w4[3], selector); - w5[1] = hc_byte_perm (w4[1], w4[2], selector); - w5[0] = hc_byte_perm (w4[0], w4[1], selector); - w4[3] = hc_byte_perm (w3[3], w4[0], selector); - w4[2] = hc_byte_perm (w3[2], w3[3], selector); - w4[1] = hc_byte_perm (w3[1], w3[2], 
selector); - w4[0] = hc_byte_perm (w3[0], w3[1], selector); - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w7[3] = hc_byte_perm (w6[2], w6[3], selector); - w7[2] = hc_byte_perm (w6[1], w6[2], selector); - w7[1] = hc_byte_perm (w6[0], w6[1], selector); - w7[0] = hc_byte_perm (w5[3], w6[0], selector); - w6[3] = hc_byte_perm (w5[2], w5[3], selector); - w6[2] = hc_byte_perm (w5[1], w5[2], selector); - w6[1] = hc_byte_perm (w5[0], w5[1], selector); - w6[0] = hc_byte_perm (w4[3], w5[0], selector); - w5[3] = hc_byte_perm (w4[2], w4[3], selector); - w5[2] = hc_byte_perm (w4[1], w4[2], selector); - w5[1] = hc_byte_perm (w4[0], w4[1], selector); - w5[0] = hc_byte_perm (w3[3], w4[0], selector); - w4[3] = hc_byte_perm (w3[2], w3[3], selector); - w4[2] = hc_byte_perm (w3[1], w3[2], selector); - w4[1] = hc_byte_perm (w3[0], w3[1], selector); - w4[0] = hc_byte_perm (w2[3], w3[0], selector); - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = 
hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w7[3] = hc_byte_perm (w6[1], w6[2], selector); - w7[2] = hc_byte_perm (w6[0], w6[1], selector); - w7[1] = hc_byte_perm (w5[3], w6[0], selector); - w7[0] = hc_byte_perm (w5[2], w5[3], selector); - w6[3] = hc_byte_perm (w5[1], w5[2], selector); - w6[2] = hc_byte_perm (w5[0], w5[1], selector); - w6[1] = hc_byte_perm (w4[3], w5[0], selector); - w6[0] = hc_byte_perm (w4[2], w4[3], selector); - w5[3] = hc_byte_perm (w4[1], w4[2], selector); - w5[2] = hc_byte_perm (w4[0], w4[1], selector); - w5[1] = hc_byte_perm (w3[3], w4[0], selector); - w5[0] = hc_byte_perm (w3[2], w3[3], selector); - w4[3] = hc_byte_perm (w3[1], w3[2], selector); - w4[2] = hc_byte_perm (w3[0], w3[1], selector); - w4[1] = hc_byte_perm (w2[3], w3[0], selector); - w4[0] = hc_byte_perm (w2[2], w2[3], selector); - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w7[3] = hc_byte_perm (w6[0], w6[1], selector); - w7[2] = hc_byte_perm (w5[3], w6[0], selector); - w7[1] = hc_byte_perm (w5[2], w5[3], selector); - w7[0] = hc_byte_perm (w5[1], w5[2], selector); - w6[3] = hc_byte_perm (w5[0], w5[1], selector); - w6[2] = hc_byte_perm (w4[3], w5[0], selector); - w6[1] = hc_byte_perm (w4[2], w4[3], selector); - w6[0] = hc_byte_perm (w4[1], w4[2], 
selector); - w5[3] = hc_byte_perm (w4[0], w4[1], selector); - w5[2] = hc_byte_perm (w3[3], w4[0], selector); - w5[1] = hc_byte_perm (w3[2], w3[3], selector); - w5[0] = hc_byte_perm (w3[1], w3[2], selector); - w4[3] = hc_byte_perm (w3[0], w3[1], selector); - w4[2] = hc_byte_perm (w2[3], w3[0], selector); - w4[1] = hc_byte_perm (w2[2], w2[3], selector); - w4[0] = hc_byte_perm (w2[1], w2[2], selector); - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w7[3] = hc_byte_perm (w5[3], w6[0], selector); - w7[2] = hc_byte_perm (w5[2], w5[3], selector); - w7[1] = hc_byte_perm (w5[1], w5[2], selector); - w7[0] = hc_byte_perm (w5[0], w5[1], selector); - w6[3] = hc_byte_perm (w4[3], w5[0], selector); - w6[2] = hc_byte_perm (w4[2], w4[3], selector); - w6[1] = hc_byte_perm (w4[1], w4[2], selector); - w6[0] = hc_byte_perm (w4[0], w4[1], selector); - w5[3] = hc_byte_perm (w3[3], w4[0], selector); - w5[2] = hc_byte_perm (w3[2], w3[3], selector); - w5[1] = hc_byte_perm (w3[1], w3[2], selector); - w5[0] = hc_byte_perm (w3[0], w3[1], selector); - w4[3] = hc_byte_perm (w2[3], w3[0], selector); - w4[2] = hc_byte_perm (w2[2], w2[3], selector); - w4[1] = hc_byte_perm (w2[1], w2[2], selector); - w4[0] = hc_byte_perm (w2[0], w2[1], selector); - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], 
w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w7[3] = hc_byte_perm (w5[2], w5[3], selector); - w7[2] = hc_byte_perm (w5[1], w5[2], selector); - w7[1] = hc_byte_perm (w5[0], w5[1], selector); - w7[0] = hc_byte_perm (w4[3], w5[0], selector); - w6[3] = hc_byte_perm (w4[2], w4[3], selector); - w6[2] = hc_byte_perm (w4[1], w4[2], selector); - w6[1] = hc_byte_perm (w4[0], w4[1], selector); - w6[0] = hc_byte_perm (w3[3], w4[0], selector); - w5[3] = hc_byte_perm (w3[2], w3[3], selector); - w5[2] = hc_byte_perm (w3[1], w3[2], selector); - w5[1] = hc_byte_perm (w3[0], w3[1], selector); - w5[0] = hc_byte_perm (w2[3], w3[0], selector); - w4[3] = hc_byte_perm (w2[2], w2[3], selector); - w4[2] = hc_byte_perm (w2[1], w2[2], selector); - w4[1] = hc_byte_perm (w2[0], w2[1], selector); - w4[0] = hc_byte_perm (w1[3], w2[0], selector); - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w7[3] = hc_byte_perm (w5[1], w5[2], selector); - w7[2] = hc_byte_perm (w5[0], w5[1], selector); - w7[1] = hc_byte_perm (w4[3], w5[0], selector); - w7[0] = hc_byte_perm (w4[2], w4[3], selector); - w6[3] = hc_byte_perm (w4[1], w4[2], selector); - w6[2] = hc_byte_perm (w4[0], w4[1], selector); - w6[1] = hc_byte_perm (w3[3], w4[0], selector); - w6[0] = hc_byte_perm (w3[2], 
w3[3], selector); - w5[3] = hc_byte_perm (w3[1], w3[2], selector); - w5[2] = hc_byte_perm (w3[0], w3[1], selector); - w5[1] = hc_byte_perm (w2[3], w3[0], selector); - w5[0] = hc_byte_perm (w2[2], w2[3], selector); - w4[3] = hc_byte_perm (w2[1], w2[2], selector); - w4[2] = hc_byte_perm (w2[0], w2[1], selector); - w4[1] = hc_byte_perm (w1[3], w2[0], selector); - w4[0] = hc_byte_perm (w1[2], w1[3], selector); - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w7[3] = hc_byte_perm (w5[0], w5[1], selector); - w7[2] = hc_byte_perm (w4[3], w5[0], selector); - w7[1] = hc_byte_perm (w4[2], w4[3], selector); - w7[0] = hc_byte_perm (w4[1], w4[2], selector); - w6[3] = hc_byte_perm (w4[0], w4[1], selector); - w6[2] = hc_byte_perm (w3[3], w4[0], selector); - w6[1] = hc_byte_perm (w3[2], w3[3], selector); - w6[0] = hc_byte_perm (w3[1], w3[2], selector); - w5[3] = hc_byte_perm (w3[0], w3[1], selector); - w5[2] = hc_byte_perm (w2[3], w3[0], selector); - w5[1] = hc_byte_perm (w2[2], w2[3], selector); - w5[0] = hc_byte_perm (w2[1], w2[2], selector); - w4[3] = hc_byte_perm (w2[0], w2[1], selector); - w4[2] = hc_byte_perm (w1[3], w2[0], selector); - w4[1] = hc_byte_perm (w1[2], w1[3], selector); - w4[0] = hc_byte_perm (w1[1], w1[2], selector); - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 
0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w7[3] = hc_byte_perm (w4[3], w5[0], selector); - w7[2] = hc_byte_perm (w4[2], w4[3], selector); - w7[1] = hc_byte_perm (w4[1], w4[2], selector); - w7[0] = hc_byte_perm (w4[0], w4[1], selector); - w6[3] = hc_byte_perm (w3[3], w4[0], selector); - w6[2] = hc_byte_perm (w3[2], w3[3], selector); - w6[1] = hc_byte_perm (w3[1], w3[2], selector); - w6[0] = hc_byte_perm (w3[0], w3[1], selector); - w5[3] = hc_byte_perm (w2[3], w3[0], selector); - w5[2] = hc_byte_perm (w2[2], w2[3], selector); - w5[1] = hc_byte_perm (w2[1], w2[2], selector); - w5[0] = hc_byte_perm (w2[0], w2[1], selector); - w4[3] = hc_byte_perm (w1[3], w2[0], selector); - w4[2] = hc_byte_perm (w1[2], w1[3], selector); - w4[1] = hc_byte_perm (w1[1], w1[2], selector); - w4[0] = hc_byte_perm (w1[0], w1[1], selector); - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w7[3] = hc_byte_perm (w4[2], w4[3], selector); - w7[2] = hc_byte_perm (w4[1], w4[2], selector); - w7[1] = hc_byte_perm (w4[0], w4[1], selector); - w7[0] = hc_byte_perm (w3[3], w4[0], selector); - w6[3] = hc_byte_perm (w3[2], w3[3], selector); - w6[2] = hc_byte_perm (w3[1], w3[2], selector); - w6[1] = hc_byte_perm (w3[0], w3[1], selector); - w6[0] = hc_byte_perm (w2[3], w3[0], selector); - w5[3] = hc_byte_perm (w2[2], w2[3], selector); - w5[2] = hc_byte_perm (w2[1], w2[2], selector); - w5[1] = hc_byte_perm (w2[0], w2[1], selector); - w5[0] = hc_byte_perm (w1[3], w2[0], selector); - w4[3] = hc_byte_perm (w1[2], w1[3], selector); - w4[2] = hc_byte_perm (w1[1], w1[2], selector); - 
w4[1] = hc_byte_perm (w1[0], w1[1], selector); - w4[0] = hc_byte_perm (w0[3], w1[0], selector); - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w7[3] = hc_byte_perm (w4[1], w4[2], selector); - w7[2] = hc_byte_perm (w4[0], w4[1], selector); - w7[1] = hc_byte_perm (w3[3], w4[0], selector); - w7[0] = hc_byte_perm (w3[2], w3[3], selector); - w6[3] = hc_byte_perm (w3[1], w3[2], selector); - w6[2] = hc_byte_perm (w3[0], w3[1], selector); - w6[1] = hc_byte_perm (w2[3], w3[0], selector); - w6[0] = hc_byte_perm (w2[2], w2[3], selector); - w5[3] = hc_byte_perm (w2[1], w2[2], selector); - w5[2] = hc_byte_perm (w2[0], w2[1], selector); - w5[1] = hc_byte_perm (w1[3], w2[0], selector); - w5[0] = hc_byte_perm (w1[2], w1[3], selector); - w4[3] = hc_byte_perm (w1[1], w1[2], selector); - w4[2] = hc_byte_perm (w1[0], w1[1], selector); - w4[1] = hc_byte_perm (w0[3], w1[0], selector); - w4[0] = hc_byte_perm (w0[2], w0[3], selector); - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 14: - w7[3] = hc_byte_perm (w4[0], w4[1], selector); - w7[2] = hc_byte_perm (w3[3], w4[0], selector); - w7[1] = hc_byte_perm (w3[2], w3[3], selector); - w7[0] = hc_byte_perm (w3[1], w3[2], selector); - w6[3] = hc_byte_perm (w3[0], w3[1], selector); - w6[2] = hc_byte_perm (w2[3], w3[0], selector); - w6[1] = hc_byte_perm (w2[2], w2[3], selector); - w6[0] = hc_byte_perm (w2[1], w2[2], selector); - w5[3] = 
hc_byte_perm (w2[0], w2[1], selector); - w5[2] = hc_byte_perm (w1[3], w2[0], selector); - w5[1] = hc_byte_perm (w1[2], w1[3], selector); - w5[0] = hc_byte_perm (w1[1], w1[2], selector); - w4[3] = hc_byte_perm (w1[0], w1[1], selector); - w4[2] = hc_byte_perm (w0[3], w1[0], selector); - w4[1] = hc_byte_perm (w0[2], w0[3], selector); - w4[0] = hc_byte_perm (w0[1], w0[2], selector); - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 15: - w7[3] = hc_byte_perm (w3[3], w4[0], selector); - w7[2] = hc_byte_perm (w3[2], w3[3], selector); - w7[1] = hc_byte_perm (w3[1], w3[2], selector); - w7[0] = hc_byte_perm (w3[0], w3[1], selector); - w6[3] = hc_byte_perm (w2[3], w3[0], selector); - w6[2] = hc_byte_perm (w2[2], w2[3], selector); - w6[1] = hc_byte_perm (w2[1], w2[2], selector); - w6[0] = hc_byte_perm (w2[0], w2[1], selector); - w5[3] = hc_byte_perm (w1[3], w2[0], selector); - w5[2] = hc_byte_perm (w1[2], w1[3], selector); - w5[1] = hc_byte_perm (w1[1], w1[2], selector); - w5[0] = hc_byte_perm (w1[0], w1[1], selector); - w4[3] = hc_byte_perm (w0[3], w1[0], selector); - w4[2] = hc_byte_perm (w0[2], w0[3], selector); - w4[1] = hc_byte_perm (w0[1], w0[2], selector); - w4[0] = hc_byte_perm (w0[0], w0[1], selector); - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, 
PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -10026,1712 +7950,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le (PRIVATE_AS u32x *w0, PRIVATE break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm (w7[3], 0, selector); - w7[3] = hc_byte_perm (w7[2], w7[3], selector); - w7[2] = hc_byte_perm (w7[1], w7[2], selector); - w7[1] = hc_byte_perm (w7[0], w7[1], selector); - w7[0] = hc_byte_perm (w6[3], w7[0], selector); - w6[3] = hc_byte_perm (w6[2], w6[3], selector); - w6[2] = hc_byte_perm (w6[1], w6[2], selector); - w6[1] = hc_byte_perm (w6[0], w6[1], selector); - w6[0] = hc_byte_perm (w5[3], w6[0], selector); - w5[3] = hc_byte_perm (w5[2], w5[3], selector); - w5[2] = hc_byte_perm (w5[1], w5[2], selector); - w5[1] = hc_byte_perm (w5[0], w5[1], selector); - w5[0] = hc_byte_perm (w4[3], w5[0], selector); - w4[3] = hc_byte_perm (w4[2], w4[3], selector); - w4[2] = hc_byte_perm (w4[1], w4[2], selector); - w4[1] = hc_byte_perm (w4[0], w4[1], selector); - w4[0] = hc_byte_perm (w3[3], w4[0], selector); - w3[3] = hc_byte_perm (w3[2], w3[3], selector); - w3[2] = hc_byte_perm (w3[1], w3[2], selector); - w3[1] = hc_byte_perm (w3[0], w3[1], selector); - w3[0] = hc_byte_perm (w2[3], w3[0], selector); - w2[3] = hc_byte_perm (w2[2], w2[3], selector); - w2[2] = hc_byte_perm (w2[1], w2[2], selector); - 
w2[1] = hc_byte_perm (w2[0], w2[1], selector); - w2[0] = hc_byte_perm (w1[3], w2[0], selector); - w1[3] = hc_byte_perm (w1[2], w1[3], selector); - w1[2] = hc_byte_perm (w1[1], w1[2], selector); - w1[1] = hc_byte_perm (w1[0], w1[1], selector); - w1[0] = hc_byte_perm (w0[3], w1[0], selector); - w0[3] = hc_byte_perm (w0[2], w0[3], selector); - w0[2] = hc_byte_perm (w0[1], w0[2], selector); - w0[1] = hc_byte_perm (w0[0], w0[1], selector); - w0[0] = hc_byte_perm ( 0, w0[0], selector); - - break; - - case 1: - c0[1] = hc_byte_perm (w7[3], 0, selector); - c0[0] = hc_byte_perm (w7[2], w7[3], selector); - w7[3] = hc_byte_perm (w7[1], w7[2], selector); - w7[2] = hc_byte_perm (w7[0], w7[1], selector); - w7[1] = hc_byte_perm (w6[3], w7[0], selector); - w7[0] = hc_byte_perm (w6[2], w6[3], selector); - w6[3] = hc_byte_perm (w6[1], w6[2], selector); - w6[2] = hc_byte_perm (w6[0], w6[1], selector); - w6[1] = hc_byte_perm (w5[3], w6[0], selector); - w6[0] = hc_byte_perm (w5[2], w5[3], selector); - w5[3] = hc_byte_perm (w5[1], w5[2], selector); - w5[2] = hc_byte_perm (w5[0], w5[1], selector); - w5[1] = hc_byte_perm (w4[3], w5[0], selector); - w5[0] = hc_byte_perm (w4[2], w4[3], selector); - w4[3] = hc_byte_perm (w4[1], w4[2], selector); - w4[2] = hc_byte_perm (w4[0], w4[1], selector); - w4[1] = hc_byte_perm (w3[3], w4[0], selector); - w4[0] = hc_byte_perm (w3[2], w3[3], selector); - w3[3] = hc_byte_perm (w3[1], w3[2], selector); - w3[2] = hc_byte_perm (w3[0], w3[1], selector); - w3[1] = hc_byte_perm (w2[3], w3[0], selector); - w3[0] = hc_byte_perm (w2[2], w2[3], selector); - w2[3] = hc_byte_perm (w2[1], w2[2], selector); - w2[2] = hc_byte_perm (w2[0], w2[1], selector); - w2[1] = hc_byte_perm (w1[3], w2[0], selector); - w2[0] = hc_byte_perm (w1[2], w1[3], selector); - w1[3] = hc_byte_perm (w1[1], w1[2], selector); - w1[2] = hc_byte_perm (w1[0], w1[1], selector); - w1[1] = hc_byte_perm (w0[3], w1[0], selector); - w1[0] = hc_byte_perm (w0[2], w0[3], selector); - w0[3] = hc_byte_perm 
(w0[1], w0[2], selector); - w0[2] = hc_byte_perm (w0[0], w0[1], selector); - w0[1] = hc_byte_perm ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm (w7[3], 0, selector); - c0[1] = hc_byte_perm (w7[2], w7[3], selector); - c0[0] = hc_byte_perm (w7[1], w7[2], selector); - w7[3] = hc_byte_perm (w7[0], w7[1], selector); - w7[2] = hc_byte_perm (w6[3], w7[0], selector); - w7[1] = hc_byte_perm (w6[2], w6[3], selector); - w7[0] = hc_byte_perm (w6[1], w6[2], selector); - w6[3] = hc_byte_perm (w6[0], w6[1], selector); - w6[2] = hc_byte_perm (w5[3], w6[0], selector); - w6[1] = hc_byte_perm (w5[2], w5[3], selector); - w6[0] = hc_byte_perm (w5[1], w5[2], selector); - w5[3] = hc_byte_perm (w5[0], w5[1], selector); - w5[2] = hc_byte_perm (w4[3], w5[0], selector); - w5[1] = hc_byte_perm (w4[2], w4[3], selector); - w5[0] = hc_byte_perm (w4[1], w4[2], selector); - w4[3] = hc_byte_perm (w4[0], w4[1], selector); - w4[2] = hc_byte_perm (w3[3], w4[0], selector); - w4[1] = hc_byte_perm (w3[2], w3[3], selector); - w4[0] = hc_byte_perm (w3[1], w3[2], selector); - w3[3] = hc_byte_perm (w3[0], w3[1], selector); - w3[2] = hc_byte_perm (w2[3], w3[0], selector); - w3[1] = hc_byte_perm (w2[2], w2[3], selector); - w3[0] = hc_byte_perm (w2[1], w2[2], selector); - w2[3] = hc_byte_perm (w2[0], w2[1], selector); - w2[2] = hc_byte_perm (w1[3], w2[0], selector); - w2[1] = hc_byte_perm (w1[2], w1[3], selector); - w2[0] = hc_byte_perm (w1[1], w1[2], selector); - w1[3] = hc_byte_perm (w1[0], w1[1], selector); - w1[2] = hc_byte_perm (w0[3], w1[0], selector); - w1[1] = hc_byte_perm (w0[2], w0[3], selector); - w1[0] = hc_byte_perm (w0[1], w0[2], selector); - w0[3] = hc_byte_perm (w0[0], w0[1], selector); - w0[2] = hc_byte_perm ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm (w7[3], 0, selector); - c0[2] = hc_byte_perm (w7[2], w7[3], selector); - c0[1] = hc_byte_perm (w7[1], w7[2], selector); - c0[0] = hc_byte_perm (w7[0], w7[1], 
selector); - w7[3] = hc_byte_perm (w6[3], w7[0], selector); - w7[2] = hc_byte_perm (w6[2], w6[3], selector); - w7[1] = hc_byte_perm (w6[1], w6[2], selector); - w7[0] = hc_byte_perm (w6[0], w6[1], selector); - w6[3] = hc_byte_perm (w5[3], w6[0], selector); - w6[2] = hc_byte_perm (w5[2], w5[3], selector); - w6[1] = hc_byte_perm (w5[1], w5[2], selector); - w6[0] = hc_byte_perm (w5[0], w5[1], selector); - w5[3] = hc_byte_perm (w4[3], w5[0], selector); - w5[2] = hc_byte_perm (w4[2], w4[3], selector); - w5[1] = hc_byte_perm (w4[1], w4[2], selector); - w5[0] = hc_byte_perm (w4[0], w4[1], selector); - w4[3] = hc_byte_perm (w3[3], w4[0], selector); - w4[2] = hc_byte_perm (w3[2], w3[3], selector); - w4[1] = hc_byte_perm (w3[1], w3[2], selector); - w4[0] = hc_byte_perm (w3[0], w3[1], selector); - w3[3] = hc_byte_perm (w2[3], w3[0], selector); - w3[2] = hc_byte_perm (w2[2], w2[3], selector); - w3[1] = hc_byte_perm (w2[1], w2[2], selector); - w3[0] = hc_byte_perm (w2[0], w2[1], selector); - w2[3] = hc_byte_perm (w1[3], w2[0], selector); - w2[2] = hc_byte_perm (w1[2], w1[3], selector); - w2[1] = hc_byte_perm (w1[1], w1[2], selector); - w2[0] = hc_byte_perm (w1[0], w1[1], selector); - w1[3] = hc_byte_perm (w0[3], w1[0], selector); - w1[2] = hc_byte_perm (w0[2], w0[3], selector); - w1[1] = hc_byte_perm (w0[1], w0[2], selector); - w1[0] = hc_byte_perm (w0[0], w0[1], selector); - w0[3] = hc_byte_perm ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm (w7[3], 0, selector); - c0[3] = hc_byte_perm (w7[2], w7[3], selector); - c0[2] = hc_byte_perm (w7[1], w7[2], selector); - c0[1] = hc_byte_perm (w7[0], w7[1], selector); - c0[0] = hc_byte_perm (w6[3], w7[0], selector); - w7[3] = hc_byte_perm (w6[2], w6[3], selector); - w7[2] = hc_byte_perm (w6[1], w6[2], selector); - w7[1] = hc_byte_perm (w6[0], w6[1], selector); - w7[0] = hc_byte_perm (w5[3], w6[0], selector); - w6[3] = hc_byte_perm (w5[2], w5[3], selector); - w6[2] = hc_byte_perm 
(w5[1], w5[2], selector); - w6[1] = hc_byte_perm (w5[0], w5[1], selector); - w6[0] = hc_byte_perm (w4[3], w5[0], selector); - w5[3] = hc_byte_perm (w4[2], w4[3], selector); - w5[2] = hc_byte_perm (w4[1], w4[2], selector); - w5[1] = hc_byte_perm (w4[0], w4[1], selector); - w5[0] = hc_byte_perm (w3[3], w4[0], selector); - w4[3] = hc_byte_perm (w3[2], w3[3], selector); - w4[2] = hc_byte_perm (w3[1], w3[2], selector); - w4[1] = hc_byte_perm (w3[0], w3[1], selector); - w4[0] = hc_byte_perm (w2[3], w3[0], selector); - w3[3] = hc_byte_perm (w2[2], w2[3], selector); - w3[2] = hc_byte_perm (w2[1], w2[2], selector); - w3[1] = hc_byte_perm (w2[0], w2[1], selector); - w3[0] = hc_byte_perm (w1[3], w2[0], selector); - w2[3] = hc_byte_perm (w1[2], w1[3], selector); - w2[2] = hc_byte_perm (w1[1], w1[2], selector); - w2[1] = hc_byte_perm (w1[0], w1[1], selector); - w2[0] = hc_byte_perm (w0[3], w1[0], selector); - w1[3] = hc_byte_perm (w0[2], w0[3], selector); - w1[2] = hc_byte_perm (w0[1], w0[2], selector); - w1[1] = hc_byte_perm (w0[0], w0[1], selector); - w1[0] = hc_byte_perm ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm (w7[3], 0, selector); - c1[0] = hc_byte_perm (w7[2], w7[3], selector); - c0[3] = hc_byte_perm (w7[1], w7[2], selector); - c0[2] = hc_byte_perm (w7[0], w7[1], selector); - c0[1] = hc_byte_perm (w6[3], w7[0], selector); - c0[0] = hc_byte_perm (w6[2], w6[3], selector); - w7[3] = hc_byte_perm (w6[1], w6[2], selector); - w7[2] = hc_byte_perm (w6[0], w6[1], selector); - w7[1] = hc_byte_perm (w5[3], w6[0], selector); - w7[0] = hc_byte_perm (w5[2], w5[3], selector); - w6[3] = hc_byte_perm (w5[1], w5[2], selector); - w6[2] = hc_byte_perm (w5[0], w5[1], selector); - w6[1] = hc_byte_perm (w4[3], w5[0], selector); - w6[0] = hc_byte_perm (w4[2], w4[3], selector); - w5[3] = hc_byte_perm (w4[1], w4[2], selector); - w5[2] = hc_byte_perm (w4[0], w4[1], selector); - w5[1] = hc_byte_perm (w3[3], w4[0], 
selector); - w5[0] = hc_byte_perm (w3[2], w3[3], selector); - w4[3] = hc_byte_perm (w3[1], w3[2], selector); - w4[2] = hc_byte_perm (w3[0], w3[1], selector); - w4[1] = hc_byte_perm (w2[3], w3[0], selector); - w4[0] = hc_byte_perm (w2[2], w2[3], selector); - w3[3] = hc_byte_perm (w2[1], w2[2], selector); - w3[2] = hc_byte_perm (w2[0], w2[1], selector); - w3[1] = hc_byte_perm (w1[3], w2[0], selector); - w3[0] = hc_byte_perm (w1[2], w1[3], selector); - w2[3] = hc_byte_perm (w1[1], w1[2], selector); - w2[2] = hc_byte_perm (w1[0], w1[1], selector); - w2[1] = hc_byte_perm (w0[3], w1[0], selector); - w2[0] = hc_byte_perm (w0[2], w0[3], selector); - w1[3] = hc_byte_perm (w0[1], w0[2], selector); - w1[2] = hc_byte_perm (w0[0], w0[1], selector); - w1[1] = hc_byte_perm ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm (w7[3], 0, selector); - c1[1] = hc_byte_perm (w7[2], w7[3], selector); - c1[0] = hc_byte_perm (w7[1], w7[2], selector); - c0[3] = hc_byte_perm (w7[0], w7[1], selector); - c0[2] = hc_byte_perm (w6[3], w7[0], selector); - c0[1] = hc_byte_perm (w6[2], w6[3], selector); - c0[0] = hc_byte_perm (w6[1], w6[2], selector); - w7[3] = hc_byte_perm (w6[0], w6[1], selector); - w7[2] = hc_byte_perm (w5[3], w6[0], selector); - w7[1] = hc_byte_perm (w5[2], w5[3], selector); - w7[0] = hc_byte_perm (w5[1], w5[2], selector); - w6[3] = hc_byte_perm (w5[0], w5[1], selector); - w6[2] = hc_byte_perm (w4[3], w5[0], selector); - w6[1] = hc_byte_perm (w4[2], w4[3], selector); - w6[0] = hc_byte_perm (w4[1], w4[2], selector); - w5[3] = hc_byte_perm (w4[0], w4[1], selector); - w5[2] = hc_byte_perm (w3[3], w4[0], selector); - w5[1] = hc_byte_perm (w3[2], w3[3], selector); - w5[0] = hc_byte_perm (w3[1], w3[2], selector); - w4[3] = hc_byte_perm (w3[0], w3[1], selector); - w4[2] = hc_byte_perm (w2[3], w3[0], selector); - w4[1] = hc_byte_perm (w2[2], w2[3], selector); - w4[0] = hc_byte_perm (w2[1], w2[2], 
selector); - w3[3] = hc_byte_perm (w2[0], w2[1], selector); - w3[2] = hc_byte_perm (w1[3], w2[0], selector); - w3[1] = hc_byte_perm (w1[2], w1[3], selector); - w3[0] = hc_byte_perm (w1[1], w1[2], selector); - w2[3] = hc_byte_perm (w1[0], w1[1], selector); - w2[2] = hc_byte_perm (w0[3], w1[0], selector); - w2[1] = hc_byte_perm (w0[2], w0[3], selector); - w2[0] = hc_byte_perm (w0[1], w0[2], selector); - w1[3] = hc_byte_perm (w0[0], w0[1], selector); - w1[2] = hc_byte_perm ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm (w7[3], 0, selector); - c1[2] = hc_byte_perm (w7[2], w7[3], selector); - c1[1] = hc_byte_perm (w7[1], w7[2], selector); - c1[0] = hc_byte_perm (w7[0], w7[1], selector); - c0[3] = hc_byte_perm (w6[3], w7[0], selector); - c0[2] = hc_byte_perm (w6[2], w6[3], selector); - c0[1] = hc_byte_perm (w6[1], w6[2], selector); - c0[0] = hc_byte_perm (w6[0], w6[1], selector); - w7[3] = hc_byte_perm (w5[3], w6[0], selector); - w7[2] = hc_byte_perm (w5[2], w5[3], selector); - w7[1] = hc_byte_perm (w5[1], w5[2], selector); - w7[0] = hc_byte_perm (w5[0], w5[1], selector); - w6[3] = hc_byte_perm (w4[3], w5[0], selector); - w6[2] = hc_byte_perm (w4[2], w4[3], selector); - w6[1] = hc_byte_perm (w4[1], w4[2], selector); - w6[0] = hc_byte_perm (w4[0], w4[1], selector); - w5[3] = hc_byte_perm (w3[3], w4[0], selector); - w5[2] = hc_byte_perm (w3[2], w3[3], selector); - w5[1] = hc_byte_perm (w3[1], w3[2], selector); - w5[0] = hc_byte_perm (w3[0], w3[1], selector); - w4[3] = hc_byte_perm (w2[3], w3[0], selector); - w4[2] = hc_byte_perm (w2[2], w2[3], selector); - w4[1] = hc_byte_perm (w2[1], w2[2], selector); - w4[0] = hc_byte_perm (w2[0], w2[1], selector); - w3[3] = hc_byte_perm (w1[3], w2[0], selector); - w3[2] = hc_byte_perm (w1[2], w1[3], selector); - w3[1] = hc_byte_perm (w1[1], w1[2], selector); - w3[0] = hc_byte_perm (w1[0], w1[1], selector); - w2[3] = hc_byte_perm (w0[3], 
w1[0], selector); - w2[2] = hc_byte_perm (w0[2], w0[3], selector); - w2[1] = hc_byte_perm (w0[1], w0[2], selector); - w2[0] = hc_byte_perm (w0[0], w0[1], selector); - w1[3] = hc_byte_perm ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm (w7[3], 0, selector); - c1[3] = hc_byte_perm (w7[2], w7[3], selector); - c1[2] = hc_byte_perm (w7[1], w7[2], selector); - c1[1] = hc_byte_perm (w7[0], w7[1], selector); - c1[0] = hc_byte_perm (w6[3], w7[0], selector); - c0[3] = hc_byte_perm (w6[2], w6[3], selector); - c0[2] = hc_byte_perm (w6[1], w6[2], selector); - c0[1] = hc_byte_perm (w6[0], w6[1], selector); - c0[0] = hc_byte_perm (w5[3], w6[0], selector); - w7[3] = hc_byte_perm (w5[2], w5[3], selector); - w7[2] = hc_byte_perm (w5[1], w5[2], selector); - w7[1] = hc_byte_perm (w5[0], w5[1], selector); - w7[0] = hc_byte_perm (w4[3], w5[0], selector); - w6[3] = hc_byte_perm (w4[2], w4[3], selector); - w6[2] = hc_byte_perm (w4[1], w4[2], selector); - w6[1] = hc_byte_perm (w4[0], w4[1], selector); - w6[0] = hc_byte_perm (w3[3], w4[0], selector); - w5[3] = hc_byte_perm (w3[2], w3[3], selector); - w5[2] = hc_byte_perm (w3[1], w3[2], selector); - w5[1] = hc_byte_perm (w3[0], w3[1], selector); - w5[0] = hc_byte_perm (w2[3], w3[0], selector); - w4[3] = hc_byte_perm (w2[2], w2[3], selector); - w4[2] = hc_byte_perm (w2[1], w2[2], selector); - w4[1] = hc_byte_perm (w2[0], w2[1], selector); - w4[0] = hc_byte_perm (w1[3], w2[0], selector); - w3[3] = hc_byte_perm (w1[2], w1[3], selector); - w3[2] = hc_byte_perm (w1[1], w1[2], selector); - w3[1] = hc_byte_perm (w1[0], w1[1], selector); - w3[0] = hc_byte_perm (w0[3], w1[0], selector); - w2[3] = hc_byte_perm (w0[2], w0[3], selector); - w2[2] = hc_byte_perm (w0[1], w0[2], selector); - w2[1] = hc_byte_perm (w0[0], w0[1], selector); - w2[0] = hc_byte_perm ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; 
- w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm (w7[3], 0, selector); - c2[0] = hc_byte_perm (w7[2], w7[3], selector); - c1[3] = hc_byte_perm (w7[1], w7[2], selector); - c1[2] = hc_byte_perm (w7[0], w7[1], selector); - c1[1] = hc_byte_perm (w6[3], w7[0], selector); - c1[0] = hc_byte_perm (w6[2], w6[3], selector); - c0[3] = hc_byte_perm (w6[1], w6[2], selector); - c0[2] = hc_byte_perm (w6[0], w6[1], selector); - c0[1] = hc_byte_perm (w5[3], w6[0], selector); - c0[0] = hc_byte_perm (w5[2], w5[3], selector); - w7[3] = hc_byte_perm (w5[1], w5[2], selector); - w7[2] = hc_byte_perm (w5[0], w5[1], selector); - w7[1] = hc_byte_perm (w4[3], w5[0], selector); - w7[0] = hc_byte_perm (w4[2], w4[3], selector); - w6[3] = hc_byte_perm (w4[1], w4[2], selector); - w6[2] = hc_byte_perm (w4[0], w4[1], selector); - w6[1] = hc_byte_perm (w3[3], w4[0], selector); - w6[0] = hc_byte_perm (w3[2], w3[3], selector); - w5[3] = hc_byte_perm (w3[1], w3[2], selector); - w5[2] = hc_byte_perm (w3[0], w3[1], selector); - w5[1] = hc_byte_perm (w2[3], w3[0], selector); - w5[0] = hc_byte_perm (w2[2], w2[3], selector); - w4[3] = hc_byte_perm (w2[1], w2[2], selector); - w4[2] = hc_byte_perm (w2[0], w2[1], selector); - w4[1] = hc_byte_perm (w1[3], w2[0], selector); - w4[0] = hc_byte_perm (w1[2], w1[3], selector); - w3[3] = hc_byte_perm (w1[1], w1[2], selector); - w3[2] = hc_byte_perm (w1[0], w1[1], selector); - w3[1] = hc_byte_perm (w0[3], w1[0], selector); - w3[0] = hc_byte_perm (w0[2], w0[3], selector); - w2[3] = hc_byte_perm (w0[1], w0[2], selector); - w2[2] = hc_byte_perm (w0[0], w0[1], selector); - w2[1] = hc_byte_perm ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm (w7[3], 0, selector); - c2[1] = hc_byte_perm (w7[2], w7[3], selector); - c2[0] = hc_byte_perm (w7[1], w7[2], selector); - c1[3] = hc_byte_perm (w7[0], w7[1], 
selector); - c1[2] = hc_byte_perm (w6[3], w7[0], selector); - c1[1] = hc_byte_perm (w6[2], w6[3], selector); - c1[0] = hc_byte_perm (w6[1], w6[2], selector); - c0[3] = hc_byte_perm (w6[0], w6[1], selector); - c0[2] = hc_byte_perm (w5[3], w6[0], selector); - c0[1] = hc_byte_perm (w5[2], w5[3], selector); - c0[0] = hc_byte_perm (w5[1], w5[2], selector); - w7[3] = hc_byte_perm (w5[0], w5[1], selector); - w7[2] = hc_byte_perm (w4[3], w5[0], selector); - w7[1] = hc_byte_perm (w4[2], w4[3], selector); - w7[0] = hc_byte_perm (w4[1], w4[2], selector); - w6[3] = hc_byte_perm (w4[0], w4[1], selector); - w6[2] = hc_byte_perm (w3[3], w4[0], selector); - w6[1] = hc_byte_perm (w3[2], w3[3], selector); - w6[0] = hc_byte_perm (w3[1], w3[2], selector); - w5[3] = hc_byte_perm (w3[0], w3[1], selector); - w5[2] = hc_byte_perm (w2[3], w3[0], selector); - w5[1] = hc_byte_perm (w2[2], w2[3], selector); - w5[0] = hc_byte_perm (w2[1], w2[2], selector); - w4[3] = hc_byte_perm (w2[0], w2[1], selector); - w4[2] = hc_byte_perm (w1[3], w2[0], selector); - w4[1] = hc_byte_perm (w1[2], w1[3], selector); - w4[0] = hc_byte_perm (w1[1], w1[2], selector); - w3[3] = hc_byte_perm (w1[0], w1[1], selector); - w3[2] = hc_byte_perm (w0[3], w1[0], selector); - w3[1] = hc_byte_perm (w0[2], w0[3], selector); - w3[0] = hc_byte_perm (w0[1], w0[2], selector); - w2[3] = hc_byte_perm (w0[0], w0[1], selector); - w2[2] = hc_byte_perm ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm (w7[3], 0, selector); - c2[2] = hc_byte_perm (w7[2], w7[3], selector); - c2[1] = hc_byte_perm (w7[1], w7[2], selector); - c2[0] = hc_byte_perm (w7[0], w7[1], selector); - c1[3] = hc_byte_perm (w6[3], w7[0], selector); - c1[2] = hc_byte_perm (w6[2], w6[3], selector); - c1[1] = hc_byte_perm (w6[1], w6[2], selector); - c1[0] = hc_byte_perm (w6[0], w6[1], selector); - c0[3] = hc_byte_perm 
(w5[3], w6[0], selector); - c0[2] = hc_byte_perm (w5[2], w5[3], selector); - c0[1] = hc_byte_perm (w5[1], w5[2], selector); - c0[0] = hc_byte_perm (w5[0], w5[1], selector); - w7[3] = hc_byte_perm (w4[3], w5[0], selector); - w7[2] = hc_byte_perm (w4[2], w4[3], selector); - w7[1] = hc_byte_perm (w4[1], w4[2], selector); - w7[0] = hc_byte_perm (w4[0], w4[1], selector); - w6[3] = hc_byte_perm (w3[3], w4[0], selector); - w6[2] = hc_byte_perm (w3[2], w3[3], selector); - w6[1] = hc_byte_perm (w3[1], w3[2], selector); - w6[0] = hc_byte_perm (w3[0], w3[1], selector); - w5[3] = hc_byte_perm (w2[3], w3[0], selector); - w5[2] = hc_byte_perm (w2[2], w2[3], selector); - w5[1] = hc_byte_perm (w2[1], w2[2], selector); - w5[0] = hc_byte_perm (w2[0], w2[1], selector); - w4[3] = hc_byte_perm (w1[3], w2[0], selector); - w4[2] = hc_byte_perm (w1[2], w1[3], selector); - w4[1] = hc_byte_perm (w1[1], w1[2], selector); - w4[0] = hc_byte_perm (w1[0], w1[1], selector); - w3[3] = hc_byte_perm (w0[3], w1[0], selector); - w3[2] = hc_byte_perm (w0[2], w0[3], selector); - w3[1] = hc_byte_perm (w0[1], w0[2], selector); - w3[0] = hc_byte_perm (w0[0], w0[1], selector); - w2[3] = hc_byte_perm ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm (w7[3], 0, selector); - c2[3] = hc_byte_perm (w7[2], w7[3], selector); - c2[2] = hc_byte_perm (w7[1], w7[2], selector); - c2[1] = hc_byte_perm (w7[0], w7[1], selector); - c2[0] = hc_byte_perm (w6[3], w7[0], selector); - c1[3] = hc_byte_perm (w6[2], w6[3], selector); - c1[2] = hc_byte_perm (w6[1], w6[2], selector); - c1[1] = hc_byte_perm (w6[0], w6[1], selector); - c1[0] = hc_byte_perm (w5[3], w6[0], selector); - c0[3] = hc_byte_perm (w5[2], w5[3], selector); - c0[2] = hc_byte_perm (w5[1], w5[2], selector); - c0[1] = hc_byte_perm (w5[0], w5[1], selector); - c0[0] = hc_byte_perm (w4[3], w5[0], selector); 
- w7[3] = hc_byte_perm (w4[2], w4[3], selector); - w7[2] = hc_byte_perm (w4[1], w4[2], selector); - w7[1] = hc_byte_perm (w4[0], w4[1], selector); - w7[0] = hc_byte_perm (w3[3], w4[0], selector); - w6[3] = hc_byte_perm (w3[2], w3[3], selector); - w6[2] = hc_byte_perm (w3[1], w3[2], selector); - w6[1] = hc_byte_perm (w3[0], w3[1], selector); - w6[0] = hc_byte_perm (w2[3], w3[0], selector); - w5[3] = hc_byte_perm (w2[2], w2[3], selector); - w5[2] = hc_byte_perm (w2[1], w2[2], selector); - w5[1] = hc_byte_perm (w2[0], w2[1], selector); - w5[0] = hc_byte_perm (w1[3], w2[0], selector); - w4[3] = hc_byte_perm (w1[2], w1[3], selector); - w4[2] = hc_byte_perm (w1[1], w1[2], selector); - w4[1] = hc_byte_perm (w1[0], w1[1], selector); - w4[0] = hc_byte_perm (w0[3], w1[0], selector); - w3[3] = hc_byte_perm (w0[2], w0[3], selector); - w3[2] = hc_byte_perm (w0[1], w0[2], selector); - w3[1] = hc_byte_perm (w0[0], w0[1], selector); - w3[0] = hc_byte_perm ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm (w7[3], 0, selector); - c3[0] = hc_byte_perm (w7[2], w7[3], selector); - c2[3] = hc_byte_perm (w7[1], w7[2], selector); - c2[2] = hc_byte_perm (w7[0], w7[1], selector); - c2[1] = hc_byte_perm (w6[3], w7[0], selector); - c2[0] = hc_byte_perm (w6[2], w6[3], selector); - c1[3] = hc_byte_perm (w6[1], w6[2], selector); - c1[2] = hc_byte_perm (w6[0], w6[1], selector); - c1[1] = hc_byte_perm (w5[3], w6[0], selector); - c1[0] = hc_byte_perm (w5[2], w5[3], selector); - c0[3] = hc_byte_perm (w5[1], w5[2], selector); - c0[2] = hc_byte_perm (w5[0], w5[1], selector); - c0[1] = hc_byte_perm (w4[3], w5[0], selector); - c0[0] = hc_byte_perm (w4[2], w4[3], selector); - w7[3] = hc_byte_perm (w4[1], w4[2], selector); - w7[2] = hc_byte_perm (w4[0], w4[1], selector); - w7[1] = hc_byte_perm (w3[3], w4[0], selector); - w7[0] = 
hc_byte_perm (w3[2], w3[3], selector); - w6[3] = hc_byte_perm (w3[1], w3[2], selector); - w6[2] = hc_byte_perm (w3[0], w3[1], selector); - w6[1] = hc_byte_perm (w2[3], w3[0], selector); - w6[0] = hc_byte_perm (w2[2], w2[3], selector); - w5[3] = hc_byte_perm (w2[1], w2[2], selector); - w5[2] = hc_byte_perm (w2[0], w2[1], selector); - w5[1] = hc_byte_perm (w1[3], w2[0], selector); - w5[0] = hc_byte_perm (w1[2], w1[3], selector); - w4[3] = hc_byte_perm (w1[1], w1[2], selector); - w4[2] = hc_byte_perm (w1[0], w1[1], selector); - w4[1] = hc_byte_perm (w0[3], w1[0], selector); - w4[0] = hc_byte_perm (w0[2], w0[3], selector); - w3[3] = hc_byte_perm (w0[1], w0[2], selector); - w3[2] = hc_byte_perm (w0[0], w0[1], selector); - w3[1] = hc_byte_perm ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm (w7[3], 0, selector); - c3[1] = hc_byte_perm (w7[2], w7[3], selector); - c3[0] = hc_byte_perm (w7[1], w7[2], selector); - c2[3] = hc_byte_perm (w7[0], w7[1], selector); - c2[2] = hc_byte_perm (w6[3], w7[0], selector); - c2[1] = hc_byte_perm (w6[2], w6[3], selector); - c2[0] = hc_byte_perm (w6[1], w6[2], selector); - c1[3] = hc_byte_perm (w6[0], w6[1], selector); - c1[2] = hc_byte_perm (w5[3], w6[0], selector); - c1[1] = hc_byte_perm (w5[2], w5[3], selector); - c1[0] = hc_byte_perm (w5[1], w5[2], selector); - c0[3] = hc_byte_perm (w5[0], w5[1], selector); - c0[2] = hc_byte_perm (w4[3], w5[0], selector); - c0[1] = hc_byte_perm (w4[2], w4[3], selector); - c0[0] = hc_byte_perm (w4[1], w4[2], selector); - w7[3] = hc_byte_perm (w4[0], w4[1], selector); - w7[2] = hc_byte_perm (w3[3], w4[0], selector); - w7[1] = hc_byte_perm (w3[2], w3[3], selector); - w7[0] = hc_byte_perm (w3[1], w3[2], selector); - w6[3] = hc_byte_perm (w3[0], w3[1], selector); - w6[2] = hc_byte_perm (w2[3], w3[0], selector); - w6[1] = 
hc_byte_perm (w2[2], w2[3], selector); - w6[0] = hc_byte_perm (w2[1], w2[2], selector); - w5[3] = hc_byte_perm (w2[0], w2[1], selector); - w5[2] = hc_byte_perm (w1[3], w2[0], selector); - w5[1] = hc_byte_perm (w1[2], w1[3], selector); - w5[0] = hc_byte_perm (w1[1], w1[2], selector); - w4[3] = hc_byte_perm (w1[0], w1[1], selector); - w4[2] = hc_byte_perm (w0[3], w1[0], selector); - w4[1] = hc_byte_perm (w0[2], w0[3], selector); - w4[0] = hc_byte_perm (w0[1], w0[2], selector); - w3[3] = hc_byte_perm (w0[0], w0[1], selector); - w3[2] = hc_byte_perm ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm (w7[3], 0, selector); - c3[2] = hc_byte_perm (w7[2], w7[3], selector); - c3[1] = hc_byte_perm (w7[1], w7[2], selector); - c3[0] = hc_byte_perm (w7[0], w7[1], selector); - c2[3] = hc_byte_perm (w6[3], w7[0], selector); - c2[2] = hc_byte_perm (w6[2], w6[3], selector); - c2[1] = hc_byte_perm (w6[1], w6[2], selector); - c2[0] = hc_byte_perm (w6[0], w6[1], selector); - c1[3] = hc_byte_perm (w5[3], w6[0], selector); - c1[2] = hc_byte_perm (w5[2], w5[3], selector); - c1[1] = hc_byte_perm (w5[1], w5[2], selector); - c1[0] = hc_byte_perm (w5[0], w5[1], selector); - c0[3] = hc_byte_perm (w4[3], w5[0], selector); - c0[2] = hc_byte_perm (w4[2], w4[3], selector); - c0[1] = hc_byte_perm (w4[1], w4[2], selector); - c0[0] = hc_byte_perm (w4[0], w4[1], selector); - w7[3] = hc_byte_perm (w3[3], w4[0], selector); - w7[2] = hc_byte_perm (w3[2], w3[3], selector); - w7[1] = hc_byte_perm (w3[1], w3[2], selector); - w7[0] = hc_byte_perm (w3[0], w3[1], selector); - w6[3] = hc_byte_perm (w2[3], w3[0], selector); - w6[2] = hc_byte_perm (w2[2], w2[3], selector); - w6[1] = hc_byte_perm (w2[1], w2[2], selector); - w6[0] = hc_byte_perm (w2[0], w2[1], selector); - w5[3] = hc_byte_perm (w1[3], w2[0], 
selector); - w5[2] = hc_byte_perm (w1[2], w1[3], selector); - w5[1] = hc_byte_perm (w1[1], w1[2], selector); - w5[0] = hc_byte_perm (w1[0], w1[1], selector); - w4[3] = hc_byte_perm (w0[3], w1[0], selector); - w4[2] = hc_byte_perm (w0[2], w0[3], selector); - w4[1] = hc_byte_perm (w0[1], w0[2], selector); - w4[0] = hc_byte_perm (w0[0], w0[1], selector); - w3[3] = hc_byte_perm ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm (w7[3], 0, selector); - c3[3] = hc_byte_perm (w7[2], w7[3], selector); - c3[2] = hc_byte_perm (w7[1], w7[2], selector); - c3[1] = hc_byte_perm (w7[0], w7[1], selector); - c3[0] = hc_byte_perm (w6[3], w7[0], selector); - c2[3] = hc_byte_perm (w6[2], w6[3], selector); - c2[2] = hc_byte_perm (w6[1], w6[2], selector); - c2[1] = hc_byte_perm (w6[0], w6[1], selector); - c2[0] = hc_byte_perm (w5[3], w6[0], selector); - c1[3] = hc_byte_perm (w5[2], w5[3], selector); - c1[2] = hc_byte_perm (w5[1], w5[2], selector); - c1[1] = hc_byte_perm (w5[0], w5[1], selector); - c1[0] = hc_byte_perm (w4[3], w5[0], selector); - c0[3] = hc_byte_perm (w4[2], w4[3], selector); - c0[2] = hc_byte_perm (w4[1], w4[2], selector); - c0[1] = hc_byte_perm (w4[0], w4[1], selector); - c0[0] = hc_byte_perm (w3[3], w4[0], selector); - w7[3] = hc_byte_perm (w3[2], w3[3], selector); - w7[2] = hc_byte_perm (w3[1], w3[2], selector); - w7[1] = hc_byte_perm (w3[0], w3[1], selector); - w7[0] = hc_byte_perm (w2[3], w3[0], selector); - w6[3] = hc_byte_perm (w2[2], w2[3], selector); - w6[2] = hc_byte_perm (w2[1], w2[2], selector); - w6[1] = hc_byte_perm (w2[0], w2[1], selector); - w6[0] = hc_byte_perm (w1[3], w2[0], selector); - w5[3] = hc_byte_perm (w1[2], w1[3], selector); - w5[2] = hc_byte_perm (w1[1], w1[2], selector); - w5[1] = hc_byte_perm (w1[0], w1[1], selector); - w5[0] = 
hc_byte_perm (w0[3], w1[0], selector); - w4[3] = hc_byte_perm (w0[2], w0[3], selector); - w4[2] = hc_byte_perm (w0[1], w0[2], selector); - w4[1] = hc_byte_perm (w0[0], w0[1], selector); - w4[0] = hc_byte_perm ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm (w7[3], 0, selector); - c4[0] = hc_byte_perm (w7[2], w7[3], selector); - c3[3] = hc_byte_perm (w7[1], w7[2], selector); - c3[2] = hc_byte_perm (w7[0], w7[1], selector); - c3[1] = hc_byte_perm (w6[3], w7[0], selector); - c3[0] = hc_byte_perm (w6[2], w6[3], selector); - c2[3] = hc_byte_perm (w6[1], w6[2], selector); - c2[2] = hc_byte_perm (w6[0], w6[1], selector); - c2[1] = hc_byte_perm (w5[3], w6[0], selector); - c2[0] = hc_byte_perm (w5[2], w5[3], selector); - c1[3] = hc_byte_perm (w5[1], w5[2], selector); - c1[2] = hc_byte_perm (w5[0], w5[1], selector); - c1[1] = hc_byte_perm (w4[3], w5[0], selector); - c1[0] = hc_byte_perm (w4[2], w4[3], selector); - c0[3] = hc_byte_perm (w4[1], w4[2], selector); - c0[2] = hc_byte_perm (w4[0], w4[1], selector); - c0[1] = hc_byte_perm (w3[3], w4[0], selector); - c0[0] = hc_byte_perm (w3[2], w3[3], selector); - w7[3] = hc_byte_perm (w3[1], w3[2], selector); - w7[2] = hc_byte_perm (w3[0], w3[1], selector); - w7[1] = hc_byte_perm (w2[3], w3[0], selector); - w7[0] = hc_byte_perm (w2[2], w2[3], selector); - w6[3] = hc_byte_perm (w2[1], w2[2], selector); - w6[2] = hc_byte_perm (w2[0], w2[1], selector); - w6[1] = hc_byte_perm (w1[3], w2[0], selector); - w6[0] = hc_byte_perm (w1[2], w1[3], selector); - w5[3] = hc_byte_perm (w1[1], w1[2], selector); - w5[2] = hc_byte_perm (w1[0], w1[1], selector); - w5[1] = hc_byte_perm (w0[3], w1[0], selector); - w5[0] = hc_byte_perm (w0[2], w0[3], selector); - w4[3] = hc_byte_perm (w0[1], w0[2], selector); - w4[2] = 
hc_byte_perm (w0[0], w0[1], selector); - w4[1] = hc_byte_perm ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm (w7[3], 0, selector); - c4[1] = hc_byte_perm (w7[2], w7[3], selector); - c4[0] = hc_byte_perm (w7[1], w7[2], selector); - c3[3] = hc_byte_perm (w7[0], w7[1], selector); - c3[2] = hc_byte_perm (w6[3], w7[0], selector); - c3[1] = hc_byte_perm (w6[2], w6[3], selector); - c3[0] = hc_byte_perm (w6[1], w6[2], selector); - c2[3] = hc_byte_perm (w6[0], w6[1], selector); - c2[2] = hc_byte_perm (w5[3], w6[0], selector); - c2[1] = hc_byte_perm (w5[2], w5[3], selector); - c2[0] = hc_byte_perm (w5[1], w5[2], selector); - c1[3] = hc_byte_perm (w5[0], w5[1], selector); - c1[2] = hc_byte_perm (w4[3], w5[0], selector); - c1[1] = hc_byte_perm (w4[2], w4[3], selector); - c1[0] = hc_byte_perm (w4[1], w4[2], selector); - c0[3] = hc_byte_perm (w4[0], w4[1], selector); - c0[2] = hc_byte_perm (w3[3], w4[0], selector); - c0[1] = hc_byte_perm (w3[2], w3[3], selector); - c0[0] = hc_byte_perm (w3[1], w3[2], selector); - w7[3] = hc_byte_perm (w3[0], w3[1], selector); - w7[2] = hc_byte_perm (w2[3], w3[0], selector); - w7[1] = hc_byte_perm (w2[2], w2[3], selector); - w7[0] = hc_byte_perm (w2[1], w2[2], selector); - w6[3] = hc_byte_perm (w2[0], w2[1], selector); - w6[2] = hc_byte_perm (w1[3], w2[0], selector); - w6[1] = hc_byte_perm (w1[2], w1[3], selector); - w6[0] = hc_byte_perm (w1[1], w1[2], selector); - w5[3] = hc_byte_perm (w1[0], w1[1], selector); - w5[2] = hc_byte_perm (w0[3], w1[0], selector); - w5[1] = hc_byte_perm (w0[2], w0[3], selector); - w5[0] = hc_byte_perm (w0[1], w0[2], selector); - w4[3] = hc_byte_perm (w0[0], w0[1], selector); - w4[2] = hc_byte_perm ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] 
= 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm (w7[3], 0, selector); - c4[2] = hc_byte_perm (w7[2], w7[3], selector); - c4[1] = hc_byte_perm (w7[1], w7[2], selector); - c4[0] = hc_byte_perm (w7[0], w7[1], selector); - c3[3] = hc_byte_perm (w6[3], w7[0], selector); - c3[2] = hc_byte_perm (w6[2], w6[3], selector); - c3[1] = hc_byte_perm (w6[1], w6[2], selector); - c3[0] = hc_byte_perm (w6[0], w6[1], selector); - c2[3] = hc_byte_perm (w5[3], w6[0], selector); - c2[2] = hc_byte_perm (w5[2], w5[3], selector); - c2[1] = hc_byte_perm (w5[1], w5[2], selector); - c2[0] = hc_byte_perm (w5[0], w5[1], selector); - c1[3] = hc_byte_perm (w4[3], w5[0], selector); - c1[2] = hc_byte_perm (w4[2], w4[3], selector); - c1[1] = hc_byte_perm (w4[1], w4[2], selector); - c1[0] = hc_byte_perm (w4[0], w4[1], selector); - c0[3] = hc_byte_perm (w3[3], w4[0], selector); - c0[2] = hc_byte_perm (w3[2], w3[3], selector); - c0[1] = hc_byte_perm (w3[1], w3[2], selector); - c0[0] = hc_byte_perm (w3[0], w3[1], selector); - w7[3] = hc_byte_perm (w2[3], w3[0], selector); - w7[2] = hc_byte_perm (w2[2], w2[3], selector); - w7[1] = hc_byte_perm (w2[1], w2[2], selector); - w7[0] = hc_byte_perm (w2[0], w2[1], selector); - w6[3] = hc_byte_perm (w1[3], w2[0], selector); - w6[2] = hc_byte_perm (w1[2], w1[3], selector); - w6[1] = hc_byte_perm (w1[1], w1[2], selector); - w6[0] = hc_byte_perm (w1[0], w1[1], selector); - w5[3] = hc_byte_perm (w0[3], w1[0], selector); - w5[2] = hc_byte_perm (w0[2], w0[3], selector); - w5[1] = hc_byte_perm (w0[1], w0[2], selector); - w5[0] = hc_byte_perm (w0[0], w0[1], selector); - w4[3] = hc_byte_perm ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 
0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm (w7[3], 0, selector); - c4[3] = hc_byte_perm (w7[2], w7[3], selector); - c4[2] = hc_byte_perm (w7[1], w7[2], selector); - c4[1] = hc_byte_perm (w7[0], w7[1], selector); - c4[0] = hc_byte_perm (w6[3], w7[0], selector); - c3[3] = hc_byte_perm (w6[2], w6[3], selector); - c3[2] = hc_byte_perm (w6[1], w6[2], selector); - c3[1] = hc_byte_perm (w6[0], w6[1], selector); - c3[0] = hc_byte_perm (w5[3], w6[0], selector); - c2[3] = hc_byte_perm (w5[2], w5[3], selector); - c2[2] = hc_byte_perm (w5[1], w5[2], selector); - c2[1] = hc_byte_perm (w5[0], w5[1], selector); - c2[0] = hc_byte_perm (w4[3], w5[0], selector); - c1[3] = hc_byte_perm (w4[2], w4[3], selector); - c1[2] = hc_byte_perm (w4[1], w4[2], selector); - c1[1] = hc_byte_perm (w4[0], w4[1], selector); - c1[0] = hc_byte_perm (w3[3], w4[0], selector); - c0[3] = hc_byte_perm (w3[2], w3[3], selector); - c0[2] = hc_byte_perm (w3[1], w3[2], selector); - c0[1] = hc_byte_perm (w3[0], w3[1], selector); - c0[0] = hc_byte_perm (w2[3], w3[0], selector); - w7[3] = hc_byte_perm (w2[2], w2[3], selector); - w7[2] = hc_byte_perm (w2[1], w2[2], selector); - w7[1] = hc_byte_perm (w2[0], w2[1], selector); - w7[0] = hc_byte_perm (w1[3], w2[0], selector); - w6[3] = hc_byte_perm (w1[2], w1[3], selector); - w6[2] = hc_byte_perm (w1[1], w1[2], selector); - w6[1] = hc_byte_perm (w1[0], w1[1], selector); - w6[0] = hc_byte_perm (w0[3], w1[0], selector); - w5[3] = hc_byte_perm (w0[2], w0[3], selector); - w5[2] = hc_byte_perm (w0[1], w0[2], selector); - w5[1] = hc_byte_perm (w0[0], w0[1], selector); - w5[0] = hc_byte_perm ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm 
(w7[3], 0, selector); - c5[0] = hc_byte_perm (w7[2], w7[3], selector); - c4[3] = hc_byte_perm (w7[1], w7[2], selector); - c4[2] = hc_byte_perm (w7[0], w7[1], selector); - c4[1] = hc_byte_perm (w6[3], w7[0], selector); - c4[0] = hc_byte_perm (w6[2], w6[3], selector); - c3[3] = hc_byte_perm (w6[1], w6[2], selector); - c3[2] = hc_byte_perm (w6[0], w6[1], selector); - c3[1] = hc_byte_perm (w5[3], w6[0], selector); - c3[0] = hc_byte_perm (w5[2], w5[3], selector); - c2[3] = hc_byte_perm (w5[1], w5[2], selector); - c2[2] = hc_byte_perm (w5[0], w5[1], selector); - c2[1] = hc_byte_perm (w4[3], w5[0], selector); - c2[0] = hc_byte_perm (w4[2], w4[3], selector); - c1[3] = hc_byte_perm (w4[1], w4[2], selector); - c1[2] = hc_byte_perm (w4[0], w4[1], selector); - c1[1] = hc_byte_perm (w3[3], w4[0], selector); - c1[0] = hc_byte_perm (w3[2], w3[3], selector); - c0[3] = hc_byte_perm (w3[1], w3[2], selector); - c0[2] = hc_byte_perm (w3[0], w3[1], selector); - c0[1] = hc_byte_perm (w2[3], w3[0], selector); - c0[0] = hc_byte_perm (w2[2], w2[3], selector); - w7[3] = hc_byte_perm (w2[1], w2[2], selector); - w7[2] = hc_byte_perm (w2[0], w2[1], selector); - w7[1] = hc_byte_perm (w1[3], w2[0], selector); - w7[0] = hc_byte_perm (w1[2], w1[3], selector); - w6[3] = hc_byte_perm (w1[1], w1[2], selector); - w6[2] = hc_byte_perm (w1[0], w1[1], selector); - w6[1] = hc_byte_perm (w0[3], w1[0], selector); - w6[0] = hc_byte_perm (w0[2], w0[3], selector); - w5[3] = hc_byte_perm (w0[1], w0[2], selector); - w5[2] = hc_byte_perm (w0[0], w0[1], selector); - w5[1] = hc_byte_perm ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm (w7[3], 0, selector); - c5[1] = hc_byte_perm (w7[2], w7[3], selector); - c5[0] = 
hc_byte_perm (w7[1], w7[2], selector); - c4[3] = hc_byte_perm (w7[0], w7[1], selector); - c4[2] = hc_byte_perm (w6[3], w7[0], selector); - c4[1] = hc_byte_perm (w6[2], w6[3], selector); - c4[0] = hc_byte_perm (w6[1], w6[2], selector); - c3[3] = hc_byte_perm (w6[0], w6[1], selector); - c3[2] = hc_byte_perm (w5[3], w6[0], selector); - c3[1] = hc_byte_perm (w5[2], w5[3], selector); - c3[0] = hc_byte_perm (w5[1], w5[2], selector); - c2[3] = hc_byte_perm (w5[0], w5[1], selector); - c2[2] = hc_byte_perm (w4[3], w5[0], selector); - c2[1] = hc_byte_perm (w4[2], w4[3], selector); - c2[0] = hc_byte_perm (w4[1], w4[2], selector); - c1[3] = hc_byte_perm (w4[0], w4[1], selector); - c1[2] = hc_byte_perm (w3[3], w4[0], selector); - c1[1] = hc_byte_perm (w3[2], w3[3], selector); - c1[0] = hc_byte_perm (w3[1], w3[2], selector); - c0[3] = hc_byte_perm (w3[0], w3[1], selector); - c0[2] = hc_byte_perm (w2[3], w3[0], selector); - c0[1] = hc_byte_perm (w2[2], w2[3], selector); - c0[0] = hc_byte_perm (w2[1], w2[2], selector); - w7[3] = hc_byte_perm (w2[0], w2[1], selector); - w7[2] = hc_byte_perm (w1[3], w2[0], selector); - w7[1] = hc_byte_perm (w1[2], w1[3], selector); - w7[0] = hc_byte_perm (w1[1], w1[2], selector); - w6[3] = hc_byte_perm (w1[0], w1[1], selector); - w6[2] = hc_byte_perm (w0[3], w1[0], selector); - w6[1] = hc_byte_perm (w0[2], w0[3], selector); - w6[0] = hc_byte_perm (w0[1], w0[2], selector); - w5[3] = hc_byte_perm (w0[0], w0[1], selector); - w5[2] = hc_byte_perm ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm (w7[3], 0, selector); - c5[2] = hc_byte_perm (w7[2], w7[3], selector); - c5[1] = hc_byte_perm (w7[1], w7[2], selector); - c5[0] = hc_byte_perm (w7[0], w7[1], 
selector); - c4[3] = hc_byte_perm (w6[3], w7[0], selector); - c4[2] = hc_byte_perm (w6[2], w6[3], selector); - c4[1] = hc_byte_perm (w6[1], w6[2], selector); - c4[0] = hc_byte_perm (w6[0], w6[1], selector); - c3[3] = hc_byte_perm (w5[3], w6[0], selector); - c3[2] = hc_byte_perm (w5[2], w5[3], selector); - c3[1] = hc_byte_perm (w5[1], w5[2], selector); - c3[0] = hc_byte_perm (w5[0], w5[1], selector); - c2[3] = hc_byte_perm (w4[3], w5[0], selector); - c2[2] = hc_byte_perm (w4[2], w4[3], selector); - c2[1] = hc_byte_perm (w4[1], w4[2], selector); - c2[0] = hc_byte_perm (w4[0], w4[1], selector); - c1[3] = hc_byte_perm (w3[3], w4[0], selector); - c1[2] = hc_byte_perm (w3[2], w3[3], selector); - c1[1] = hc_byte_perm (w3[1], w3[2], selector); - c1[0] = hc_byte_perm (w3[0], w3[1], selector); - c0[3] = hc_byte_perm (w2[3], w3[0], selector); - c0[2] = hc_byte_perm (w2[2], w2[3], selector); - c0[1] = hc_byte_perm (w2[1], w2[2], selector); - c0[0] = hc_byte_perm (w2[0], w2[1], selector); - w7[3] = hc_byte_perm (w1[3], w2[0], selector); - w7[2] = hc_byte_perm (w1[2], w1[3], selector); - w7[1] = hc_byte_perm (w1[1], w1[2], selector); - w7[0] = hc_byte_perm (w1[0], w1[1], selector); - w6[3] = hc_byte_perm (w0[3], w1[0], selector); - w6[2] = hc_byte_perm (w0[2], w0[3], selector); - w6[1] = hc_byte_perm (w0[1], w0[2], selector); - w6[0] = hc_byte_perm (w0[0], w0[1], selector); - w5[3] = hc_byte_perm ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm (w7[3], 0, selector); - c5[3] = hc_byte_perm (w7[2], w7[3], selector); - c5[2] = hc_byte_perm (w7[1], w7[2], selector); - c5[1] = hc_byte_perm (w7[0], w7[1], selector); - c5[0] = hc_byte_perm (w6[3], w7[0], selector); - 
c4[3] = hc_byte_perm (w6[2], w6[3], selector); - c4[2] = hc_byte_perm (w6[1], w6[2], selector); - c4[1] = hc_byte_perm (w6[0], w6[1], selector); - c4[0] = hc_byte_perm (w5[3], w6[0], selector); - c3[3] = hc_byte_perm (w5[2], w5[3], selector); - c3[2] = hc_byte_perm (w5[1], w5[2], selector); - c3[1] = hc_byte_perm (w5[0], w5[1], selector); - c3[0] = hc_byte_perm (w4[3], w5[0], selector); - c2[3] = hc_byte_perm (w4[2], w4[3], selector); - c2[2] = hc_byte_perm (w4[1], w4[2], selector); - c2[1] = hc_byte_perm (w4[0], w4[1], selector); - c2[0] = hc_byte_perm (w3[3], w4[0], selector); - c1[3] = hc_byte_perm (w3[2], w3[3], selector); - c1[2] = hc_byte_perm (w3[1], w3[2], selector); - c1[1] = hc_byte_perm (w3[0], w3[1], selector); - c1[0] = hc_byte_perm (w2[3], w3[0], selector); - c0[3] = hc_byte_perm (w2[2], w2[3], selector); - c0[2] = hc_byte_perm (w2[1], w2[2], selector); - c0[1] = hc_byte_perm (w2[0], w2[1], selector); - c0[0] = hc_byte_perm (w1[3], w2[0], selector); - w7[3] = hc_byte_perm (w1[2], w1[3], selector); - w7[2] = hc_byte_perm (w1[1], w1[2], selector); - w7[1] = hc_byte_perm (w1[0], w1[1], selector); - w7[0] = hc_byte_perm (w0[3], w1[0], selector); - w6[3] = hc_byte_perm (w0[2], w0[3], selector); - w6[2] = hc_byte_perm (w0[1], w0[2], selector); - w6[1] = hc_byte_perm (w0[0], w0[1], selector); - w6[0] = hc_byte_perm ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm (w7[3], 0, selector); - c6[0] = hc_byte_perm (w7[2], w7[3], selector); - c5[3] = hc_byte_perm (w7[1], w7[2], selector); - c5[2] = hc_byte_perm (w7[0], w7[1], selector); - c5[1] = hc_byte_perm (w6[3], w7[0], selector); - c5[0] = hc_byte_perm (w6[2], w6[3], selector); - 
c4[3] = hc_byte_perm (w6[1], w6[2], selector); - c4[2] = hc_byte_perm (w6[0], w6[1], selector); - c4[1] = hc_byte_perm (w5[3], w6[0], selector); - c4[0] = hc_byte_perm (w5[2], w5[3], selector); - c3[3] = hc_byte_perm (w5[1], w5[2], selector); - c3[2] = hc_byte_perm (w5[0], w5[1], selector); - c3[1] = hc_byte_perm (w4[3], w5[0], selector); - c3[0] = hc_byte_perm (w4[2], w4[3], selector); - c2[3] = hc_byte_perm (w4[1], w4[2], selector); - c2[2] = hc_byte_perm (w4[0], w4[1], selector); - c2[1] = hc_byte_perm (w3[3], w4[0], selector); - c2[0] = hc_byte_perm (w3[2], w3[3], selector); - c1[3] = hc_byte_perm (w3[1], w3[2], selector); - c1[2] = hc_byte_perm (w3[0], w3[1], selector); - c1[1] = hc_byte_perm (w2[3], w3[0], selector); - c1[0] = hc_byte_perm (w2[2], w2[3], selector); - c0[3] = hc_byte_perm (w2[1], w2[2], selector); - c0[2] = hc_byte_perm (w2[0], w2[1], selector); - c0[1] = hc_byte_perm (w1[3], w2[0], selector); - c0[0] = hc_byte_perm (w1[2], w1[3], selector); - w7[3] = hc_byte_perm (w1[1], w1[2], selector); - w7[2] = hc_byte_perm (w1[0], w1[1], selector); - w7[1] = hc_byte_perm (w0[3], w1[0], selector); - w7[0] = hc_byte_perm (w0[2], w0[3], selector); - w6[3] = hc_byte_perm (w0[1], w0[2], selector); - w6[2] = hc_byte_perm (w0[0], w0[1], selector); - w6[1] = hc_byte_perm ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm (w7[3], 0, selector); - c6[1] = hc_byte_perm (w7[2], w7[3], selector); - c6[0] = hc_byte_perm (w7[1], w7[2], selector); - c5[3] = hc_byte_perm (w7[0], w7[1], selector); - c5[2] = hc_byte_perm (w6[3], w7[0], selector); - c5[1] = hc_byte_perm (w6[2], w6[3], selector); - c5[0] = hc_byte_perm (w6[1], w6[2], 
selector); - c4[3] = hc_byte_perm (w6[0], w6[1], selector); - c4[2] = hc_byte_perm (w5[3], w6[0], selector); - c4[1] = hc_byte_perm (w5[2], w5[3], selector); - c4[0] = hc_byte_perm (w5[1], w5[2], selector); - c3[3] = hc_byte_perm (w5[0], w5[1], selector); - c3[2] = hc_byte_perm (w4[3], w5[0], selector); - c3[1] = hc_byte_perm (w4[2], w4[3], selector); - c3[0] = hc_byte_perm (w4[1], w4[2], selector); - c2[3] = hc_byte_perm (w4[0], w4[1], selector); - c2[2] = hc_byte_perm (w3[3], w4[0], selector); - c2[1] = hc_byte_perm (w3[2], w3[3], selector); - c2[0] = hc_byte_perm (w3[1], w3[2], selector); - c1[3] = hc_byte_perm (w3[0], w3[1], selector); - c1[2] = hc_byte_perm (w2[3], w3[0], selector); - c1[1] = hc_byte_perm (w2[2], w2[3], selector); - c1[0] = hc_byte_perm (w2[1], w2[2], selector); - c0[3] = hc_byte_perm (w2[0], w2[1], selector); - c0[2] = hc_byte_perm (w1[3], w2[0], selector); - c0[1] = hc_byte_perm (w1[2], w1[3], selector); - c0[0] = hc_byte_perm (w1[1], w1[2], selector); - w7[3] = hc_byte_perm (w1[0], w1[1], selector); - w7[2] = hc_byte_perm (w0[3], w1[0], selector); - w7[1] = hc_byte_perm (w0[2], w0[3], selector); - w7[0] = hc_byte_perm (w0[1], w0[2], selector); - w6[3] = hc_byte_perm (w0[0], w0[1], selector); - w6[2] = hc_byte_perm ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm (w7[3], 0, selector); - c6[2] = hc_byte_perm (w7[2], w7[3], selector); - c6[1] = hc_byte_perm (w7[1], w7[2], selector); - c6[0] = hc_byte_perm (w7[0], w7[1], selector); - c5[3] = hc_byte_perm (w6[3], w7[0], selector); - c5[2] = hc_byte_perm (w6[2], w6[3], selector); - c5[1] = hc_byte_perm (w6[1], w6[2], selector); - c5[0] = 
hc_byte_perm (w6[0], w6[1], selector); - c4[3] = hc_byte_perm (w5[3], w6[0], selector); - c4[2] = hc_byte_perm (w5[2], w5[3], selector); - c4[1] = hc_byte_perm (w5[1], w5[2], selector); - c4[0] = hc_byte_perm (w5[0], w5[1], selector); - c3[3] = hc_byte_perm (w4[3], w5[0], selector); - c3[2] = hc_byte_perm (w4[2], w4[3], selector); - c3[1] = hc_byte_perm (w4[1], w4[2], selector); - c3[0] = hc_byte_perm (w4[0], w4[1], selector); - c2[3] = hc_byte_perm (w3[3], w4[0], selector); - c2[2] = hc_byte_perm (w3[2], w3[3], selector); - c2[1] = hc_byte_perm (w3[1], w3[2], selector); - c2[0] = hc_byte_perm (w3[0], w3[1], selector); - c1[3] = hc_byte_perm (w2[3], w3[0], selector); - c1[2] = hc_byte_perm (w2[2], w2[3], selector); - c1[1] = hc_byte_perm (w2[1], w2[2], selector); - c1[0] = hc_byte_perm (w2[0], w2[1], selector); - c0[3] = hc_byte_perm (w1[3], w2[0], selector); - c0[2] = hc_byte_perm (w1[2], w1[3], selector); - c0[1] = hc_byte_perm (w1[1], w1[2], selector); - c0[0] = hc_byte_perm (w1[0], w1[1], selector); - w7[3] = hc_byte_perm (w0[3], w1[0], selector); - w7[2] = hc_byte_perm (w0[2], w0[3], selector); - w7[1] = hc_byte_perm (w0[1], w0[2], selector); - w7[0] = hc_byte_perm (w0[0], w0[1], selector); - w6[3] = hc_byte_perm ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm (w7[3], 0, selector); - c6[3] = hc_byte_perm (w7[2], w7[3], selector); - c6[2] = hc_byte_perm (w7[1], w7[2], selector); - c6[1] = hc_byte_perm (w7[0], w7[1], selector); - c6[0] = hc_byte_perm (w6[3], w7[0], selector); - c5[3] = hc_byte_perm (w6[2], w6[3], selector); - c5[2] = hc_byte_perm (w6[1], w6[2], selector); - c5[1] = hc_byte_perm 
(w6[0], w6[1], selector); - c5[0] = hc_byte_perm (w5[3], w6[0], selector); - c4[3] = hc_byte_perm (w5[2], w5[3], selector); - c4[2] = hc_byte_perm (w5[1], w5[2], selector); - c4[1] = hc_byte_perm (w5[0], w5[1], selector); - c4[0] = hc_byte_perm (w4[3], w5[0], selector); - c3[3] = hc_byte_perm (w4[2], w4[3], selector); - c3[2] = hc_byte_perm (w4[1], w4[2], selector); - c3[1] = hc_byte_perm (w4[0], w4[1], selector); - c3[0] = hc_byte_perm (w3[3], w4[0], selector); - c2[3] = hc_byte_perm (w3[2], w3[3], selector); - c2[2] = hc_byte_perm (w3[1], w3[2], selector); - c2[1] = hc_byte_perm (w3[0], w3[1], selector); - c2[0] = hc_byte_perm (w2[3], w3[0], selector); - c1[3] = hc_byte_perm (w2[2], w2[3], selector); - c1[2] = hc_byte_perm (w2[1], w2[2], selector); - c1[1] = hc_byte_perm (w2[0], w2[1], selector); - c1[0] = hc_byte_perm (w1[3], w2[0], selector); - c0[3] = hc_byte_perm (w1[2], w1[3], selector); - c0[2] = hc_byte_perm (w1[1], w1[2], selector); - c0[1] = hc_byte_perm (w1[0], w1[1], selector); - c0[0] = hc_byte_perm (w0[3], w1[0], selector); - w7[3] = hc_byte_perm (w0[2], w0[3], selector); - w7[2] = hc_byte_perm (w0[1], w0[2], selector); - w7[1] = hc_byte_perm (w0[0], w0[1], selector); - w7[0] = hc_byte_perm ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm (w7[3], 0, selector); - c7[0] = hc_byte_perm (w7[2], w7[3], selector); - c6[3] = hc_byte_perm (w7[1], w7[2], selector); - c6[2] = hc_byte_perm (w7[0], w7[1], selector); - c6[1] = hc_byte_perm (w6[3], w7[0], selector); - c6[0] = hc_byte_perm (w6[2], w6[3], selector); - c5[3] = hc_byte_perm (w6[1], w6[2], selector); - c5[2] = hc_byte_perm 
(w6[0], w6[1], selector); - c5[1] = hc_byte_perm (w5[3], w6[0], selector); - c5[0] = hc_byte_perm (w5[2], w5[3], selector); - c4[3] = hc_byte_perm (w5[1], w5[2], selector); - c4[2] = hc_byte_perm (w5[0], w5[1], selector); - c4[1] = hc_byte_perm (w4[3], w5[0], selector); - c4[0] = hc_byte_perm (w4[2], w4[3], selector); - c3[3] = hc_byte_perm (w4[1], w4[2], selector); - c3[2] = hc_byte_perm (w4[0], w4[1], selector); - c3[1] = hc_byte_perm (w3[3], w4[0], selector); - c3[0] = hc_byte_perm (w3[2], w3[3], selector); - c2[3] = hc_byte_perm (w3[1], w3[2], selector); - c2[2] = hc_byte_perm (w3[0], w3[1], selector); - c2[1] = hc_byte_perm (w2[3], w3[0], selector); - c2[0] = hc_byte_perm (w2[2], w2[3], selector); - c1[3] = hc_byte_perm (w2[1], w2[2], selector); - c1[2] = hc_byte_perm (w2[0], w2[1], selector); - c1[1] = hc_byte_perm (w1[3], w2[0], selector); - c1[0] = hc_byte_perm (w1[2], w1[3], selector); - c0[3] = hc_byte_perm (w1[1], w1[2], selector); - c0[2] = hc_byte_perm (w1[0], w1[1], selector); - c0[1] = hc_byte_perm (w0[3], w1[0], selector); - c0[0] = hc_byte_perm (w0[2], w0[3], selector); - w7[3] = hc_byte_perm (w0[1], w0[2], selector); - w7[2] = hc_byte_perm (w0[0], w0[1], selector); - w7[1] = hc_byte_perm ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm (w7[3], 0, selector); - c7[1] = hc_byte_perm (w7[2], w7[3], selector); - c7[0] = hc_byte_perm (w7[1], w7[2], selector); - c6[3] = hc_byte_perm (w7[0], w7[1], selector); - c6[2] = hc_byte_perm (w6[3], w7[0], selector); - c6[1] = hc_byte_perm (w6[2], w6[3], selector); - c6[0] = hc_byte_perm (w6[1], w6[2], selector); - c5[3] = 
hc_byte_perm (w6[0], w6[1], selector); - c5[2] = hc_byte_perm (w5[3], w6[0], selector); - c5[1] = hc_byte_perm (w5[2], w5[3], selector); - c5[0] = hc_byte_perm (w5[1], w5[2], selector); - c4[3] = hc_byte_perm (w5[0], w5[1], selector); - c4[2] = hc_byte_perm (w4[3], w5[0], selector); - c4[1] = hc_byte_perm (w4[2], w4[3], selector); - c4[0] = hc_byte_perm (w4[1], w4[2], selector); - c3[3] = hc_byte_perm (w4[0], w4[1], selector); - c3[2] = hc_byte_perm (w3[3], w4[0], selector); - c3[1] = hc_byte_perm (w3[2], w3[3], selector); - c3[0] = hc_byte_perm (w3[1], w3[2], selector); - c2[3] = hc_byte_perm (w3[0], w3[1], selector); - c2[2] = hc_byte_perm (w2[3], w3[0], selector); - c2[1] = hc_byte_perm (w2[2], w2[3], selector); - c2[0] = hc_byte_perm (w2[1], w2[2], selector); - c1[3] = hc_byte_perm (w2[0], w2[1], selector); - c1[2] = hc_byte_perm (w1[3], w2[0], selector); - c1[1] = hc_byte_perm (w1[2], w1[3], selector); - c1[0] = hc_byte_perm (w1[1], w1[2], selector); - c0[3] = hc_byte_perm (w1[0], w1[1], selector); - c0[2] = hc_byte_perm (w0[3], w1[0], selector); - c0[1] = hc_byte_perm (w0[2], w0[3], selector); - c0[0] = hc_byte_perm (w0[1], w0[2], selector); - w7[3] = hc_byte_perm (w0[0], w0[1], selector); - w7[2] = hc_byte_perm ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm (w7[3], 0, selector); - c7[2] = hc_byte_perm (w7[2], w7[3], selector); - c7[1] = hc_byte_perm (w7[1], w7[2], selector); - c7[0] = hc_byte_perm (w7[0], w7[1], selector); - c6[3] = hc_byte_perm (w6[3], w7[0], selector); - c6[2] = hc_byte_perm (w6[2], w6[3], selector); - c6[1] = hc_byte_perm (w6[1], 
w6[2], selector); - c6[0] = hc_byte_perm (w6[0], w6[1], selector); - c5[3] = hc_byte_perm (w5[3], w6[0], selector); - c5[2] = hc_byte_perm (w5[2], w5[3], selector); - c5[1] = hc_byte_perm (w5[1], w5[2], selector); - c5[0] = hc_byte_perm (w5[0], w5[1], selector); - c4[3] = hc_byte_perm (w4[3], w5[0], selector); - c4[2] = hc_byte_perm (w4[2], w4[3], selector); - c4[1] = hc_byte_perm (w4[1], w4[2], selector); - c4[0] = hc_byte_perm (w4[0], w4[1], selector); - c3[3] = hc_byte_perm (w3[3], w4[0], selector); - c3[2] = hc_byte_perm (w3[2], w3[3], selector); - c3[1] = hc_byte_perm (w3[1], w3[2], selector); - c3[0] = hc_byte_perm (w3[0], w3[1], selector); - c2[3] = hc_byte_perm (w2[3], w3[0], selector); - c2[2] = hc_byte_perm (w2[2], w2[3], selector); - c2[1] = hc_byte_perm (w2[1], w2[2], selector); - c2[0] = hc_byte_perm (w2[0], w2[1], selector); - c1[3] = hc_byte_perm (w1[3], w2[0], selector); - c1[2] = hc_byte_perm (w1[2], w1[3], selector); - c1[1] = hc_byte_perm (w1[1], w1[2], selector); - c1[0] = hc_byte_perm (w1[0], w1[1], selector); - c0[3] = hc_byte_perm (w0[3], w1[0], selector); - c0[2] = hc_byte_perm (w0[2], w0[3], selector); - c0[1] = hc_byte_perm (w0[1], w0[2], selector); - c0[0] = hc_byte_perm (w0[0], w0[1], selector); - w7[3] = hc_byte_perm ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, const u32 offset) { const int offset_switch = offset / 4; - #if 
((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -12886,1180 +9110,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_be (PRIVATE_AS u32x *w0, PRIVATE_AS u3 break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm (w7[3], w7[2], selector); - w7[2] = hc_byte_perm (w7[2], w7[1], selector); - w7[1] = hc_byte_perm (w7[1], w7[0], selector); - w7[0] = hc_byte_perm (w7[0], w6[3], selector); - w6[3] = hc_byte_perm (w6[3], w6[2], selector); - w6[2] = hc_byte_perm (w6[2], w6[1], selector); - w6[1] = hc_byte_perm (w6[1], w6[0], selector); - w6[0] = hc_byte_perm (w6[0], w5[3], selector); - w5[3] = hc_byte_perm (w5[3], w5[2], selector); - w5[2] = hc_byte_perm (w5[2], w5[1], selector); - w5[1] = hc_byte_perm (w5[1], w5[0], selector); - w5[0] = hc_byte_perm (w5[0], w4[3], selector); - w4[3] = hc_byte_perm (w4[3], w4[2], selector); - w4[2] = hc_byte_perm (w4[2], w4[1], selector); - w4[1] = hc_byte_perm (w4[1], w4[0], selector); - w4[0] = hc_byte_perm (w4[0], w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], 
w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - w7[3] = hc_byte_perm (w7[2], w7[1], selector); - w7[2] = hc_byte_perm (w7[1], w7[0], selector); - w7[1] = hc_byte_perm (w7[0], w6[3], selector); - w7[0] = hc_byte_perm (w6[3], w6[2], selector); - w6[3] = hc_byte_perm (w6[2], w6[1], selector); - w6[2] = hc_byte_perm (w6[1], w6[0], selector); - w6[1] = hc_byte_perm (w6[0], w5[3], selector); - w6[0] = hc_byte_perm (w5[3], w5[2], selector); - w5[3] = hc_byte_perm (w5[2], w5[1], selector); - w5[2] = hc_byte_perm (w5[1], w5[0], selector); - w5[1] = hc_byte_perm (w5[0], w4[3], selector); - w5[0] = hc_byte_perm (w4[3], w4[2], selector); - w4[3] = hc_byte_perm (w4[2], w4[1], selector); - w4[2] = hc_byte_perm (w4[1], w4[0], selector); - w4[1] = hc_byte_perm (w4[0], w3[3], selector); - w4[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w7[3] = hc_byte_perm (w7[1], w7[0], selector); - w7[2] = hc_byte_perm (w7[0], w6[3], selector); - w7[1] = hc_byte_perm (w6[3], w6[2], selector); - w7[0] = hc_byte_perm (w6[2], w6[1], selector); - w6[3] = hc_byte_perm (w6[1], w6[0], selector); - w6[2] = 
hc_byte_perm (w6[0], w5[3], selector); - w6[1] = hc_byte_perm (w5[3], w5[2], selector); - w6[0] = hc_byte_perm (w5[2], w5[1], selector); - w5[3] = hc_byte_perm (w5[1], w5[0], selector); - w5[2] = hc_byte_perm (w5[0], w4[3], selector); - w5[1] = hc_byte_perm (w4[3], w4[2], selector); - w5[0] = hc_byte_perm (w4[2], w4[1], selector); - w4[3] = hc_byte_perm (w4[1], w4[0], selector); - w4[2] = hc_byte_perm (w4[0], w3[3], selector); - w4[1] = hc_byte_perm (w3[3], w3[2], selector); - w4[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w7[3] = hc_byte_perm (w7[0], w6[3], selector); - w7[2] = hc_byte_perm (w6[3], w6[2], selector); - w7[1] = hc_byte_perm (w6[2], w6[1], selector); - w7[0] = hc_byte_perm (w6[1], w6[0], selector); - w6[3] = hc_byte_perm (w6[0], w5[3], selector); - w6[2] = hc_byte_perm (w5[3], w5[2], selector); - w6[1] = hc_byte_perm (w5[2], w5[1], selector); - w6[0] = hc_byte_perm (w5[1], w5[0], selector); - w5[3] = hc_byte_perm (w5[0], w4[3], selector); - w5[2] = hc_byte_perm (w4[3], w4[2], selector); - w5[1] = hc_byte_perm (w4[2], w4[1], selector); - w5[0] = hc_byte_perm (w4[1], w4[0], selector); - w4[3] = hc_byte_perm (w4[0], w3[3], selector); - w4[2] = hc_byte_perm (w3[3], w3[2], selector); - w4[1] = hc_byte_perm (w3[2], w3[1], selector); - 
w4[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w7[3] = hc_byte_perm (w6[3], w6[2], selector); - w7[2] = hc_byte_perm (w6[2], w6[1], selector); - w7[1] = hc_byte_perm (w6[1], w6[0], selector); - w7[0] = hc_byte_perm (w6[0], w5[3], selector); - w6[3] = hc_byte_perm (w5[3], w5[2], selector); - w6[2] = hc_byte_perm (w5[2], w5[1], selector); - w6[1] = hc_byte_perm (w5[1], w5[0], selector); - w6[0] = hc_byte_perm (w5[0], w4[3], selector); - w5[3] = hc_byte_perm (w4[3], w4[2], selector); - w5[2] = hc_byte_perm (w4[2], w4[1], selector); - w5[1] = hc_byte_perm (w4[1], w4[0], selector); - w5[0] = hc_byte_perm (w4[0], w3[3], selector); - w4[3] = hc_byte_perm (w3[3], w3[2], selector); - w4[2] = hc_byte_perm (w3[2], w3[1], selector); - w4[1] = hc_byte_perm (w3[1], w3[0], selector); - w4[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], 
w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w7[3] = hc_byte_perm (w6[2], w6[1], selector); - w7[2] = hc_byte_perm (w6[1], w6[0], selector); - w7[1] = hc_byte_perm (w6[0], w5[3], selector); - w7[0] = hc_byte_perm (w5[3], w5[2], selector); - w6[3] = hc_byte_perm (w5[2], w5[1], selector); - w6[2] = hc_byte_perm (w5[1], w5[0], selector); - w6[1] = hc_byte_perm (w5[0], w4[3], selector); - w6[0] = hc_byte_perm (w4[3], w4[2], selector); - w5[3] = hc_byte_perm (w4[2], w4[1], selector); - w5[2] = hc_byte_perm (w4[1], w4[0], selector); - w5[1] = hc_byte_perm (w4[0], w3[3], selector); - w5[0] = hc_byte_perm (w3[3], w3[2], selector); - w4[3] = hc_byte_perm (w3[2], w3[1], selector); - w4[2] = hc_byte_perm (w3[1], w3[0], selector); - w4[1] = hc_byte_perm (w3[0], w2[3], selector); - w4[0] = hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w7[3] = hc_byte_perm (w6[1], w6[0], selector); - w7[2] = hc_byte_perm (w6[0], w5[3], selector); - w7[1] = hc_byte_perm (w5[3], w5[2], selector); - w7[0] = hc_byte_perm (w5[2], w5[1], selector); - w6[3] = hc_byte_perm (w5[1], w5[0], selector); - w6[2] = hc_byte_perm (w5[0], w4[3], selector); - w6[1] = hc_byte_perm (w4[3], w4[2], selector); - w6[0] = hc_byte_perm (w4[2], w4[1], selector); - w5[3] = 
hc_byte_perm (w4[1], w4[0], selector); - w5[2] = hc_byte_perm (w4[0], w3[3], selector); - w5[1] = hc_byte_perm (w3[3], w3[2], selector); - w5[0] = hc_byte_perm (w3[2], w3[1], selector); - w4[3] = hc_byte_perm (w3[1], w3[0], selector); - w4[2] = hc_byte_perm (w3[0], w2[3], selector); - w4[1] = hc_byte_perm (w2[3], w2[2], selector); - w4[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w7[3] = hc_byte_perm (w6[0], w5[3], selector); - w7[2] = hc_byte_perm (w5[3], w5[2], selector); - w7[1] = hc_byte_perm (w5[2], w5[1], selector); - w7[0] = hc_byte_perm (w5[1], w5[0], selector); - w6[3] = hc_byte_perm (w5[0], w4[3], selector); - w6[2] = hc_byte_perm (w4[3], w4[2], selector); - w6[1] = hc_byte_perm (w4[2], w4[1], selector); - w6[0] = hc_byte_perm (w4[1], w4[0], selector); - w5[3] = hc_byte_perm (w4[0], w3[3], selector); - w5[2] = hc_byte_perm (w3[3], w3[2], selector); - w5[1] = hc_byte_perm (w3[2], w3[1], selector); - w5[0] = hc_byte_perm (w3[1], w3[0], selector); - w4[3] = hc_byte_perm (w3[0], w2[3], selector); - w4[2] = hc_byte_perm (w2[3], w2[2], selector); - w4[1] = hc_byte_perm (w2[2], w2[1], selector); - w4[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - 
w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w7[3] = hc_byte_perm (w5[3], w5[2], selector); - w7[2] = hc_byte_perm (w5[2], w5[1], selector); - w7[1] = hc_byte_perm (w5[1], w5[0], selector); - w7[0] = hc_byte_perm (w5[0], w4[3], selector); - w6[3] = hc_byte_perm (w4[3], w4[2], selector); - w6[2] = hc_byte_perm (w4[2], w4[1], selector); - w6[1] = hc_byte_perm (w4[1], w4[0], selector); - w6[0] = hc_byte_perm (w4[0], w3[3], selector); - w5[3] = hc_byte_perm (w3[3], w3[2], selector); - w5[2] = hc_byte_perm (w3[2], w3[1], selector); - w5[1] = hc_byte_perm (w3[1], w3[0], selector); - w5[0] = hc_byte_perm (w3[0], w2[3], selector); - w4[3] = hc_byte_perm (w2[3], w2[2], selector); - w4[2] = hc_byte_perm (w2[2], w2[1], selector); - w4[1] = hc_byte_perm (w2[1], w2[0], selector); - w4[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w7[3] = hc_byte_perm (w5[2], w5[1], selector); - w7[2] = hc_byte_perm (w5[1], w5[0], selector); - w7[1] = hc_byte_perm (w5[0], w4[3], selector); - w7[0] = hc_byte_perm (w4[3], w4[2], selector); - w6[3] = hc_byte_perm (w4[2], w4[1], selector); - w6[2] = hc_byte_perm (w4[1], w4[0], selector); - w6[1] = hc_byte_perm (w4[0], w3[3], selector); - w6[0] = hc_byte_perm (w3[3], w3[2], selector); - 
w5[3] = hc_byte_perm (w3[2], w3[1], selector); - w5[2] = hc_byte_perm (w3[1], w3[0], selector); - w5[1] = hc_byte_perm (w3[0], w2[3], selector); - w5[0] = hc_byte_perm (w2[3], w2[2], selector); - w4[3] = hc_byte_perm (w2[2], w2[1], selector); - w4[2] = hc_byte_perm (w2[1], w2[0], selector); - w4[1] = hc_byte_perm (w2[0], w1[3], selector); - w4[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w7[3] = hc_byte_perm (w5[1], w5[0], selector); - w7[2] = hc_byte_perm (w5[0], w4[3], selector); - w7[1] = hc_byte_perm (w4[3], w4[2], selector); - w7[0] = hc_byte_perm (w4[2], w4[1], selector); - w6[3] = hc_byte_perm (w4[1], w4[0], selector); - w6[2] = hc_byte_perm (w4[0], w3[3], selector); - w6[1] = hc_byte_perm (w3[3], w3[2], selector); - w6[0] = hc_byte_perm (w3[2], w3[1], selector); - w5[3] = hc_byte_perm (w3[1], w3[0], selector); - w5[2] = hc_byte_perm (w3[0], w2[3], selector); - w5[1] = hc_byte_perm (w2[3], w2[2], selector); - w5[0] = hc_byte_perm (w2[2], w2[1], selector); - w4[3] = hc_byte_perm (w2[1], w2[0], selector); - w4[2] = hc_byte_perm (w2[0], w1[3], selector); - w4[1] = hc_byte_perm (w1[3], w1[2], selector); - w4[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - 
w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w7[3] = hc_byte_perm (w5[0], w4[3], selector); - w7[2] = hc_byte_perm (w4[3], w4[2], selector); - w7[1] = hc_byte_perm (w4[2], w4[1], selector); - w7[0] = hc_byte_perm (w4[1], w4[0], selector); - w6[3] = hc_byte_perm (w4[0], w3[3], selector); - w6[2] = hc_byte_perm (w3[3], w3[2], selector); - w6[1] = hc_byte_perm (w3[2], w3[1], selector); - w6[0] = hc_byte_perm (w3[1], w3[0], selector); - w5[3] = hc_byte_perm (w3[0], w2[3], selector); - w5[2] = hc_byte_perm (w2[3], w2[2], selector); - w5[1] = hc_byte_perm (w2[2], w2[1], selector); - w5[0] = hc_byte_perm (w2[1], w2[0], selector); - w4[3] = hc_byte_perm (w2[0], w1[3], selector); - w4[2] = hc_byte_perm (w1[3], w1[2], selector); - w4[1] = hc_byte_perm (w1[2], w1[1], selector); - w4[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w7[3] = hc_byte_perm (w4[3], w4[2], selector); - w7[2] = hc_byte_perm (w4[2], w4[1], selector); - w7[1] = hc_byte_perm (w4[1], w4[0], selector); - w7[0] = hc_byte_perm (w4[0], w3[3], selector); - w6[3] = hc_byte_perm (w3[3], w3[2], selector); - w6[2] = hc_byte_perm (w3[2], w3[1], selector); - w6[1] = hc_byte_perm (w3[1], w3[0], selector); - w6[0] = hc_byte_perm (w3[0], w2[3], selector); - w5[3] = hc_byte_perm (w2[3], w2[2], selector); - w5[2] = hc_byte_perm (w2[2], w2[1], selector); - w5[1] = hc_byte_perm (w2[1], w2[0], selector); - w5[0] = hc_byte_perm (w2[0], w1[3], selector); - w4[3] = hc_byte_perm (w1[3], w1[2], selector); - w4[2] = hc_byte_perm (w1[2], w1[1], selector); - w4[1] = 
hc_byte_perm (w1[1], w1[0], selector); - w4[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w7[3] = hc_byte_perm (w4[2], w4[1], selector); - w7[2] = hc_byte_perm (w4[1], w4[0], selector); - w7[1] = hc_byte_perm (w4[0], w3[3], selector); - w7[0] = hc_byte_perm (w3[3], w3[2], selector); - w6[3] = hc_byte_perm (w3[2], w3[1], selector); - w6[2] = hc_byte_perm (w3[1], w3[0], selector); - w6[1] = hc_byte_perm (w3[0], w2[3], selector); - w6[0] = hc_byte_perm (w2[3], w2[2], selector); - w5[3] = hc_byte_perm (w2[2], w2[1], selector); - w5[2] = hc_byte_perm (w2[1], w2[0], selector); - w5[1] = hc_byte_perm (w2[0], w1[3], selector); - w5[0] = hc_byte_perm (w1[3], w1[2], selector); - w4[3] = hc_byte_perm (w1[2], w1[1], selector); - w4[2] = hc_byte_perm (w1[1], w1[0], selector); - w4[1] = hc_byte_perm (w1[0], w0[3], selector); - w4[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w7[3] = hc_byte_perm (w4[1], w4[0], selector); - w7[2] = hc_byte_perm (w4[0], w3[3], selector); - w7[1] = hc_byte_perm (w3[3], w3[2], selector); - w7[0] = hc_byte_perm (w3[2], w3[1], selector); - w6[3] = hc_byte_perm (w3[1], w3[0], selector); - w6[2] = hc_byte_perm (w3[0], w2[3], selector); - w6[1] = hc_byte_perm (w2[3], w2[2], selector); - w6[0] = hc_byte_perm (w2[2], w2[1], selector); - w5[3] = hc_byte_perm 
(w2[1], w2[0], selector); - w5[2] = hc_byte_perm (w2[0], w1[3], selector); - w5[1] = hc_byte_perm (w1[3], w1[2], selector); - w5[0] = hc_byte_perm (w1[2], w1[1], selector); - w4[3] = hc_byte_perm (w1[1], w1[0], selector); - w4[2] = hc_byte_perm (w1[0], w0[3], selector); - w4[1] = hc_byte_perm (w0[3], w0[2], selector); - w4[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w7[3] = hc_byte_perm (w4[0], w3[3], selector); - w7[2] = hc_byte_perm (w3[3], w3[2], selector); - w7[1] = hc_byte_perm (w3[2], w3[1], selector); - w7[0] = hc_byte_perm (w3[1], w3[0], selector); - w6[3] = hc_byte_perm (w3[0], w2[3], selector); - w6[2] = hc_byte_perm (w2[3], w2[2], selector); - w6[1] = hc_byte_perm (w2[2], w2[1], selector); - w6[0] = hc_byte_perm (w2[1], w2[0], selector); - w5[3] = hc_byte_perm (w2[0], w1[3], selector); - w5[2] = hc_byte_perm (w1[3], w1[2], selector); - w5[1] = hc_byte_perm (w1[2], w1[1], selector); - w5[0] = hc_byte_perm (w1[1], w1[0], selector); - w4[3] = hc_byte_perm (w1[0], w0[3], selector); - w4[2] = hc_byte_perm (w0[3], w0[2], selector); - w4[1] = hc_byte_perm (w0[2], w0[1], selector); - w4[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - w7[3] = hc_byte_perm (w3[3], w3[2], selector); - w7[2] = hc_byte_perm (w3[2], w3[1], selector); - w7[1] = hc_byte_perm (w3[1], w3[0], selector); - w7[0] = hc_byte_perm (w3[0], w2[3], selector); - w6[3] = hc_byte_perm (w2[3], w2[2], selector); - w6[2] = hc_byte_perm (w2[2], w2[1], 
selector); - w6[1] = hc_byte_perm (w2[1], w2[0], selector); - w6[0] = hc_byte_perm (w2[0], w1[3], selector); - w5[3] = hc_byte_perm (w1[3], w1[2], selector); - w5[2] = hc_byte_perm (w1[2], w1[1], selector); - w5[1] = hc_byte_perm (w1[1], w1[0], selector); - w5[0] = hc_byte_perm (w1[0], w0[3], selector); - w4[3] = hc_byte_perm (w0[3], w0[2], selector); - w4[2] = hc_byte_perm (w0[2], w0[1], selector); - w4[1] = hc_byte_perm (w0[1], w0[0], selector); - w4[0] = hc_byte_perm (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm (w3[2], w3[1], selector); - w7[2] = hc_byte_perm (w3[1], w3[0], selector); - w7[1] = hc_byte_perm (w3[0], w2[3], selector); - w7[0] = hc_byte_perm (w2[3], w2[2], selector); - w6[3] = hc_byte_perm (w2[2], w2[1], selector); - w6[2] = hc_byte_perm (w2[1], w2[0], selector); - w6[1] = hc_byte_perm (w2[0], w1[3], selector); - w6[0] = hc_byte_perm (w1[3], w1[2], selector); - w5[3] = hc_byte_perm (w1[2], w1[1], selector); - w5[2] = hc_byte_perm (w1[1], w1[0], selector); - w5[1] = hc_byte_perm (w1[0], w0[3], selector); - w5[0] = hc_byte_perm (w0[3], w0[2], selector); - w4[3] = hc_byte_perm (w0[2], w0[1], selector); - w4[2] = hc_byte_perm (w0[1], w0[0], selector); - w4[1] = hc_byte_perm (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - w7[3] = hc_byte_perm (w3[1], w3[0], selector); - w7[2] = hc_byte_perm (w3[0], w2[3], selector); - w7[1] = hc_byte_perm (w2[3], w2[2], selector); - w7[0] = hc_byte_perm (w2[2], w2[1], selector); - w6[3] = hc_byte_perm (w2[1], w2[0], selector); - w6[2] = hc_byte_perm (w2[0], w1[3], selector); 
- w6[1] = hc_byte_perm (w1[3], w1[2], selector); - w6[0] = hc_byte_perm (w1[2], w1[1], selector); - w5[3] = hc_byte_perm (w1[1], w1[0], selector); - w5[2] = hc_byte_perm (w1[0], w0[3], selector); - w5[1] = hc_byte_perm (w0[3], w0[2], selector); - w5[0] = hc_byte_perm (w0[2], w0[1], selector); - w4[3] = hc_byte_perm (w0[1], w0[0], selector); - w4[2] = hc_byte_perm (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm (w3[0], w2[3], selector); - w7[2] = hc_byte_perm (w2[3], w2[2], selector); - w7[1] = hc_byte_perm (w2[2], w2[1], selector); - w7[0] = hc_byte_perm (w2[1], w2[0], selector); - w6[3] = hc_byte_perm (w2[0], w1[3], selector); - w6[2] = hc_byte_perm (w1[3], w1[2], selector); - w6[1] = hc_byte_perm (w1[2], w1[1], selector); - w6[0] = hc_byte_perm (w1[1], w1[0], selector); - w5[3] = hc_byte_perm (w1[0], w0[3], selector); - w5[2] = hc_byte_perm (w0[3], w0[2], selector); - w5[1] = hc_byte_perm (w0[2], w0[1], selector); - w5[0] = hc_byte_perm (w0[1], w0[0], selector); - w4[3] = hc_byte_perm (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm (w2[3], w2[2], selector); - w7[2] = hc_byte_perm (w2[2], w2[1], selector); - w7[1] = hc_byte_perm (w2[1], w2[0], selector); - w7[0] = hc_byte_perm (w2[0], w1[3], selector); - w6[3] = hc_byte_perm (w1[3], w1[2], selector); - w6[2] = hc_byte_perm (w1[2], w1[1], selector); - w6[1] = hc_byte_perm (w1[1], w1[0], selector); - w6[0] = hc_byte_perm (w1[0], w0[3], selector); - w5[3] = hc_byte_perm (w0[3], w0[2], selector); - w5[2] 
= hc_byte_perm (w0[2], w0[1], selector); - w5[1] = hc_byte_perm (w0[1], w0[0], selector); - w5[0] = hc_byte_perm (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm (w2[2], w2[1], selector); - w7[2] = hc_byte_perm (w2[1], w2[0], selector); - w7[1] = hc_byte_perm (w2[0], w1[3], selector); - w7[0] = hc_byte_perm (w1[3], w1[2], selector); - w6[3] = hc_byte_perm (w1[2], w1[1], selector); - w6[2] = hc_byte_perm (w1[1], w1[0], selector); - w6[1] = hc_byte_perm (w1[0], w0[3], selector); - w6[0] = hc_byte_perm (w0[3], w0[2], selector); - w5[3] = hc_byte_perm (w0[2], w0[1], selector); - w5[2] = hc_byte_perm (w0[1], w0[0], selector); - w5[1] = hc_byte_perm (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm (w2[1], w2[0], selector); - w7[2] = hc_byte_perm (w2[0], w1[3], selector); - w7[1] = hc_byte_perm (w1[3], w1[2], selector); - w7[0] = hc_byte_perm (w1[2], w1[1], selector); - w6[3] = hc_byte_perm (w1[1], w1[0], selector); - w6[2] = hc_byte_perm (w1[0], w0[3], selector); - w6[1] = hc_byte_perm (w0[3], w0[2], selector); - w6[0] = hc_byte_perm (w0[2], w0[1], selector); - w5[3] = hc_byte_perm (w0[1], w0[0], selector); - w5[2] = hc_byte_perm (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - 
w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm (w2[0], w1[3], selector); - w7[2] = hc_byte_perm (w1[3], w1[2], selector); - w7[1] = hc_byte_perm (w1[2], w1[1], selector); - w7[0] = hc_byte_perm (w1[1], w1[0], selector); - w6[3] = hc_byte_perm (w1[0], w0[3], selector); - w6[2] = hc_byte_perm (w0[3], w0[2], selector); - w6[1] = hc_byte_perm (w0[2], w0[1], selector); - w6[0] = hc_byte_perm (w0[1], w0[0], selector); - w5[3] = hc_byte_perm (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm (w1[3], w1[2], selector); - w7[2] = hc_byte_perm (w1[2], w1[1], selector); - w7[1] = hc_byte_perm (w1[1], w1[0], selector); - w7[0] = hc_byte_perm (w1[0], w0[3], selector); - w6[3] = hc_byte_perm (w0[3], w0[2], selector); - w6[2] = hc_byte_perm (w0[2], w0[1], selector); - w6[1] = hc_byte_perm (w0[1], w0[0], selector); - w6[0] = hc_byte_perm (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - w7[3] = hc_byte_perm (w1[2], w1[1], selector); - w7[2] = hc_byte_perm (w1[1], w1[0], selector); - w7[1] = hc_byte_perm (w1[0], w0[3], selector); - w7[0] = hc_byte_perm (w0[3], w0[2], selector); - w6[3] = hc_byte_perm (w0[2], w0[1], selector); - w6[2] = hc_byte_perm (w0[1], w0[0], selector); - w6[1] = hc_byte_perm (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 
0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm (w1[1], w1[0], selector); - w7[2] = hc_byte_perm (w1[0], w0[3], selector); - w7[1] = hc_byte_perm (w0[3], w0[2], selector); - w7[0] = hc_byte_perm (w0[2], w0[1], selector); - w6[3] = hc_byte_perm (w0[1], w0[0], selector); - w6[2] = hc_byte_perm (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm (w1[0], w0[3], selector); - w7[2] = hc_byte_perm (w0[3], w0[2], selector); - w7[1] = hc_byte_perm (w0[2], w0[1], selector); - w7[0] = hc_byte_perm (w0[1], w0[0], selector); - w6[3] = hc_byte_perm (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = hc_byte_perm (w0[3], w0[2], selector); - w7[2] = hc_byte_perm (w0[2], w0[1], selector); - w7[1] = hc_byte_perm (w0[1], w0[0], selector); - w7[0] = hc_byte_perm (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - 
w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm (w0[2], w0[1], selector); - w7[2] = hc_byte_perm (w0[1], w0[0], selector); - w7[1] = hc_byte_perm (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm (w0[1], w0[0], selector); - w7[2] = hc_byte_perm (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, PRIVATE_AS u32x *w4, PRIVATE_AS u32x *w5, PRIVATE_AS u32x *w6, PRIVATE_AS u32x *w7, PRIVATE_AS u32x *c0, PRIVATE_AS u32x *c1, PRIVATE_AS u32x *c2, PRIVATE_AS u32x *c3, PRIVATE_AS u32x *c4, PRIVATE_AS u32x *c5, PRIVATE_AS u32x *c6, PRIVATE_AS u32x *c7, const u32 
offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -15742,1708 +10798,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be (PRIVATE_AS u32x *w0, PRIVATE break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm ( 0, w7[3], selector); - w7[3] = hc_byte_perm (w7[3], w7[2], selector); - w7[2] = hc_byte_perm (w7[2], w7[1], selector); - w7[1] = hc_byte_perm (w7[1], w7[0], selector); - w7[0] = hc_byte_perm (w7[0], w6[3], selector); - w6[3] = hc_byte_perm (w6[3], w6[2], selector); - w6[2] = hc_byte_perm (w6[2], w6[1], selector); - w6[1] = hc_byte_perm (w6[1], w6[0], selector); - w6[0] = hc_byte_perm (w6[0], w5[3], selector); - w5[3] = hc_byte_perm (w5[3], w5[2], selector); - w5[2] = hc_byte_perm (w5[2], w5[1], selector); - w5[1] = hc_byte_perm (w5[1], w5[0], selector); - w5[0] = hc_byte_perm (w5[0], w4[3], selector); - w4[3] = hc_byte_perm (w4[3], w4[2], selector); - w4[2] = hc_byte_perm (w4[2], w4[1], selector); - w4[1] = hc_byte_perm (w4[1], w4[0], selector); - w4[0] = hc_byte_perm (w4[0], w3[3], selector); - w3[3] = hc_byte_perm (w3[3], w3[2], selector); - w3[2] = hc_byte_perm (w3[2], w3[1], selector); - w3[1] = hc_byte_perm (w3[1], w3[0], selector); - w3[0] = hc_byte_perm (w3[0], w2[3], selector); - w2[3] = hc_byte_perm (w2[3], w2[2], selector); - w2[2] = hc_byte_perm (w2[2], w2[1], selector); - w2[1] = hc_byte_perm (w2[1], w2[0], selector); - w2[0] = hc_byte_perm (w2[0], w1[3], selector); - w1[3] = hc_byte_perm (w1[3], w1[2], selector); - w1[2] = hc_byte_perm (w1[2], w1[1], selector); - w1[1] = hc_byte_perm (w1[1], 
w1[0], selector); - w1[0] = hc_byte_perm (w1[0], w0[3], selector); - w0[3] = hc_byte_perm (w0[3], w0[2], selector); - w0[2] = hc_byte_perm (w0[2], w0[1], selector); - w0[1] = hc_byte_perm (w0[1], w0[0], selector); - w0[0] = hc_byte_perm (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm ( 0, w7[3], selector); - c0[0] = hc_byte_perm (w7[3], w7[2], selector); - w7[3] = hc_byte_perm (w7[2], w7[1], selector); - w7[2] = hc_byte_perm (w7[1], w7[0], selector); - w7[1] = hc_byte_perm (w7[0], w6[3], selector); - w7[0] = hc_byte_perm (w6[3], w6[2], selector); - w6[3] = hc_byte_perm (w6[2], w6[1], selector); - w6[2] = hc_byte_perm (w6[1], w6[0], selector); - w6[1] = hc_byte_perm (w6[0], w5[3], selector); - w6[0] = hc_byte_perm (w5[3], w5[2], selector); - w5[3] = hc_byte_perm (w5[2], w5[1], selector); - w5[2] = hc_byte_perm (w5[1], w5[0], selector); - w5[1] = hc_byte_perm (w5[0], w4[3], selector); - w5[0] = hc_byte_perm (w4[3], w4[2], selector); - w4[3] = hc_byte_perm (w4[2], w4[1], selector); - w4[2] = hc_byte_perm (w4[1], w4[0], selector); - w4[1] = hc_byte_perm (w4[0], w3[3], selector); - w4[0] = hc_byte_perm (w3[3], w3[2], selector); - w3[3] = hc_byte_perm (w3[2], w3[1], selector); - w3[2] = hc_byte_perm (w3[1], w3[0], selector); - w3[1] = hc_byte_perm (w3[0], w2[3], selector); - w3[0] = hc_byte_perm (w2[3], w2[2], selector); - w2[3] = hc_byte_perm (w2[2], w2[1], selector); - w2[2] = hc_byte_perm (w2[1], w2[0], selector); - w2[1] = hc_byte_perm (w2[0], w1[3], selector); - w2[0] = hc_byte_perm (w1[3], w1[2], selector); - w1[3] = hc_byte_perm (w1[2], w1[1], selector); - w1[2] = hc_byte_perm (w1[1], w1[0], selector); - w1[1] = hc_byte_perm (w1[0], w0[3], selector); - w1[0] = hc_byte_perm (w0[3], w0[2], selector); - w0[3] = hc_byte_perm (w0[2], w0[1], selector); - w0[2] = hc_byte_perm (w0[1], w0[0], selector); - w0[1] = hc_byte_perm (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm ( 0, w7[3], selector); - c0[1] = hc_byte_perm 
(w7[3], w7[2], selector); - c0[0] = hc_byte_perm (w7[2], w7[1], selector); - w7[3] = hc_byte_perm (w7[1], w7[0], selector); - w7[2] = hc_byte_perm (w7[0], w6[3], selector); - w7[1] = hc_byte_perm (w6[3], w6[2], selector); - w7[0] = hc_byte_perm (w6[2], w6[1], selector); - w6[3] = hc_byte_perm (w6[1], w6[0], selector); - w6[2] = hc_byte_perm (w6[0], w5[3], selector); - w6[1] = hc_byte_perm (w5[3], w5[2], selector); - w6[0] = hc_byte_perm (w5[2], w5[1], selector); - w5[3] = hc_byte_perm (w5[1], w5[0], selector); - w5[2] = hc_byte_perm (w5[0], w4[3], selector); - w5[1] = hc_byte_perm (w4[3], w4[2], selector); - w5[0] = hc_byte_perm (w4[2], w4[1], selector); - w4[3] = hc_byte_perm (w4[1], w4[0], selector); - w4[2] = hc_byte_perm (w4[0], w3[3], selector); - w4[1] = hc_byte_perm (w3[3], w3[2], selector); - w4[0] = hc_byte_perm (w3[2], w3[1], selector); - w3[3] = hc_byte_perm (w3[1], w3[0], selector); - w3[2] = hc_byte_perm (w3[0], w2[3], selector); - w3[1] = hc_byte_perm (w2[3], w2[2], selector); - w3[0] = hc_byte_perm (w2[2], w2[1], selector); - w2[3] = hc_byte_perm (w2[1], w2[0], selector); - w2[2] = hc_byte_perm (w2[0], w1[3], selector); - w2[1] = hc_byte_perm (w1[3], w1[2], selector); - w2[0] = hc_byte_perm (w1[2], w1[1], selector); - w1[3] = hc_byte_perm (w1[1], w1[0], selector); - w1[2] = hc_byte_perm (w1[0], w0[3], selector); - w1[1] = hc_byte_perm (w0[3], w0[2], selector); - w1[0] = hc_byte_perm (w0[2], w0[1], selector); - w0[3] = hc_byte_perm (w0[1], w0[0], selector); - w0[2] = hc_byte_perm (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm ( 0, w7[3], selector); - c0[2] = hc_byte_perm (w7[3], w7[2], selector); - c0[1] = hc_byte_perm (w7[2], w7[1], selector); - c0[0] = hc_byte_perm (w7[1], w7[0], selector); - w7[3] = hc_byte_perm (w7[0], w6[3], selector); - w7[2] = hc_byte_perm (w6[3], w6[2], selector); - w7[1] = hc_byte_perm (w6[2], w6[1], selector); - w7[0] = hc_byte_perm (w6[1], w6[0], selector); - w6[3] = 
hc_byte_perm (w6[0], w5[3], selector); - w6[2] = hc_byte_perm (w5[3], w5[2], selector); - w6[1] = hc_byte_perm (w5[2], w5[1], selector); - w6[0] = hc_byte_perm (w5[1], w5[0], selector); - w5[3] = hc_byte_perm (w5[0], w4[3], selector); - w5[2] = hc_byte_perm (w4[3], w4[2], selector); - w5[1] = hc_byte_perm (w4[2], w4[1], selector); - w5[0] = hc_byte_perm (w4[1], w4[0], selector); - w4[3] = hc_byte_perm (w4[0], w3[3], selector); - w4[2] = hc_byte_perm (w3[3], w3[2], selector); - w4[1] = hc_byte_perm (w3[2], w3[1], selector); - w4[0] = hc_byte_perm (w3[1], w3[0], selector); - w3[3] = hc_byte_perm (w3[0], w2[3], selector); - w3[2] = hc_byte_perm (w2[3], w2[2], selector); - w3[1] = hc_byte_perm (w2[2], w2[1], selector); - w3[0] = hc_byte_perm (w2[1], w2[0], selector); - w2[3] = hc_byte_perm (w2[0], w1[3], selector); - w2[2] = hc_byte_perm (w1[3], w1[2], selector); - w2[1] = hc_byte_perm (w1[2], w1[1], selector); - w2[0] = hc_byte_perm (w1[1], w1[0], selector); - w1[3] = hc_byte_perm (w1[0], w0[3], selector); - w1[2] = hc_byte_perm (w0[3], w0[2], selector); - w1[1] = hc_byte_perm (w0[2], w0[1], selector); - w1[0] = hc_byte_perm (w0[1], w0[0], selector); - w0[3] = hc_byte_perm (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm ( 0, w7[3], selector); - c0[3] = hc_byte_perm (w7[3], w7[2], selector); - c0[2] = hc_byte_perm (w7[2], w7[1], selector); - c0[1] = hc_byte_perm (w7[1], w7[0], selector); - c0[0] = hc_byte_perm (w7[0], w6[3], selector); - w7[3] = hc_byte_perm (w6[3], w6[2], selector); - w7[2] = hc_byte_perm (w6[2], w6[1], selector); - w7[1] = hc_byte_perm (w6[1], w6[0], selector); - w7[0] = hc_byte_perm (w6[0], w5[3], selector); - w6[3] = hc_byte_perm (w5[3], w5[2], selector); - w6[2] = hc_byte_perm (w5[2], w5[1], selector); - w6[1] = hc_byte_perm (w5[1], w5[0], selector); - w6[0] = hc_byte_perm (w5[0], w4[3], selector); - w5[3] = hc_byte_perm (w4[3], w4[2], selector); - w5[2] = hc_byte_perm (w4[2], w4[1], 
selector); - w5[1] = hc_byte_perm (w4[1], w4[0], selector); - w5[0] = hc_byte_perm (w4[0], w3[3], selector); - w4[3] = hc_byte_perm (w3[3], w3[2], selector); - w4[2] = hc_byte_perm (w3[2], w3[1], selector); - w4[1] = hc_byte_perm (w3[1], w3[0], selector); - w4[0] = hc_byte_perm (w3[0], w2[3], selector); - w3[3] = hc_byte_perm (w2[3], w2[2], selector); - w3[2] = hc_byte_perm (w2[2], w2[1], selector); - w3[1] = hc_byte_perm (w2[1], w2[0], selector); - w3[0] = hc_byte_perm (w2[0], w1[3], selector); - w2[3] = hc_byte_perm (w1[3], w1[2], selector); - w2[2] = hc_byte_perm (w1[2], w1[1], selector); - w2[1] = hc_byte_perm (w1[1], w1[0], selector); - w2[0] = hc_byte_perm (w1[0], w0[3], selector); - w1[3] = hc_byte_perm (w0[3], w0[2], selector); - w1[2] = hc_byte_perm (w0[2], w0[1], selector); - w1[1] = hc_byte_perm (w0[1], w0[0], selector); - w1[0] = hc_byte_perm (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm ( 0, w7[3], selector); - c1[0] = hc_byte_perm (w7[3], w7[2], selector); - c0[3] = hc_byte_perm (w7[2], w7[1], selector); - c0[2] = hc_byte_perm (w7[1], w7[0], selector); - c0[1] = hc_byte_perm (w7[0], w6[3], selector); - c0[0] = hc_byte_perm (w6[3], w6[2], selector); - w7[3] = hc_byte_perm (w6[2], w6[1], selector); - w7[2] = hc_byte_perm (w6[1], w6[0], selector); - w7[1] = hc_byte_perm (w6[0], w5[3], selector); - w7[0] = hc_byte_perm (w5[3], w5[2], selector); - w6[3] = hc_byte_perm (w5[2], w5[1], selector); - w6[2] = hc_byte_perm (w5[1], w5[0], selector); - w6[1] = hc_byte_perm (w5[0], w4[3], selector); - w6[0] = hc_byte_perm (w4[3], w4[2], selector); - w5[3] = hc_byte_perm (w4[2], w4[1], selector); - w5[2] = hc_byte_perm (w4[1], w4[0], selector); - w5[1] = hc_byte_perm (w4[0], w3[3], selector); - w5[0] = hc_byte_perm (w3[3], w3[2], selector); - w4[3] = hc_byte_perm (w3[2], w3[1], selector); - w4[2] = hc_byte_perm (w3[1], w3[0], selector); - w4[1] = hc_byte_perm (w3[0], w2[3], selector); - w4[0] = 
hc_byte_perm (w2[3], w2[2], selector); - w3[3] = hc_byte_perm (w2[2], w2[1], selector); - w3[2] = hc_byte_perm (w2[1], w2[0], selector); - w3[1] = hc_byte_perm (w2[0], w1[3], selector); - w3[0] = hc_byte_perm (w1[3], w1[2], selector); - w2[3] = hc_byte_perm (w1[2], w1[1], selector); - w2[2] = hc_byte_perm (w1[1], w1[0], selector); - w2[1] = hc_byte_perm (w1[0], w0[3], selector); - w2[0] = hc_byte_perm (w0[3], w0[2], selector); - w1[3] = hc_byte_perm (w0[2], w0[1], selector); - w1[2] = hc_byte_perm (w0[1], w0[0], selector); - w1[1] = hc_byte_perm (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm ( 0, w7[3], selector); - c1[1] = hc_byte_perm (w7[3], w7[2], selector); - c1[0] = hc_byte_perm (w7[2], w7[1], selector); - c0[3] = hc_byte_perm (w7[1], w7[0], selector); - c0[2] = hc_byte_perm (w7[0], w6[3], selector); - c0[1] = hc_byte_perm (w6[3], w6[2], selector); - c0[0] = hc_byte_perm (w6[2], w6[1], selector); - w7[3] = hc_byte_perm (w6[1], w6[0], selector); - w7[2] = hc_byte_perm (w6[0], w5[3], selector); - w7[1] = hc_byte_perm (w5[3], w5[2], selector); - w7[0] = hc_byte_perm (w5[2], w5[1], selector); - w6[3] = hc_byte_perm (w5[1], w5[0], selector); - w6[2] = hc_byte_perm (w5[0], w4[3], selector); - w6[1] = hc_byte_perm (w4[3], w4[2], selector); - w6[0] = hc_byte_perm (w4[2], w4[1], selector); - w5[3] = hc_byte_perm (w4[1], w4[0], selector); - w5[2] = hc_byte_perm (w4[0], w3[3], selector); - w5[1] = hc_byte_perm (w3[3], w3[2], selector); - w5[0] = hc_byte_perm (w3[2], w3[1], selector); - w4[3] = hc_byte_perm (w3[1], w3[0], selector); - w4[2] = hc_byte_perm (w3[0], w2[3], selector); - w4[1] = hc_byte_perm (w2[3], w2[2], selector); - w4[0] = hc_byte_perm (w2[2], w2[1], selector); - w3[3] = hc_byte_perm (w2[1], w2[0], selector); - w3[2] = hc_byte_perm (w2[0], w1[3], selector); - w3[1] = hc_byte_perm (w1[3], w1[2], selector); - w3[0] = hc_byte_perm (w1[2], w1[1], selector); - w2[3] = 
hc_byte_perm (w1[1], w1[0], selector); - w2[2] = hc_byte_perm (w1[0], w0[3], selector); - w2[1] = hc_byte_perm (w0[3], w0[2], selector); - w2[0] = hc_byte_perm (w0[2], w0[1], selector); - w1[3] = hc_byte_perm (w0[1], w0[0], selector); - w1[2] = hc_byte_perm (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm ( 0, w7[3], selector); - c1[2] = hc_byte_perm (w7[3], w7[2], selector); - c1[1] = hc_byte_perm (w7[2], w7[1], selector); - c1[0] = hc_byte_perm (w7[1], w7[0], selector); - c0[3] = hc_byte_perm (w7[0], w6[3], selector); - c0[2] = hc_byte_perm (w6[3], w6[2], selector); - c0[1] = hc_byte_perm (w6[2], w6[1], selector); - c0[0] = hc_byte_perm (w6[1], w6[0], selector); - w7[3] = hc_byte_perm (w6[0], w5[3], selector); - w7[2] = hc_byte_perm (w5[3], w5[2], selector); - w7[1] = hc_byte_perm (w5[2], w5[1], selector); - w7[0] = hc_byte_perm (w5[1], w5[0], selector); - w6[3] = hc_byte_perm (w5[0], w4[3], selector); - w6[2] = hc_byte_perm (w4[3], w4[2], selector); - w6[1] = hc_byte_perm (w4[2], w4[1], selector); - w6[0] = hc_byte_perm (w4[1], w4[0], selector); - w5[3] = hc_byte_perm (w4[0], w3[3], selector); - w5[2] = hc_byte_perm (w3[3], w3[2], selector); - w5[1] = hc_byte_perm (w3[2], w3[1], selector); - w5[0] = hc_byte_perm (w3[1], w3[0], selector); - w4[3] = hc_byte_perm (w3[0], w2[3], selector); - w4[2] = hc_byte_perm (w2[3], w2[2], selector); - w4[1] = hc_byte_perm (w2[2], w2[1], selector); - w4[0] = hc_byte_perm (w2[1], w2[0], selector); - w3[3] = hc_byte_perm (w2[0], w1[3], selector); - w3[2] = hc_byte_perm (w1[3], w1[2], selector); - w3[1] = hc_byte_perm (w1[2], w1[1], selector); - w3[0] = hc_byte_perm (w1[1], w1[0], selector); - w2[3] = hc_byte_perm (w1[0], w0[3], selector); - w2[2] = hc_byte_perm (w0[3], w0[2], selector); - w2[1] = hc_byte_perm (w0[2], w0[1], selector); - w2[0] = hc_byte_perm (w0[1], w0[0], selector); - w1[3] = hc_byte_perm (w0[0], 0, selector); - w1[2] 
= 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm ( 0, w7[3], selector); - c1[3] = hc_byte_perm (w7[3], w7[2], selector); - c1[2] = hc_byte_perm (w7[2], w7[1], selector); - c1[1] = hc_byte_perm (w7[1], w7[0], selector); - c1[0] = hc_byte_perm (w7[0], w6[3], selector); - c0[3] = hc_byte_perm (w6[3], w6[2], selector); - c0[2] = hc_byte_perm (w6[2], w6[1], selector); - c0[1] = hc_byte_perm (w6[1], w6[0], selector); - c0[0] = hc_byte_perm (w6[0], w5[3], selector); - w7[3] = hc_byte_perm (w5[3], w5[2], selector); - w7[2] = hc_byte_perm (w5[2], w5[1], selector); - w7[1] = hc_byte_perm (w5[1], w5[0], selector); - w7[0] = hc_byte_perm (w5[0], w4[3], selector); - w6[3] = hc_byte_perm (w4[3], w4[2], selector); - w6[2] = hc_byte_perm (w4[2], w4[1], selector); - w6[1] = hc_byte_perm (w4[1], w4[0], selector); - w6[0] = hc_byte_perm (w4[0], w3[3], selector); - w5[3] = hc_byte_perm (w3[3], w3[2], selector); - w5[2] = hc_byte_perm (w3[2], w3[1], selector); - w5[1] = hc_byte_perm (w3[1], w3[0], selector); - w5[0] = hc_byte_perm (w3[0], w2[3], selector); - w4[3] = hc_byte_perm (w2[3], w2[2], selector); - w4[2] = hc_byte_perm (w2[2], w2[1], selector); - w4[1] = hc_byte_perm (w2[1], w2[0], selector); - w4[0] = hc_byte_perm (w2[0], w1[3], selector); - w3[3] = hc_byte_perm (w1[3], w1[2], selector); - w3[2] = hc_byte_perm (w1[2], w1[1], selector); - w3[1] = hc_byte_perm (w1[1], w1[0], selector); - w3[0] = hc_byte_perm (w1[0], w0[3], selector); - w2[3] = hc_byte_perm (w0[3], w0[2], selector); - w2[2] = hc_byte_perm (w0[2], w0[1], selector); - w2[1] = hc_byte_perm (w0[1], w0[0], selector); - w2[0] = hc_byte_perm (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm ( 0, w7[3], selector); - c2[0] = hc_byte_perm (w7[3], w7[2], selector); - c1[3] = hc_byte_perm (w7[2], w7[1], selector); - c1[2] = 
hc_byte_perm (w7[1], w7[0], selector); - c1[1] = hc_byte_perm (w7[0], w6[3], selector); - c1[0] = hc_byte_perm (w6[3], w6[2], selector); - c0[3] = hc_byte_perm (w6[2], w6[1], selector); - c0[2] = hc_byte_perm (w6[1], w6[0], selector); - c0[1] = hc_byte_perm (w6[0], w5[3], selector); - c0[0] = hc_byte_perm (w5[3], w5[2], selector); - w7[3] = hc_byte_perm (w5[2], w5[1], selector); - w7[2] = hc_byte_perm (w5[1], w5[0], selector); - w7[1] = hc_byte_perm (w5[0], w4[3], selector); - w7[0] = hc_byte_perm (w4[3], w4[2], selector); - w6[3] = hc_byte_perm (w4[2], w4[1], selector); - w6[2] = hc_byte_perm (w4[1], w4[0], selector); - w6[1] = hc_byte_perm (w4[0], w3[3], selector); - w6[0] = hc_byte_perm (w3[3], w3[2], selector); - w5[3] = hc_byte_perm (w3[2], w3[1], selector); - w5[2] = hc_byte_perm (w3[1], w3[0], selector); - w5[1] = hc_byte_perm (w3[0], w2[3], selector); - w5[0] = hc_byte_perm (w2[3], w2[2], selector); - w4[3] = hc_byte_perm (w2[2], w2[1], selector); - w4[2] = hc_byte_perm (w2[1], w2[0], selector); - w4[1] = hc_byte_perm (w2[0], w1[3], selector); - w4[0] = hc_byte_perm (w1[3], w1[2], selector); - w3[3] = hc_byte_perm (w1[2], w1[1], selector); - w3[2] = hc_byte_perm (w1[1], w1[0], selector); - w3[1] = hc_byte_perm (w1[0], w0[3], selector); - w3[0] = hc_byte_perm (w0[3], w0[2], selector); - w2[3] = hc_byte_perm (w0[2], w0[1], selector); - w2[2] = hc_byte_perm (w0[1], w0[0], selector); - w2[1] = hc_byte_perm (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm ( 0, w7[3], selector); - c2[1] = hc_byte_perm (w7[3], w7[2], selector); - c2[0] = hc_byte_perm (w7[2], w7[1], selector); - c1[3] = hc_byte_perm (w7[1], w7[0], selector); - c1[2] = hc_byte_perm (w7[0], w6[3], selector); - c1[1] = hc_byte_perm (w6[3], w6[2], selector); - c1[0] = hc_byte_perm (w6[2], w6[1], selector); - c0[3] = hc_byte_perm (w6[1], w6[0], selector); - c0[2] = 
hc_byte_perm (w6[0], w5[3], selector); - c0[1] = hc_byte_perm (w5[3], w5[2], selector); - c0[0] = hc_byte_perm (w5[2], w5[1], selector); - w7[3] = hc_byte_perm (w5[1], w5[0], selector); - w7[2] = hc_byte_perm (w5[0], w4[3], selector); - w7[1] = hc_byte_perm (w4[3], w4[2], selector); - w7[0] = hc_byte_perm (w4[2], w4[1], selector); - w6[3] = hc_byte_perm (w4[1], w4[0], selector); - w6[2] = hc_byte_perm (w4[0], w3[3], selector); - w6[1] = hc_byte_perm (w3[3], w3[2], selector); - w6[0] = hc_byte_perm (w3[2], w3[1], selector); - w5[3] = hc_byte_perm (w3[1], w3[0], selector); - w5[2] = hc_byte_perm (w3[0], w2[3], selector); - w5[1] = hc_byte_perm (w2[3], w2[2], selector); - w5[0] = hc_byte_perm (w2[2], w2[1], selector); - w4[3] = hc_byte_perm (w2[1], w2[0], selector); - w4[2] = hc_byte_perm (w2[0], w1[3], selector); - w4[1] = hc_byte_perm (w1[3], w1[2], selector); - w4[0] = hc_byte_perm (w1[2], w1[1], selector); - w3[3] = hc_byte_perm (w1[1], w1[0], selector); - w3[2] = hc_byte_perm (w1[0], w0[3], selector); - w3[1] = hc_byte_perm (w0[3], w0[2], selector); - w3[0] = hc_byte_perm (w0[2], w0[1], selector); - w2[3] = hc_byte_perm (w0[1], w0[0], selector); - w2[2] = hc_byte_perm (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm ( 0, w7[3], selector); - c2[2] = hc_byte_perm (w7[3], w7[2], selector); - c2[1] = hc_byte_perm (w7[2], w7[1], selector); - c2[0] = hc_byte_perm (w7[1], w7[0], selector); - c1[3] = hc_byte_perm (w7[0], w6[3], selector); - c1[2] = hc_byte_perm (w6[3], w6[2], selector); - c1[1] = hc_byte_perm (w6[2], w6[1], selector); - c1[0] = hc_byte_perm (w6[1], w6[0], selector); - c0[3] = hc_byte_perm (w6[0], w5[3], selector); - c0[2] = hc_byte_perm (w5[3], w5[2], selector); - c0[1] = hc_byte_perm (w5[2], w5[1], selector); - c0[0] = hc_byte_perm (w5[1], w5[0], selector); - w7[3] = hc_byte_perm (w5[0], w4[3], selector); 
- w7[2] = hc_byte_perm (w4[3], w4[2], selector); - w7[1] = hc_byte_perm (w4[2], w4[1], selector); - w7[0] = hc_byte_perm (w4[1], w4[0], selector); - w6[3] = hc_byte_perm (w4[0], w3[3], selector); - w6[2] = hc_byte_perm (w3[3], w3[2], selector); - w6[1] = hc_byte_perm (w3[2], w3[1], selector); - w6[0] = hc_byte_perm (w3[1], w3[0], selector); - w5[3] = hc_byte_perm (w3[0], w2[3], selector); - w5[2] = hc_byte_perm (w2[3], w2[2], selector); - w5[1] = hc_byte_perm (w2[2], w2[1], selector); - w5[0] = hc_byte_perm (w2[1], w2[0], selector); - w4[3] = hc_byte_perm (w2[0], w1[3], selector); - w4[2] = hc_byte_perm (w1[3], w1[2], selector); - w4[1] = hc_byte_perm (w1[2], w1[1], selector); - w4[0] = hc_byte_perm (w1[1], w1[0], selector); - w3[3] = hc_byte_perm (w1[0], w0[3], selector); - w3[2] = hc_byte_perm (w0[3], w0[2], selector); - w3[1] = hc_byte_perm (w0[2], w0[1], selector); - w3[0] = hc_byte_perm (w0[1], w0[0], selector); - w2[3] = hc_byte_perm (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm ( 0, w7[3], selector); - c2[3] = hc_byte_perm (w7[3], w7[2], selector); - c2[2] = hc_byte_perm (w7[2], w7[1], selector); - c2[1] = hc_byte_perm (w7[1], w7[0], selector); - c2[0] = hc_byte_perm (w7[0], w6[3], selector); - c1[3] = hc_byte_perm (w6[3], w6[2], selector); - c1[2] = hc_byte_perm (w6[2], w6[1], selector); - c1[1] = hc_byte_perm (w6[1], w6[0], selector); - c1[0] = hc_byte_perm (w6[0], w5[3], selector); - c0[3] = hc_byte_perm (w5[3], w5[2], selector); - c0[2] = hc_byte_perm (w5[2], w5[1], selector); - c0[1] = hc_byte_perm (w5[1], w5[0], selector); - c0[0] = hc_byte_perm (w5[0], w4[3], selector); - w7[3] = hc_byte_perm (w4[3], w4[2], selector); - w7[2] = hc_byte_perm (w4[2], w4[1], selector); - w7[1] = hc_byte_perm (w4[1], w4[0], selector); - w7[0] = hc_byte_perm (w4[0], w3[3], selector); - w6[3] = hc_byte_perm 
(w3[3], w3[2], selector); - w6[2] = hc_byte_perm (w3[2], w3[1], selector); - w6[1] = hc_byte_perm (w3[1], w3[0], selector); - w6[0] = hc_byte_perm (w3[0], w2[3], selector); - w5[3] = hc_byte_perm (w2[3], w2[2], selector); - w5[2] = hc_byte_perm (w2[2], w2[1], selector); - w5[1] = hc_byte_perm (w2[1], w2[0], selector); - w5[0] = hc_byte_perm (w2[0], w1[3], selector); - w4[3] = hc_byte_perm (w1[3], w1[2], selector); - w4[2] = hc_byte_perm (w1[2], w1[1], selector); - w4[1] = hc_byte_perm (w1[1], w1[0], selector); - w4[0] = hc_byte_perm (w1[0], w0[3], selector); - w3[3] = hc_byte_perm (w0[3], w0[2], selector); - w3[2] = hc_byte_perm (w0[2], w0[1], selector); - w3[1] = hc_byte_perm (w0[1], w0[0], selector); - w3[0] = hc_byte_perm (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm ( 0, w7[3], selector); - c3[0] = hc_byte_perm (w7[3], w7[2], selector); - c2[3] = hc_byte_perm (w7[2], w7[1], selector); - c2[2] = hc_byte_perm (w7[1], w7[0], selector); - c2[1] = hc_byte_perm (w7[0], w6[3], selector); - c2[0] = hc_byte_perm (w6[3], w6[2], selector); - c1[3] = hc_byte_perm (w6[2], w6[1], selector); - c1[2] = hc_byte_perm (w6[1], w6[0], selector); - c1[1] = hc_byte_perm (w6[0], w5[3], selector); - c1[0] = hc_byte_perm (w5[3], w5[2], selector); - c0[3] = hc_byte_perm (w5[2], w5[1], selector); - c0[2] = hc_byte_perm (w5[1], w5[0], selector); - c0[1] = hc_byte_perm (w5[0], w4[3], selector); - c0[0] = hc_byte_perm (w4[3], w4[2], selector); - w7[3] = hc_byte_perm (w4[2], w4[1], selector); - w7[2] = hc_byte_perm (w4[1], w4[0], selector); - w7[1] = hc_byte_perm (w4[0], w3[3], selector); - w7[0] = hc_byte_perm (w3[3], w3[2], selector); - w6[3] = hc_byte_perm (w3[2], w3[1], selector); - w6[2] = hc_byte_perm (w3[1], w3[0], selector); - w6[1] = hc_byte_perm (w3[0], w2[3], selector); - w6[0] = hc_byte_perm (w2[3], 
w2[2], selector); - w5[3] = hc_byte_perm (w2[2], w2[1], selector); - w5[2] = hc_byte_perm (w2[1], w2[0], selector); - w5[1] = hc_byte_perm (w2[0], w1[3], selector); - w5[0] = hc_byte_perm (w1[3], w1[2], selector); - w4[3] = hc_byte_perm (w1[2], w1[1], selector); - w4[2] = hc_byte_perm (w1[1], w1[0], selector); - w4[1] = hc_byte_perm (w1[0], w0[3], selector); - w4[0] = hc_byte_perm (w0[3], w0[2], selector); - w3[3] = hc_byte_perm (w0[2], w0[1], selector); - w3[2] = hc_byte_perm (w0[1], w0[0], selector); - w3[1] = hc_byte_perm (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm ( 0, w7[3], selector); - c3[1] = hc_byte_perm (w7[3], w7[2], selector); - c3[0] = hc_byte_perm (w7[2], w7[1], selector); - c2[3] = hc_byte_perm (w7[1], w7[0], selector); - c2[2] = hc_byte_perm (w7[0], w6[3], selector); - c2[1] = hc_byte_perm (w6[3], w6[2], selector); - c2[0] = hc_byte_perm (w6[2], w6[1], selector); - c1[3] = hc_byte_perm (w6[1], w6[0], selector); - c1[2] = hc_byte_perm (w6[0], w5[3], selector); - c1[1] = hc_byte_perm (w5[3], w5[2], selector); - c1[0] = hc_byte_perm (w5[2], w5[1], selector); - c0[3] = hc_byte_perm (w5[1], w5[0], selector); - c0[2] = hc_byte_perm (w5[0], w4[3], selector); - c0[1] = hc_byte_perm (w4[3], w4[2], selector); - c0[0] = hc_byte_perm (w4[2], w4[1], selector); - w7[3] = hc_byte_perm (w4[1], w4[0], selector); - w7[2] = hc_byte_perm (w4[0], w3[3], selector); - w7[1] = hc_byte_perm (w3[3], w3[2], selector); - w7[0] = hc_byte_perm (w3[2], w3[1], selector); - w6[3] = hc_byte_perm (w3[1], w3[0], selector); - w6[2] = hc_byte_perm (w3[0], w2[3], selector); - w6[1] = hc_byte_perm (w2[3], w2[2], selector); - w6[0] = hc_byte_perm (w2[2], w2[1], selector); - w5[3] = hc_byte_perm (w2[1], w2[0], selector); - w5[2] = hc_byte_perm (w2[0], w1[3], selector); - w5[1] = hc_byte_perm (w1[3], 
w1[2], selector); - w5[0] = hc_byte_perm (w1[2], w1[1], selector); - w4[3] = hc_byte_perm (w1[1], w1[0], selector); - w4[2] = hc_byte_perm (w1[0], w0[3], selector); - w4[1] = hc_byte_perm (w0[3], w0[2], selector); - w4[0] = hc_byte_perm (w0[2], w0[1], selector); - w3[3] = hc_byte_perm (w0[1], w0[0], selector); - w3[2] = hc_byte_perm (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm ( 0, w7[3], selector); - c3[2] = hc_byte_perm (w7[3], w7[2], selector); - c3[1] = hc_byte_perm (w7[2], w7[1], selector); - c3[0] = hc_byte_perm (w7[1], w7[0], selector); - c2[3] = hc_byte_perm (w7[0], w6[3], selector); - c2[2] = hc_byte_perm (w6[3], w6[2], selector); - c2[1] = hc_byte_perm (w6[2], w6[1], selector); - c2[0] = hc_byte_perm (w6[1], w6[0], selector); - c1[3] = hc_byte_perm (w6[0], w5[3], selector); - c1[2] = hc_byte_perm (w5[3], w5[2], selector); - c1[1] = hc_byte_perm (w5[2], w5[1], selector); - c1[0] = hc_byte_perm (w5[1], w5[0], selector); - c0[3] = hc_byte_perm (w5[0], w4[3], selector); - c0[2] = hc_byte_perm (w4[3], w4[2], selector); - c0[1] = hc_byte_perm (w4[2], w4[1], selector); - c0[0] = hc_byte_perm (w4[1], w4[0], selector); - w7[3] = hc_byte_perm (w4[0], w3[3], selector); - w7[2] = hc_byte_perm (w3[3], w3[2], selector); - w7[1] = hc_byte_perm (w3[2], w3[1], selector); - w7[0] = hc_byte_perm (w3[1], w3[0], selector); - w6[3] = hc_byte_perm (w3[0], w2[3], selector); - w6[2] = hc_byte_perm (w2[3], w2[2], selector); - w6[1] = hc_byte_perm (w2[2], w2[1], selector); - w6[0] = hc_byte_perm (w2[1], w2[0], selector); - w5[3] = hc_byte_perm (w2[0], w1[3], selector); - w5[2] = hc_byte_perm (w1[3], w1[2], selector); - w5[1] = hc_byte_perm (w1[2], w1[1], selector); - w5[0] = hc_byte_perm (w1[1], w1[0], selector); - w4[3] = hc_byte_perm (w1[0], w0[3], selector); - w4[2] = 
hc_byte_perm (w0[3], w0[2], selector); - w4[1] = hc_byte_perm (w0[2], w0[1], selector); - w4[0] = hc_byte_perm (w0[1], w0[0], selector); - w3[3] = hc_byte_perm (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm ( 0, w7[3], selector); - c3[3] = hc_byte_perm (w7[3], w7[2], selector); - c3[2] = hc_byte_perm (w7[2], w7[1], selector); - c3[1] = hc_byte_perm (w7[1], w7[0], selector); - c3[0] = hc_byte_perm (w7[0], w6[3], selector); - c2[3] = hc_byte_perm (w6[3], w6[2], selector); - c2[2] = hc_byte_perm (w6[2], w6[1], selector); - c2[1] = hc_byte_perm (w6[1], w6[0], selector); - c2[0] = hc_byte_perm (w6[0], w5[3], selector); - c1[3] = hc_byte_perm (w5[3], w5[2], selector); - c1[2] = hc_byte_perm (w5[2], w5[1], selector); - c1[1] = hc_byte_perm (w5[1], w5[0], selector); - c1[0] = hc_byte_perm (w5[0], w4[3], selector); - c0[3] = hc_byte_perm (w4[3], w4[2], selector); - c0[2] = hc_byte_perm (w4[2], w4[1], selector); - c0[1] = hc_byte_perm (w4[1], w4[0], selector); - c0[0] = hc_byte_perm (w4[0], w3[3], selector); - w7[3] = hc_byte_perm (w3[3], w3[2], selector); - w7[2] = hc_byte_perm (w3[2], w3[1], selector); - w7[1] = hc_byte_perm (w3[1], w3[0], selector); - w7[0] = hc_byte_perm (w3[0], w2[3], selector); - w6[3] = hc_byte_perm (w2[3], w2[2], selector); - w6[2] = hc_byte_perm (w2[2], w2[1], selector); - w6[1] = hc_byte_perm (w2[1], w2[0], selector); - w6[0] = hc_byte_perm (w2[0], w1[3], selector); - w5[3] = hc_byte_perm (w1[3], w1[2], selector); - w5[2] = hc_byte_perm (w1[2], w1[1], selector); - w5[1] = hc_byte_perm (w1[1], w1[0], selector); - w5[0] = hc_byte_perm (w1[0], w0[3], selector); - w4[3] = hc_byte_perm (w0[3], w0[2], selector); - w4[2] = hc_byte_perm (w0[2], w0[1], selector); - w4[1] = hc_byte_perm (w0[1], w0[0], selector); - w4[0] = hc_byte_perm (w0[0], 0, 
selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm ( 0, w7[3], selector); - c4[0] = hc_byte_perm (w7[3], w7[2], selector); - c3[3] = hc_byte_perm (w7[2], w7[1], selector); - c3[2] = hc_byte_perm (w7[1], w7[0], selector); - c3[1] = hc_byte_perm (w7[0], w6[3], selector); - c3[0] = hc_byte_perm (w6[3], w6[2], selector); - c2[3] = hc_byte_perm (w6[2], w6[1], selector); - c2[2] = hc_byte_perm (w6[1], w6[0], selector); - c2[1] = hc_byte_perm (w6[0], w5[3], selector); - c2[0] = hc_byte_perm (w5[3], w5[2], selector); - c1[3] = hc_byte_perm (w5[2], w5[1], selector); - c1[2] = hc_byte_perm (w5[1], w5[0], selector); - c1[1] = hc_byte_perm (w5[0], w4[3], selector); - c1[0] = hc_byte_perm (w4[3], w4[2], selector); - c0[3] = hc_byte_perm (w4[2], w4[1], selector); - c0[2] = hc_byte_perm (w4[1], w4[0], selector); - c0[1] = hc_byte_perm (w4[0], w3[3], selector); - c0[0] = hc_byte_perm (w3[3], w3[2], selector); - w7[3] = hc_byte_perm (w3[2], w3[1], selector); - w7[2] = hc_byte_perm (w3[1], w3[0], selector); - w7[1] = hc_byte_perm (w3[0], w2[3], selector); - w7[0] = hc_byte_perm (w2[3], w2[2], selector); - w6[3] = hc_byte_perm (w2[2], w2[1], selector); - w6[2] = hc_byte_perm (w2[1], w2[0], selector); - w6[1] = hc_byte_perm (w2[0], w1[3], selector); - w6[0] = hc_byte_perm (w1[3], w1[2], selector); - w5[3] = hc_byte_perm (w1[2], w1[1], selector); - w5[2] = hc_byte_perm (w1[1], w1[0], selector); - w5[1] = hc_byte_perm (w1[0], w0[3], selector); - w5[0] = hc_byte_perm (w0[3], w0[2], selector); - w4[3] = hc_byte_perm (w0[2], w0[1], selector); - w4[2] = hc_byte_perm (w0[1], w0[0], selector); - w4[1] = hc_byte_perm (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - 
w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm ( 0, w7[3], selector); - c4[1] = hc_byte_perm (w7[3], w7[2], selector); - c4[0] = hc_byte_perm (w7[2], w7[1], selector); - c3[3] = hc_byte_perm (w7[1], w7[0], selector); - c3[2] = hc_byte_perm (w7[0], w6[3], selector); - c3[1] = hc_byte_perm (w6[3], w6[2], selector); - c3[0] = hc_byte_perm (w6[2], w6[1], selector); - c2[3] = hc_byte_perm (w6[1], w6[0], selector); - c2[2] = hc_byte_perm (w6[0], w5[3], selector); - c2[1] = hc_byte_perm (w5[3], w5[2], selector); - c2[0] = hc_byte_perm (w5[2], w5[1], selector); - c1[3] = hc_byte_perm (w5[1], w5[0], selector); - c1[2] = hc_byte_perm (w5[0], w4[3], selector); - c1[1] = hc_byte_perm (w4[3], w4[2], selector); - c1[0] = hc_byte_perm (w4[2], w4[1], selector); - c0[3] = hc_byte_perm (w4[1], w4[0], selector); - c0[2] = hc_byte_perm (w4[0], w3[3], selector); - c0[1] = hc_byte_perm (w3[3], w3[2], selector); - c0[0] = hc_byte_perm (w3[2], w3[1], selector); - w7[3] = hc_byte_perm (w3[1], w3[0], selector); - w7[2] = hc_byte_perm (w3[0], w2[3], selector); - w7[1] = hc_byte_perm (w2[3], w2[2], selector); - w7[0] = hc_byte_perm (w2[2], w2[1], selector); - w6[3] = hc_byte_perm (w2[1], w2[0], selector); - w6[2] = hc_byte_perm (w2[0], w1[3], selector); - w6[1] = hc_byte_perm (w1[3], w1[2], selector); - w6[0] = hc_byte_perm (w1[2], w1[1], selector); - w5[3] = hc_byte_perm (w1[1], w1[0], selector); - w5[2] = hc_byte_perm (w1[0], w0[3], selector); - w5[1] = hc_byte_perm (w0[3], w0[2], selector); - w5[0] = hc_byte_perm (w0[2], w0[1], selector); - w4[3] = hc_byte_perm (w0[1], w0[0], selector); - w4[2] = hc_byte_perm (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm ( 0, 
w7[3], selector); - c4[2] = hc_byte_perm (w7[3], w7[2], selector); - c4[1] = hc_byte_perm (w7[2], w7[1], selector); - c4[0] = hc_byte_perm (w7[1], w7[0], selector); - c3[3] = hc_byte_perm (w7[0], w6[3], selector); - c3[2] = hc_byte_perm (w6[3], w6[2], selector); - c3[1] = hc_byte_perm (w6[2], w6[1], selector); - c3[0] = hc_byte_perm (w6[1], w6[0], selector); - c2[3] = hc_byte_perm (w6[0], w5[3], selector); - c2[2] = hc_byte_perm (w5[3], w5[2], selector); - c2[1] = hc_byte_perm (w5[2], w5[1], selector); - c2[0] = hc_byte_perm (w5[1], w5[0], selector); - c1[3] = hc_byte_perm (w5[0], w4[3], selector); - c1[2] = hc_byte_perm (w4[3], w4[2], selector); - c1[1] = hc_byte_perm (w4[2], w4[1], selector); - c1[0] = hc_byte_perm (w4[1], w4[0], selector); - c0[3] = hc_byte_perm (w4[0], w3[3], selector); - c0[2] = hc_byte_perm (w3[3], w3[2], selector); - c0[1] = hc_byte_perm (w3[2], w3[1], selector); - c0[0] = hc_byte_perm (w3[1], w3[0], selector); - w7[3] = hc_byte_perm (w3[0], w2[3], selector); - w7[2] = hc_byte_perm (w2[3], w2[2], selector); - w7[1] = hc_byte_perm (w2[2], w2[1], selector); - w7[0] = hc_byte_perm (w2[1], w2[0], selector); - w6[3] = hc_byte_perm (w2[0], w1[3], selector); - w6[2] = hc_byte_perm (w1[3], w1[2], selector); - w6[1] = hc_byte_perm (w1[2], w1[1], selector); - w6[0] = hc_byte_perm (w1[1], w1[0], selector); - w5[3] = hc_byte_perm (w1[0], w0[3], selector); - w5[2] = hc_byte_perm (w0[3], w0[2], selector); - w5[1] = hc_byte_perm (w0[2], w0[1], selector); - w5[0] = hc_byte_perm (w0[1], w0[0], selector); - w4[3] = hc_byte_perm (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm ( 0, w7[3], selector); - c4[3] = hc_byte_perm (w7[3], w7[2], selector); - c4[2] = hc_byte_perm (w7[2], w7[1], selector); - 
c4[1] = hc_byte_perm (w7[1], w7[0], selector); - c4[0] = hc_byte_perm (w7[0], w6[3], selector); - c3[3] = hc_byte_perm (w6[3], w6[2], selector); - c3[2] = hc_byte_perm (w6[2], w6[1], selector); - c3[1] = hc_byte_perm (w6[1], w6[0], selector); - c3[0] = hc_byte_perm (w6[0], w5[3], selector); - c2[3] = hc_byte_perm (w5[3], w5[2], selector); - c2[2] = hc_byte_perm (w5[2], w5[1], selector); - c2[1] = hc_byte_perm (w5[1], w5[0], selector); - c2[0] = hc_byte_perm (w5[0], w4[3], selector); - c1[3] = hc_byte_perm (w4[3], w4[2], selector); - c1[2] = hc_byte_perm (w4[2], w4[1], selector); - c1[1] = hc_byte_perm (w4[1], w4[0], selector); - c1[0] = hc_byte_perm (w4[0], w3[3], selector); - c0[3] = hc_byte_perm (w3[3], w3[2], selector); - c0[2] = hc_byte_perm (w3[2], w3[1], selector); - c0[1] = hc_byte_perm (w3[1], w3[0], selector); - c0[0] = hc_byte_perm (w3[0], w2[3], selector); - w7[3] = hc_byte_perm (w2[3], w2[2], selector); - w7[2] = hc_byte_perm (w2[2], w2[1], selector); - w7[1] = hc_byte_perm (w2[1], w2[0], selector); - w7[0] = hc_byte_perm (w2[0], w1[3], selector); - w6[3] = hc_byte_perm (w1[3], w1[2], selector); - w6[2] = hc_byte_perm (w1[2], w1[1], selector); - w6[1] = hc_byte_perm (w1[1], w1[0], selector); - w6[0] = hc_byte_perm (w1[0], w0[3], selector); - w5[3] = hc_byte_perm (w0[3], w0[2], selector); - w5[2] = hc_byte_perm (w0[2], w0[1], selector); - w5[1] = hc_byte_perm (w0[1], w0[0], selector); - w5[0] = hc_byte_perm (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm ( 0, w7[3], selector); - c5[0] = hc_byte_perm (w7[3], w7[2], selector); - c4[3] = hc_byte_perm (w7[2], w7[1], selector); - c4[2] = hc_byte_perm (w7[1], w7[0], selector); - c4[1] = hc_byte_perm (w7[0], w6[3], selector); - c4[0] = 
hc_byte_perm (w6[3], w6[2], selector); - c3[3] = hc_byte_perm (w6[2], w6[1], selector); - c3[2] = hc_byte_perm (w6[1], w6[0], selector); - c3[1] = hc_byte_perm (w6[0], w5[3], selector); - c3[0] = hc_byte_perm (w5[3], w5[2], selector); - c2[3] = hc_byte_perm (w5[2], w5[1], selector); - c2[2] = hc_byte_perm (w5[1], w5[0], selector); - c2[1] = hc_byte_perm (w5[0], w4[3], selector); - c2[0] = hc_byte_perm (w4[3], w4[2], selector); - c1[3] = hc_byte_perm (w4[2], w4[1], selector); - c1[2] = hc_byte_perm (w4[1], w4[0], selector); - c1[1] = hc_byte_perm (w4[0], w3[3], selector); - c1[0] = hc_byte_perm (w3[3], w3[2], selector); - c0[3] = hc_byte_perm (w3[2], w3[1], selector); - c0[2] = hc_byte_perm (w3[1], w3[0], selector); - c0[1] = hc_byte_perm (w3[0], w2[3], selector); - c0[0] = hc_byte_perm (w2[3], w2[2], selector); - w7[3] = hc_byte_perm (w2[2], w2[1], selector); - w7[2] = hc_byte_perm (w2[1], w2[0], selector); - w7[1] = hc_byte_perm (w2[0], w1[3], selector); - w7[0] = hc_byte_perm (w1[3], w1[2], selector); - w6[3] = hc_byte_perm (w1[2], w1[1], selector); - w6[2] = hc_byte_perm (w1[1], w1[0], selector); - w6[1] = hc_byte_perm (w1[0], w0[3], selector); - w6[0] = hc_byte_perm (w0[3], w0[2], selector); - w5[3] = hc_byte_perm (w0[2], w0[1], selector); - w5[2] = hc_byte_perm (w0[1], w0[0], selector); - w5[1] = hc_byte_perm (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm ( 0, w7[3], selector); - c5[1] = hc_byte_perm (w7[3], w7[2], selector); - c5[0] = hc_byte_perm (w7[2], w7[1], selector); - c4[3] = hc_byte_perm (w7[1], w7[0], selector); - c4[2] = hc_byte_perm (w7[0], w6[3], selector); - c4[1] = hc_byte_perm (w6[3], w6[2], selector); - c4[0] = hc_byte_perm (w6[2], w6[1], selector); - 
c3[3] = hc_byte_perm (w6[1], w6[0], selector); - c3[2] = hc_byte_perm (w6[0], w5[3], selector); - c3[1] = hc_byte_perm (w5[3], w5[2], selector); - c3[0] = hc_byte_perm (w5[2], w5[1], selector); - c2[3] = hc_byte_perm (w5[1], w5[0], selector); - c2[2] = hc_byte_perm (w5[0], w4[3], selector); - c2[1] = hc_byte_perm (w4[3], w4[2], selector); - c2[0] = hc_byte_perm (w4[2], w4[1], selector); - c1[3] = hc_byte_perm (w4[1], w4[0], selector); - c1[2] = hc_byte_perm (w4[0], w3[3], selector); - c1[1] = hc_byte_perm (w3[3], w3[2], selector); - c1[0] = hc_byte_perm (w3[2], w3[1], selector); - c0[3] = hc_byte_perm (w3[1], w3[0], selector); - c0[2] = hc_byte_perm (w3[0], w2[3], selector); - c0[1] = hc_byte_perm (w2[3], w2[2], selector); - c0[0] = hc_byte_perm (w2[2], w2[1], selector); - w7[3] = hc_byte_perm (w2[1], w2[0], selector); - w7[2] = hc_byte_perm (w2[0], w1[3], selector); - w7[1] = hc_byte_perm (w1[3], w1[2], selector); - w7[0] = hc_byte_perm (w1[2], w1[1], selector); - w6[3] = hc_byte_perm (w1[1], w1[0], selector); - w6[2] = hc_byte_perm (w1[0], w0[3], selector); - w6[1] = hc_byte_perm (w0[3], w0[2], selector); - w6[0] = hc_byte_perm (w0[2], w0[1], selector); - w5[3] = hc_byte_perm (w0[1], w0[0], selector); - w5[2] = hc_byte_perm (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm ( 0, w7[3], selector); - c5[2] = hc_byte_perm (w7[3], w7[2], selector); - c5[1] = hc_byte_perm (w7[2], w7[1], selector); - c5[0] = hc_byte_perm (w7[1], w7[0], selector); - c4[3] = hc_byte_perm (w7[0], w6[3], selector); - c4[2] = hc_byte_perm (w6[3], w6[2], selector); - c4[1] = hc_byte_perm (w6[2], w6[1], selector); - c4[0] = hc_byte_perm (w6[1], w6[0], selector); - c3[3] = hc_byte_perm (w6[0], 
w5[3], selector); - c3[2] = hc_byte_perm (w5[3], w5[2], selector); - c3[1] = hc_byte_perm (w5[2], w5[1], selector); - c3[0] = hc_byte_perm (w5[1], w5[0], selector); - c2[3] = hc_byte_perm (w5[0], w4[3], selector); - c2[2] = hc_byte_perm (w4[3], w4[2], selector); - c2[1] = hc_byte_perm (w4[2], w4[1], selector); - c2[0] = hc_byte_perm (w4[1], w4[0], selector); - c1[3] = hc_byte_perm (w4[0], w3[3], selector); - c1[2] = hc_byte_perm (w3[3], w3[2], selector); - c1[1] = hc_byte_perm (w3[2], w3[1], selector); - c1[0] = hc_byte_perm (w3[1], w3[0], selector); - c0[3] = hc_byte_perm (w3[0], w2[3], selector); - c0[2] = hc_byte_perm (w2[3], w2[2], selector); - c0[1] = hc_byte_perm (w2[2], w2[1], selector); - c0[0] = hc_byte_perm (w2[1], w2[0], selector); - w7[3] = hc_byte_perm (w2[0], w1[3], selector); - w7[2] = hc_byte_perm (w1[3], w1[2], selector); - w7[1] = hc_byte_perm (w1[2], w1[1], selector); - w7[0] = hc_byte_perm (w1[1], w1[0], selector); - w6[3] = hc_byte_perm (w1[0], w0[3], selector); - w6[2] = hc_byte_perm (w0[3], w0[2], selector); - w6[1] = hc_byte_perm (w0[2], w0[1], selector); - w6[0] = hc_byte_perm (w0[1], w0[0], selector); - w5[3] = hc_byte_perm (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm ( 0, w7[3], selector); - c5[3] = hc_byte_perm (w7[3], w7[2], selector); - c5[2] = hc_byte_perm (w7[2], w7[1], selector); - c5[1] = hc_byte_perm (w7[1], w7[0], selector); - c5[0] = hc_byte_perm (w7[0], w6[3], selector); - c4[3] = hc_byte_perm (w6[3], w6[2], selector); - c4[2] = hc_byte_perm (w6[2], w6[1], selector); - c4[1] = hc_byte_perm (w6[1], w6[0], selector); - c4[0] = hc_byte_perm (w6[0], w5[3], selector); - c3[3] = hc_byte_perm (w5[3], w5[2], selector); 
- c3[2] = hc_byte_perm (w5[2], w5[1], selector); - c3[1] = hc_byte_perm (w5[1], w5[0], selector); - c3[0] = hc_byte_perm (w5[0], w4[3], selector); - c2[3] = hc_byte_perm (w4[3], w4[2], selector); - c2[2] = hc_byte_perm (w4[2], w4[1], selector); - c2[1] = hc_byte_perm (w4[1], w4[0], selector); - c2[0] = hc_byte_perm (w4[0], w3[3], selector); - c1[3] = hc_byte_perm (w3[3], w3[2], selector); - c1[2] = hc_byte_perm (w3[2], w3[1], selector); - c1[1] = hc_byte_perm (w3[1], w3[0], selector); - c1[0] = hc_byte_perm (w3[0], w2[3], selector); - c0[3] = hc_byte_perm (w2[3], w2[2], selector); - c0[2] = hc_byte_perm (w2[2], w2[1], selector); - c0[1] = hc_byte_perm (w2[1], w2[0], selector); - c0[0] = hc_byte_perm (w2[0], w1[3], selector); - w7[3] = hc_byte_perm (w1[3], w1[2], selector); - w7[2] = hc_byte_perm (w1[2], w1[1], selector); - w7[1] = hc_byte_perm (w1[1], w1[0], selector); - w7[0] = hc_byte_perm (w1[0], w0[3], selector); - w6[3] = hc_byte_perm (w0[3], w0[2], selector); - w6[2] = hc_byte_perm (w0[2], w0[1], selector); - w6[1] = hc_byte_perm (w0[1], w0[0], selector); - w6[0] = hc_byte_perm (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm ( 0, w7[3], selector); - c6[0] = hc_byte_perm (w7[3], w7[2], selector); - c5[3] = hc_byte_perm (w7[2], w7[1], selector); - c5[2] = hc_byte_perm (w7[1], w7[0], selector); - c5[1] = hc_byte_perm (w7[0], w6[3], selector); - c5[0] = hc_byte_perm (w6[3], w6[2], selector); - c4[3] = hc_byte_perm (w6[2], w6[1], selector); - c4[2] = hc_byte_perm (w6[1], w6[0], selector); - c4[1] = hc_byte_perm (w6[0], w5[3], selector); - c4[0] = hc_byte_perm (w5[3], w5[2], selector); - c3[3] = hc_byte_perm (w5[2], w5[1], selector); - 
c3[2] = hc_byte_perm (w5[1], w5[0], selector); - c3[1] = hc_byte_perm (w5[0], w4[3], selector); - c3[0] = hc_byte_perm (w4[3], w4[2], selector); - c2[3] = hc_byte_perm (w4[2], w4[1], selector); - c2[2] = hc_byte_perm (w4[1], w4[0], selector); - c2[1] = hc_byte_perm (w4[0], w3[3], selector); - c2[0] = hc_byte_perm (w3[3], w3[2], selector); - c1[3] = hc_byte_perm (w3[2], w3[1], selector); - c1[2] = hc_byte_perm (w3[1], w3[0], selector); - c1[1] = hc_byte_perm (w3[0], w2[3], selector); - c1[0] = hc_byte_perm (w2[3], w2[2], selector); - c0[3] = hc_byte_perm (w2[2], w2[1], selector); - c0[2] = hc_byte_perm (w2[1], w2[0], selector); - c0[1] = hc_byte_perm (w2[0], w1[3], selector); - c0[0] = hc_byte_perm (w1[3], w1[2], selector); - w7[3] = hc_byte_perm (w1[2], w1[1], selector); - w7[2] = hc_byte_perm (w1[1], w1[0], selector); - w7[1] = hc_byte_perm (w1[0], w0[3], selector); - w7[0] = hc_byte_perm (w0[3], w0[2], selector); - w6[3] = hc_byte_perm (w0[2], w0[1], selector); - w6[2] = hc_byte_perm (w0[1], w0[0], selector); - w6[1] = hc_byte_perm (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm ( 0, w7[3], selector); - c6[1] = hc_byte_perm (w7[3], w7[2], selector); - c6[0] = hc_byte_perm (w7[2], w7[1], selector); - c5[3] = hc_byte_perm (w7[1], w7[0], selector); - c5[2] = hc_byte_perm (w7[0], w6[3], selector); - c5[1] = hc_byte_perm (w6[3], w6[2], selector); - c5[0] = hc_byte_perm (w6[2], w6[1], selector); - c4[3] = hc_byte_perm (w6[1], w6[0], selector); - c4[2] = hc_byte_perm (w6[0], w5[3], selector); - c4[1] = hc_byte_perm (w5[3], w5[2], selector); - c4[0] = hc_byte_perm (w5[2], w5[1], selector); - c3[3] = hc_byte_perm (w5[1], w5[0], 
selector); - c3[2] = hc_byte_perm (w5[0], w4[3], selector); - c3[1] = hc_byte_perm (w4[3], w4[2], selector); - c3[0] = hc_byte_perm (w4[2], w4[1], selector); - c2[3] = hc_byte_perm (w4[1], w4[0], selector); - c2[2] = hc_byte_perm (w4[0], w3[3], selector); - c2[1] = hc_byte_perm (w3[3], w3[2], selector); - c2[0] = hc_byte_perm (w3[2], w3[1], selector); - c1[3] = hc_byte_perm (w3[1], w3[0], selector); - c1[2] = hc_byte_perm (w3[0], w2[3], selector); - c1[1] = hc_byte_perm (w2[3], w2[2], selector); - c1[0] = hc_byte_perm (w2[2], w2[1], selector); - c0[3] = hc_byte_perm (w2[1], w2[0], selector); - c0[2] = hc_byte_perm (w2[0], w1[3], selector); - c0[1] = hc_byte_perm (w1[3], w1[2], selector); - c0[0] = hc_byte_perm (w1[2], w1[1], selector); - w7[3] = hc_byte_perm (w1[1], w1[0], selector); - w7[2] = hc_byte_perm (w1[0], w0[3], selector); - w7[1] = hc_byte_perm (w0[3], w0[2], selector); - w7[0] = hc_byte_perm (w0[2], w0[1], selector); - w6[3] = hc_byte_perm (w0[1], w0[0], selector); - w6[2] = hc_byte_perm (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm ( 0, w7[3], selector); - c6[2] = hc_byte_perm (w7[3], w7[2], selector); - c6[1] = hc_byte_perm (w7[2], w7[1], selector); - c6[0] = hc_byte_perm (w7[1], w7[0], selector); - c5[3] = hc_byte_perm (w7[0], w6[3], selector); - c5[2] = hc_byte_perm (w6[3], w6[2], selector); - c5[1] = hc_byte_perm (w6[2], w6[1], selector); - c5[0] = hc_byte_perm (w6[1], w6[0], selector); - c4[3] = hc_byte_perm (w6[0], w5[3], selector); - c4[2] = hc_byte_perm (w5[3], w5[2], selector); - c4[1] = hc_byte_perm (w5[2], w5[1], selector); - c4[0] = hc_byte_perm (w5[1], w5[0], selector); - c3[3] = 
hc_byte_perm (w5[0], w4[3], selector); - c3[2] = hc_byte_perm (w4[3], w4[2], selector); - c3[1] = hc_byte_perm (w4[2], w4[1], selector); - c3[0] = hc_byte_perm (w4[1], w4[0], selector); - c2[3] = hc_byte_perm (w4[0], w3[3], selector); - c2[2] = hc_byte_perm (w3[3], w3[2], selector); - c2[1] = hc_byte_perm (w3[2], w3[1], selector); - c2[0] = hc_byte_perm (w3[1], w3[0], selector); - c1[3] = hc_byte_perm (w3[0], w2[3], selector); - c1[2] = hc_byte_perm (w2[3], w2[2], selector); - c1[1] = hc_byte_perm (w2[2], w2[1], selector); - c1[0] = hc_byte_perm (w2[1], w2[0], selector); - c0[3] = hc_byte_perm (w2[0], w1[3], selector); - c0[2] = hc_byte_perm (w1[3], w1[2], selector); - c0[1] = hc_byte_perm (w1[2], w1[1], selector); - c0[0] = hc_byte_perm (w1[1], w1[0], selector); - w7[3] = hc_byte_perm (w1[0], w0[3], selector); - w7[2] = hc_byte_perm (w0[3], w0[2], selector); - w7[1] = hc_byte_perm (w0[2], w0[1], selector); - w7[0] = hc_byte_perm (w0[1], w0[0], selector); - w6[3] = hc_byte_perm (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm ( 0, w7[3], selector); - c6[3] = hc_byte_perm (w7[3], w7[2], selector); - c6[2] = hc_byte_perm (w7[2], w7[1], selector); - c6[1] = hc_byte_perm (w7[1], w7[0], selector); - c6[0] = hc_byte_perm (w7[0], w6[3], selector); - c5[3] = hc_byte_perm (w6[3], w6[2], selector); - c5[2] = hc_byte_perm (w6[2], w6[1], selector); - c5[1] = hc_byte_perm (w6[1], w6[0], selector); - c5[0] = hc_byte_perm (w6[0], w5[3], selector); - c4[3] = hc_byte_perm (w5[3], w5[2], selector); - c4[2] = hc_byte_perm (w5[2], w5[1], selector); - c4[1] = hc_byte_perm (w5[1], w5[0], selector); - c4[0] = hc_byte_perm 
(w5[0], w4[3], selector); - c3[3] = hc_byte_perm (w4[3], w4[2], selector); - c3[2] = hc_byte_perm (w4[2], w4[1], selector); - c3[1] = hc_byte_perm (w4[1], w4[0], selector); - c3[0] = hc_byte_perm (w4[0], w3[3], selector); - c2[3] = hc_byte_perm (w3[3], w3[2], selector); - c2[2] = hc_byte_perm (w3[2], w3[1], selector); - c2[1] = hc_byte_perm (w3[1], w3[0], selector); - c2[0] = hc_byte_perm (w3[0], w2[3], selector); - c1[3] = hc_byte_perm (w2[3], w2[2], selector); - c1[2] = hc_byte_perm (w2[2], w2[1], selector); - c1[1] = hc_byte_perm (w2[1], w2[0], selector); - c1[0] = hc_byte_perm (w2[0], w1[3], selector); - c0[3] = hc_byte_perm (w1[3], w1[2], selector); - c0[2] = hc_byte_perm (w1[2], w1[1], selector); - c0[1] = hc_byte_perm (w1[1], w1[0], selector); - c0[0] = hc_byte_perm (w1[0], w0[3], selector); - w7[3] = hc_byte_perm (w0[3], w0[2], selector); - w7[2] = hc_byte_perm (w0[2], w0[1], selector); - w7[1] = hc_byte_perm (w0[1], w0[0], selector); - w7[0] = hc_byte_perm (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm ( 0, w7[3], selector); - c7[0] = hc_byte_perm (w7[3], w7[2], selector); - c6[3] = hc_byte_perm (w7[2], w7[1], selector); - c6[2] = hc_byte_perm (w7[1], w7[0], selector); - c6[1] = hc_byte_perm (w7[0], w6[3], selector); - c6[0] = hc_byte_perm (w6[3], w6[2], selector); - c5[3] = hc_byte_perm (w6[2], w6[1], selector); - c5[2] = hc_byte_perm (w6[1], w6[0], selector); - c5[1] = hc_byte_perm (w6[0], w5[3], selector); - c5[0] = hc_byte_perm (w5[3], w5[2], selector); - c4[3] = hc_byte_perm (w5[2], w5[1], selector); - c4[2] = hc_byte_perm (w5[1], w5[0], selector); - c4[1] = hc_byte_perm 
(w5[0], w4[3], selector); - c4[0] = hc_byte_perm (w4[3], w4[2], selector); - c3[3] = hc_byte_perm (w4[2], w4[1], selector); - c3[2] = hc_byte_perm (w4[1], w4[0], selector); - c3[1] = hc_byte_perm (w4[0], w3[3], selector); - c3[0] = hc_byte_perm (w3[3], w3[2], selector); - c2[3] = hc_byte_perm (w3[2], w3[1], selector); - c2[2] = hc_byte_perm (w3[1], w3[0], selector); - c2[1] = hc_byte_perm (w3[0], w2[3], selector); - c2[0] = hc_byte_perm (w2[3], w2[2], selector); - c1[3] = hc_byte_perm (w2[2], w2[1], selector); - c1[2] = hc_byte_perm (w2[1], w2[0], selector); - c1[1] = hc_byte_perm (w2[0], w1[3], selector); - c1[0] = hc_byte_perm (w1[3], w1[2], selector); - c0[3] = hc_byte_perm (w1[2], w1[1], selector); - c0[2] = hc_byte_perm (w1[1], w1[0], selector); - c0[1] = hc_byte_perm (w1[0], w0[3], selector); - c0[0] = hc_byte_perm (w0[3], w0[2], selector); - w7[3] = hc_byte_perm (w0[2], w0[1], selector); - w7[2] = hc_byte_perm (w0[1], w0[0], selector); - w7[1] = hc_byte_perm (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm ( 0, w7[3], selector); - c7[1] = hc_byte_perm (w7[3], w7[2], selector); - c7[0] = hc_byte_perm (w7[2], w7[1], selector); - c6[3] = hc_byte_perm (w7[1], w7[0], selector); - c6[2] = hc_byte_perm (w7[0], w6[3], selector); - c6[1] = hc_byte_perm (w6[3], w6[2], selector); - c6[0] = hc_byte_perm (w6[2], w6[1], selector); - c5[3] = hc_byte_perm (w6[1], w6[0], selector); - c5[2] = hc_byte_perm (w6[0], w5[3], selector); - c5[1] = hc_byte_perm (w5[3], w5[2], selector); - c5[0] = hc_byte_perm (w5[2], w5[1], selector); - c4[3] = hc_byte_perm (w5[1], w5[0], selector); - c4[2] = 
hc_byte_perm (w5[0], w4[3], selector); - c4[1] = hc_byte_perm (w4[3], w4[2], selector); - c4[0] = hc_byte_perm (w4[2], w4[1], selector); - c3[3] = hc_byte_perm (w4[1], w4[0], selector); - c3[2] = hc_byte_perm (w4[0], w3[3], selector); - c3[1] = hc_byte_perm (w3[3], w3[2], selector); - c3[0] = hc_byte_perm (w3[2], w3[1], selector); - c2[3] = hc_byte_perm (w3[1], w3[0], selector); - c2[2] = hc_byte_perm (w3[0], w2[3], selector); - c2[1] = hc_byte_perm (w2[3], w2[2], selector); - c2[0] = hc_byte_perm (w2[2], w2[1], selector); - c1[3] = hc_byte_perm (w2[1], w2[0], selector); - c1[2] = hc_byte_perm (w2[0], w1[3], selector); - c1[1] = hc_byte_perm (w1[3], w1[2], selector); - c1[0] = hc_byte_perm (w1[2], w1[1], selector); - c0[3] = hc_byte_perm (w1[1], w1[0], selector); - c0[2] = hc_byte_perm (w1[0], w0[3], selector); - c0[1] = hc_byte_perm (w0[3], w0[2], selector); - c0[0] = hc_byte_perm (w0[2], w0[1], selector); - w7[3] = hc_byte_perm (w0[1], w0[0], selector); - w7[2] = hc_byte_perm (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm ( 0, w7[3], selector); - c7[2] = hc_byte_perm (w7[3], w7[2], selector); - c7[1] = hc_byte_perm (w7[2], w7[1], selector); - c7[0] = hc_byte_perm (w7[1], w7[0], selector); - c6[3] = hc_byte_perm (w7[0], w6[3], selector); - c6[2] = hc_byte_perm (w6[3], w6[2], selector); - c6[1] = hc_byte_perm (w6[2], w6[1], selector); - c6[0] = hc_byte_perm (w6[1], w6[0], selector); - c5[3] = hc_byte_perm (w6[0], w5[3], selector); - c5[2] = hc_byte_perm (w5[3], w5[2], selector); - c5[1] = hc_byte_perm (w5[2], w5[1], selector); - c5[0] = hc_byte_perm (w5[1], 
w5[0], selector); - c4[3] = hc_byte_perm (w5[0], w4[3], selector); - c4[2] = hc_byte_perm (w4[3], w4[2], selector); - c4[1] = hc_byte_perm (w4[2], w4[1], selector); - c4[0] = hc_byte_perm (w4[1], w4[0], selector); - c3[3] = hc_byte_perm (w4[0], w3[3], selector); - c3[2] = hc_byte_perm (w3[3], w3[2], selector); - c3[1] = hc_byte_perm (w3[2], w3[1], selector); - c3[0] = hc_byte_perm (w3[1], w3[0], selector); - c2[3] = hc_byte_perm (w3[0], w2[3], selector); - c2[2] = hc_byte_perm (w2[3], w2[2], selector); - c2[1] = hc_byte_perm (w2[2], w2[1], selector); - c2[0] = hc_byte_perm (w2[1], w2[0], selector); - c1[3] = hc_byte_perm (w2[0], w1[3], selector); - c1[2] = hc_byte_perm (w1[3], w1[2], selector); - c1[1] = hc_byte_perm (w1[2], w1[1], selector); - c1[0] = hc_byte_perm (w1[1], w1[0], selector); - c0[3] = hc_byte_perm (w1[0], w0[3], selector); - c0[2] = hc_byte_perm (w0[3], w0[2], selector); - c0[1] = hc_byte_perm (w0[2], w0[1], selector); - c0[0] = hc_byte_perm (w0[1], w0[0], selector); - w7[3] = hc_byte_perm (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_le (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -21798,4384 +15158,12 @@ DECLSPEC void switch_buffer_by_offset_1x64_le (PRIVATE_AS u32x *w, const u32 off break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - 
offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm (w[62], w[63], selector); - w[62] = hc_byte_perm (w[61], w[62], selector); - w[61] = hc_byte_perm (w[60], w[61], selector); - w[60] = hc_byte_perm (w[59], w[60], selector); - w[59] = hc_byte_perm (w[58], w[59], selector); - w[58] = hc_byte_perm (w[57], w[58], selector); - w[57] = hc_byte_perm (w[56], w[57], selector); - w[56] = hc_byte_perm (w[55], w[56], selector); - w[55] = hc_byte_perm (w[54], w[55], selector); - w[54] = hc_byte_perm (w[53], w[54], selector); - w[53] = hc_byte_perm (w[52], w[53], selector); - w[52] = hc_byte_perm (w[51], w[52], selector); - w[51] = hc_byte_perm (w[50], w[51], selector); - w[50] = hc_byte_perm (w[49], w[50], selector); - w[49] = hc_byte_perm (w[48], w[49], selector); - w[48] = hc_byte_perm (w[47], w[48], selector); - w[47] = hc_byte_perm (w[46], w[47], selector); - w[46] = hc_byte_perm (w[45], w[46], selector); - w[45] = hc_byte_perm (w[44], w[45], selector); - w[44] = hc_byte_perm (w[43], w[44], selector); - w[43] = hc_byte_perm (w[42], w[43], selector); - w[42] = hc_byte_perm (w[41], w[42], selector); - w[41] = hc_byte_perm (w[40], w[41], selector); - w[40] = hc_byte_perm (w[39], w[40], selector); - w[39] = hc_byte_perm (w[38], w[39], selector); - w[38] = hc_byte_perm (w[37], w[38], selector); - w[37] = hc_byte_perm (w[36], w[37], selector); - w[36] = hc_byte_perm (w[35], w[36], selector); - w[35] = hc_byte_perm (w[34], w[35], selector); - w[34] = hc_byte_perm (w[33], w[34], selector); - w[33] = hc_byte_perm (w[32], w[33], selector); - w[32] = hc_byte_perm (w[31], w[32], selector); - w[31] = hc_byte_perm (w[30], w[31], selector); - w[30] = hc_byte_perm (w[29], w[30], selector); - w[29] = hc_byte_perm (w[28], w[29], 
selector); - w[28] = hc_byte_perm (w[27], w[28], selector); - w[27] = hc_byte_perm (w[26], w[27], selector); - w[26] = hc_byte_perm (w[25], w[26], selector); - w[25] = hc_byte_perm (w[24], w[25], selector); - w[24] = hc_byte_perm (w[23], w[24], selector); - w[23] = hc_byte_perm (w[22], w[23], selector); - w[22] = hc_byte_perm (w[21], w[22], selector); - w[21] = hc_byte_perm (w[20], w[21], selector); - w[20] = hc_byte_perm (w[19], w[20], selector); - w[19] = hc_byte_perm (w[18], w[19], selector); - w[18] = hc_byte_perm (w[17], w[18], selector); - w[17] = hc_byte_perm (w[16], w[17], selector); - w[16] = hc_byte_perm (w[15], w[16], selector); - w[15] = hc_byte_perm (w[14], w[15], selector); - w[14] = hc_byte_perm (w[13], w[14], selector); - w[13] = hc_byte_perm (w[12], w[13], selector); - w[12] = hc_byte_perm (w[11], w[12], selector); - w[11] = hc_byte_perm (w[10], w[11], selector); - w[10] = hc_byte_perm (w[ 9], w[10], selector); - w[ 9] = hc_byte_perm (w[ 8], w[ 9], selector); - w[ 8] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 7] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 6] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 5] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 4] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 3] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 2] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 1] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 0] = hc_byte_perm ( 0, w[ 0], selector); - - break; - - case 1: - w[63] = hc_byte_perm (w[61], w[62], selector); - w[62] = hc_byte_perm (w[60], w[61], selector); - w[61] = hc_byte_perm (w[59], w[60], selector); - w[60] = hc_byte_perm (w[58], w[59], selector); - w[59] = hc_byte_perm (w[57], w[58], selector); - w[58] = hc_byte_perm (w[56], w[57], selector); - w[57] = hc_byte_perm (w[55], w[56], selector); - w[56] = hc_byte_perm (w[54], w[55], selector); - w[55] = hc_byte_perm (w[53], w[54], selector); - w[54] = hc_byte_perm (w[52], w[53], selector); - w[53] = hc_byte_perm (w[51], w[52], selector); - w[52] 
= hc_byte_perm (w[50], w[51], selector); - w[51] = hc_byte_perm (w[49], w[50], selector); - w[50] = hc_byte_perm (w[48], w[49], selector); - w[49] = hc_byte_perm (w[47], w[48], selector); - w[48] = hc_byte_perm (w[46], w[47], selector); - w[47] = hc_byte_perm (w[45], w[46], selector); - w[46] = hc_byte_perm (w[44], w[45], selector); - w[45] = hc_byte_perm (w[43], w[44], selector); - w[44] = hc_byte_perm (w[42], w[43], selector); - w[43] = hc_byte_perm (w[41], w[42], selector); - w[42] = hc_byte_perm (w[40], w[41], selector); - w[41] = hc_byte_perm (w[39], w[40], selector); - w[40] = hc_byte_perm (w[38], w[39], selector); - w[39] = hc_byte_perm (w[37], w[38], selector); - w[38] = hc_byte_perm (w[36], w[37], selector); - w[37] = hc_byte_perm (w[35], w[36], selector); - w[36] = hc_byte_perm (w[34], w[35], selector); - w[35] = hc_byte_perm (w[33], w[34], selector); - w[34] = hc_byte_perm (w[32], w[33], selector); - w[33] = hc_byte_perm (w[31], w[32], selector); - w[32] = hc_byte_perm (w[30], w[31], selector); - w[31] = hc_byte_perm (w[29], w[30], selector); - w[30] = hc_byte_perm (w[28], w[29], selector); - w[29] = hc_byte_perm (w[27], w[28], selector); - w[28] = hc_byte_perm (w[26], w[27], selector); - w[27] = hc_byte_perm (w[25], w[26], selector); - w[26] = hc_byte_perm (w[24], w[25], selector); - w[25] = hc_byte_perm (w[23], w[24], selector); - w[24] = hc_byte_perm (w[22], w[23], selector); - w[23] = hc_byte_perm (w[21], w[22], selector); - w[22] = hc_byte_perm (w[20], w[21], selector); - w[21] = hc_byte_perm (w[19], w[20], selector); - w[20] = hc_byte_perm (w[18], w[19], selector); - w[19] = hc_byte_perm (w[17], w[18], selector); - w[18] = hc_byte_perm (w[16], w[17], selector); - w[17] = hc_byte_perm (w[15], w[16], selector); - w[16] = hc_byte_perm (w[14], w[15], selector); - w[15] = hc_byte_perm (w[13], w[14], selector); - w[14] = hc_byte_perm (w[12], w[13], selector); - w[13] = hc_byte_perm (w[11], w[12], selector); - w[12] = hc_byte_perm (w[10], w[11], 
selector); - w[11] = hc_byte_perm (w[ 9], w[10], selector); - w[10] = hc_byte_perm (w[ 8], w[ 9], selector); - w[ 9] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 8] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 7] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 6] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 5] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 4] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 3] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 2] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 1] = hc_byte_perm ( 0, w[ 0], selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm (w[60], w[61], selector); - w[62] = hc_byte_perm (w[59], w[60], selector); - w[61] = hc_byte_perm (w[58], w[59], selector); - w[60] = hc_byte_perm (w[57], w[58], selector); - w[59] = hc_byte_perm (w[56], w[57], selector); - w[58] = hc_byte_perm (w[55], w[56], selector); - w[57] = hc_byte_perm (w[54], w[55], selector); - w[56] = hc_byte_perm (w[53], w[54], selector); - w[55] = hc_byte_perm (w[52], w[53], selector); - w[54] = hc_byte_perm (w[51], w[52], selector); - w[53] = hc_byte_perm (w[50], w[51], selector); - w[52] = hc_byte_perm (w[49], w[50], selector); - w[51] = hc_byte_perm (w[48], w[49], selector); - w[50] = hc_byte_perm (w[47], w[48], selector); - w[49] = hc_byte_perm (w[46], w[47], selector); - w[48] = hc_byte_perm (w[45], w[46], selector); - w[47] = hc_byte_perm (w[44], w[45], selector); - w[46] = hc_byte_perm (w[43], w[44], selector); - w[45] = hc_byte_perm (w[42], w[43], selector); - w[44] = hc_byte_perm (w[41], w[42], selector); - w[43] = hc_byte_perm (w[40], w[41], selector); - w[42] = hc_byte_perm (w[39], w[40], selector); - w[41] = hc_byte_perm (w[38], w[39], selector); - w[40] = hc_byte_perm (w[37], w[38], selector); - w[39] = hc_byte_perm (w[36], w[37], selector); - w[38] = hc_byte_perm (w[35], w[36], selector); - w[37] = hc_byte_perm (w[34], w[35], selector); - w[36] = hc_byte_perm (w[33], w[34], selector); - w[35] = hc_byte_perm (w[32], w[33], 
selector); - w[34] = hc_byte_perm (w[31], w[32], selector); - w[33] = hc_byte_perm (w[30], w[31], selector); - w[32] = hc_byte_perm (w[29], w[30], selector); - w[31] = hc_byte_perm (w[28], w[29], selector); - w[30] = hc_byte_perm (w[27], w[28], selector); - w[29] = hc_byte_perm (w[26], w[27], selector); - w[28] = hc_byte_perm (w[25], w[26], selector); - w[27] = hc_byte_perm (w[24], w[25], selector); - w[26] = hc_byte_perm (w[23], w[24], selector); - w[25] = hc_byte_perm (w[22], w[23], selector); - w[24] = hc_byte_perm (w[21], w[22], selector); - w[23] = hc_byte_perm (w[20], w[21], selector); - w[22] = hc_byte_perm (w[19], w[20], selector); - w[21] = hc_byte_perm (w[18], w[19], selector); - w[20] = hc_byte_perm (w[17], w[18], selector); - w[19] = hc_byte_perm (w[16], w[17], selector); - w[18] = hc_byte_perm (w[15], w[16], selector); - w[17] = hc_byte_perm (w[14], w[15], selector); - w[16] = hc_byte_perm (w[13], w[14], selector); - w[15] = hc_byte_perm (w[12], w[13], selector); - w[14] = hc_byte_perm (w[11], w[12], selector); - w[13] = hc_byte_perm (w[10], w[11], selector); - w[12] = hc_byte_perm (w[ 9], w[10], selector); - w[11] = hc_byte_perm (w[ 8], w[ 9], selector); - w[10] = hc_byte_perm (w[ 7], w[ 8], selector); - w[ 9] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 8] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 7] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 6] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 5] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 4] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 3] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 2] = hc_byte_perm ( 0, w[ 0], selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm (w[59], w[60], selector); - w[62] = hc_byte_perm (w[58], w[59], selector); - w[61] = hc_byte_perm (w[57], w[58], selector); - w[60] = hc_byte_perm (w[56], w[57], selector); - w[59] = hc_byte_perm (w[55], w[56], selector); - w[58] = hc_byte_perm (w[54], w[55], selector); - w[57] = hc_byte_perm (w[53], 
w[54], selector); - w[56] = hc_byte_perm (w[52], w[53], selector); - w[55] = hc_byte_perm (w[51], w[52], selector); - w[54] = hc_byte_perm (w[50], w[51], selector); - w[53] = hc_byte_perm (w[49], w[50], selector); - w[52] = hc_byte_perm (w[48], w[49], selector); - w[51] = hc_byte_perm (w[47], w[48], selector); - w[50] = hc_byte_perm (w[46], w[47], selector); - w[49] = hc_byte_perm (w[45], w[46], selector); - w[48] = hc_byte_perm (w[44], w[45], selector); - w[47] = hc_byte_perm (w[43], w[44], selector); - w[46] = hc_byte_perm (w[42], w[43], selector); - w[45] = hc_byte_perm (w[41], w[42], selector); - w[44] = hc_byte_perm (w[40], w[41], selector); - w[43] = hc_byte_perm (w[39], w[40], selector); - w[42] = hc_byte_perm (w[38], w[39], selector); - w[41] = hc_byte_perm (w[37], w[38], selector); - w[40] = hc_byte_perm (w[36], w[37], selector); - w[39] = hc_byte_perm (w[35], w[36], selector); - w[38] = hc_byte_perm (w[34], w[35], selector); - w[37] = hc_byte_perm (w[33], w[34], selector); - w[36] = hc_byte_perm (w[32], w[33], selector); - w[35] = hc_byte_perm (w[31], w[32], selector); - w[34] = hc_byte_perm (w[30], w[31], selector); - w[33] = hc_byte_perm (w[29], w[30], selector); - w[32] = hc_byte_perm (w[28], w[29], selector); - w[31] = hc_byte_perm (w[27], w[28], selector); - w[30] = hc_byte_perm (w[26], w[27], selector); - w[29] = hc_byte_perm (w[25], w[26], selector); - w[28] = hc_byte_perm (w[24], w[25], selector); - w[27] = hc_byte_perm (w[23], w[24], selector); - w[26] = hc_byte_perm (w[22], w[23], selector); - w[25] = hc_byte_perm (w[21], w[22], selector); - w[24] = hc_byte_perm (w[20], w[21], selector); - w[23] = hc_byte_perm (w[19], w[20], selector); - w[22] = hc_byte_perm (w[18], w[19], selector); - w[21] = hc_byte_perm (w[17], w[18], selector); - w[20] = hc_byte_perm (w[16], w[17], selector); - w[19] = hc_byte_perm (w[15], w[16], selector); - w[18] = hc_byte_perm (w[14], w[15], selector); - w[17] = hc_byte_perm (w[13], w[14], selector); - w[16] = 
hc_byte_perm (w[12], w[13], selector); - w[15] = hc_byte_perm (w[11], w[12], selector); - w[14] = hc_byte_perm (w[10], w[11], selector); - w[13] = hc_byte_perm (w[ 9], w[10], selector); - w[12] = hc_byte_perm (w[ 8], w[ 9], selector); - w[11] = hc_byte_perm (w[ 7], w[ 8], selector); - w[10] = hc_byte_perm (w[ 6], w[ 7], selector); - w[ 9] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 8] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 7] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 6] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 5] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 4] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 3] = hc_byte_perm ( 0, w[ 0], selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm (w[58], w[59], selector); - w[62] = hc_byte_perm (w[57], w[58], selector); - w[61] = hc_byte_perm (w[56], w[57], selector); - w[60] = hc_byte_perm (w[55], w[56], selector); - w[59] = hc_byte_perm (w[54], w[55], selector); - w[58] = hc_byte_perm (w[53], w[54], selector); - w[57] = hc_byte_perm (w[52], w[53], selector); - w[56] = hc_byte_perm (w[51], w[52], selector); - w[55] = hc_byte_perm (w[50], w[51], selector); - w[54] = hc_byte_perm (w[49], w[50], selector); - w[53] = hc_byte_perm (w[48], w[49], selector); - w[52] = hc_byte_perm (w[47], w[48], selector); - w[51] = hc_byte_perm (w[46], w[47], selector); - w[50] = hc_byte_perm (w[45], w[46], selector); - w[49] = hc_byte_perm (w[44], w[45], selector); - w[48] = hc_byte_perm (w[43], w[44], selector); - w[47] = hc_byte_perm (w[42], w[43], selector); - w[46] = hc_byte_perm (w[41], w[42], selector); - w[45] = hc_byte_perm (w[40], w[41], selector); - w[44] = hc_byte_perm (w[39], w[40], selector); - w[43] = hc_byte_perm (w[38], w[39], selector); - w[42] = hc_byte_perm (w[37], w[38], selector); - w[41] = hc_byte_perm (w[36], w[37], selector); - w[40] = hc_byte_perm (w[35], w[36], selector); - w[39] = hc_byte_perm (w[34], w[35], selector); - w[38] = hc_byte_perm (w[33], w[34], 
selector); - w[37] = hc_byte_perm (w[32], w[33], selector); - w[36] = hc_byte_perm (w[31], w[32], selector); - w[35] = hc_byte_perm (w[30], w[31], selector); - w[34] = hc_byte_perm (w[29], w[30], selector); - w[33] = hc_byte_perm (w[28], w[29], selector); - w[32] = hc_byte_perm (w[27], w[28], selector); - w[31] = hc_byte_perm (w[26], w[27], selector); - w[30] = hc_byte_perm (w[25], w[26], selector); - w[29] = hc_byte_perm (w[24], w[25], selector); - w[28] = hc_byte_perm (w[23], w[24], selector); - w[27] = hc_byte_perm (w[22], w[23], selector); - w[26] = hc_byte_perm (w[21], w[22], selector); - w[25] = hc_byte_perm (w[20], w[21], selector); - w[24] = hc_byte_perm (w[19], w[20], selector); - w[23] = hc_byte_perm (w[18], w[19], selector); - w[22] = hc_byte_perm (w[17], w[18], selector); - w[21] = hc_byte_perm (w[16], w[17], selector); - w[20] = hc_byte_perm (w[15], w[16], selector); - w[19] = hc_byte_perm (w[14], w[15], selector); - w[18] = hc_byte_perm (w[13], w[14], selector); - w[17] = hc_byte_perm (w[12], w[13], selector); - w[16] = hc_byte_perm (w[11], w[12], selector); - w[15] = hc_byte_perm (w[10], w[11], selector); - w[14] = hc_byte_perm (w[ 9], w[10], selector); - w[13] = hc_byte_perm (w[ 8], w[ 9], selector); - w[12] = hc_byte_perm (w[ 7], w[ 8], selector); - w[11] = hc_byte_perm (w[ 6], w[ 7], selector); - w[10] = hc_byte_perm (w[ 5], w[ 6], selector); - w[ 9] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 8] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 7] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 6] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 5] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 4] = hc_byte_perm ( 0, w[ 0], selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm (w[57], w[58], selector); - w[62] = hc_byte_perm (w[56], w[57], selector); - w[61] = hc_byte_perm (w[55], w[56], selector); - w[60] = hc_byte_perm (w[54], w[55], selector); - w[59] = hc_byte_perm (w[53], w[54], selector); - 
w[58] = hc_byte_perm (w[52], w[53], selector); - w[57] = hc_byte_perm (w[51], w[52], selector); - w[56] = hc_byte_perm (w[50], w[51], selector); - w[55] = hc_byte_perm (w[49], w[50], selector); - w[54] = hc_byte_perm (w[48], w[49], selector); - w[53] = hc_byte_perm (w[47], w[48], selector); - w[52] = hc_byte_perm (w[46], w[47], selector); - w[51] = hc_byte_perm (w[45], w[46], selector); - w[50] = hc_byte_perm (w[44], w[45], selector); - w[49] = hc_byte_perm (w[43], w[44], selector); - w[48] = hc_byte_perm (w[42], w[43], selector); - w[47] = hc_byte_perm (w[41], w[42], selector); - w[46] = hc_byte_perm (w[40], w[41], selector); - w[45] = hc_byte_perm (w[39], w[40], selector); - w[44] = hc_byte_perm (w[38], w[39], selector); - w[43] = hc_byte_perm (w[37], w[38], selector); - w[42] = hc_byte_perm (w[36], w[37], selector); - w[41] = hc_byte_perm (w[35], w[36], selector); - w[40] = hc_byte_perm (w[34], w[35], selector); - w[39] = hc_byte_perm (w[33], w[34], selector); - w[38] = hc_byte_perm (w[32], w[33], selector); - w[37] = hc_byte_perm (w[31], w[32], selector); - w[36] = hc_byte_perm (w[30], w[31], selector); - w[35] = hc_byte_perm (w[29], w[30], selector); - w[34] = hc_byte_perm (w[28], w[29], selector); - w[33] = hc_byte_perm (w[27], w[28], selector); - w[32] = hc_byte_perm (w[26], w[27], selector); - w[31] = hc_byte_perm (w[25], w[26], selector); - w[30] = hc_byte_perm (w[24], w[25], selector); - w[29] = hc_byte_perm (w[23], w[24], selector); - w[28] = hc_byte_perm (w[22], w[23], selector); - w[27] = hc_byte_perm (w[21], w[22], selector); - w[26] = hc_byte_perm (w[20], w[21], selector); - w[25] = hc_byte_perm (w[19], w[20], selector); - w[24] = hc_byte_perm (w[18], w[19], selector); - w[23] = hc_byte_perm (w[17], w[18], selector); - w[22] = hc_byte_perm (w[16], w[17], selector); - w[21] = hc_byte_perm (w[15], w[16], selector); - w[20] = hc_byte_perm (w[14], w[15], selector); - w[19] = hc_byte_perm (w[13], w[14], selector); - w[18] = hc_byte_perm (w[12], w[13], 
selector); - w[17] = hc_byte_perm (w[11], w[12], selector); - w[16] = hc_byte_perm (w[10], w[11], selector); - w[15] = hc_byte_perm (w[ 9], w[10], selector); - w[14] = hc_byte_perm (w[ 8], w[ 9], selector); - w[13] = hc_byte_perm (w[ 7], w[ 8], selector); - w[12] = hc_byte_perm (w[ 6], w[ 7], selector); - w[11] = hc_byte_perm (w[ 5], w[ 6], selector); - w[10] = hc_byte_perm (w[ 4], w[ 5], selector); - w[ 9] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 8] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 7] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 6] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 5] = hc_byte_perm ( 0, w[ 0], selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm (w[56], w[57], selector); - w[62] = hc_byte_perm (w[55], w[56], selector); - w[61] = hc_byte_perm (w[54], w[55], selector); - w[60] = hc_byte_perm (w[53], w[54], selector); - w[59] = hc_byte_perm (w[52], w[53], selector); - w[58] = hc_byte_perm (w[51], w[52], selector); - w[57] = hc_byte_perm (w[50], w[51], selector); - w[56] = hc_byte_perm (w[49], w[50], selector); - w[55] = hc_byte_perm (w[48], w[49], selector); - w[54] = hc_byte_perm (w[47], w[48], selector); - w[53] = hc_byte_perm (w[46], w[47], selector); - w[52] = hc_byte_perm (w[45], w[46], selector); - w[51] = hc_byte_perm (w[44], w[45], selector); - w[50] = hc_byte_perm (w[43], w[44], selector); - w[49] = hc_byte_perm (w[42], w[43], selector); - w[48] = hc_byte_perm (w[41], w[42], selector); - w[47] = hc_byte_perm (w[40], w[41], selector); - w[46] = hc_byte_perm (w[39], w[40], selector); - w[45] = hc_byte_perm (w[38], w[39], selector); - w[44] = hc_byte_perm (w[37], w[38], selector); - w[43] = hc_byte_perm (w[36], w[37], selector); - w[42] = hc_byte_perm (w[35], w[36], selector); - w[41] = hc_byte_perm (w[34], w[35], selector); - w[40] = hc_byte_perm (w[33], w[34], selector); - w[39] = hc_byte_perm (w[32], w[33], selector); - w[38] = hc_byte_perm (w[31], w[32], 
selector); - w[37] = hc_byte_perm (w[30], w[31], selector); - w[36] = hc_byte_perm (w[29], w[30], selector); - w[35] = hc_byte_perm (w[28], w[29], selector); - w[34] = hc_byte_perm (w[27], w[28], selector); - w[33] = hc_byte_perm (w[26], w[27], selector); - w[32] = hc_byte_perm (w[25], w[26], selector); - w[31] = hc_byte_perm (w[24], w[25], selector); - w[30] = hc_byte_perm (w[23], w[24], selector); - w[29] = hc_byte_perm (w[22], w[23], selector); - w[28] = hc_byte_perm (w[21], w[22], selector); - w[27] = hc_byte_perm (w[20], w[21], selector); - w[26] = hc_byte_perm (w[19], w[20], selector); - w[25] = hc_byte_perm (w[18], w[19], selector); - w[24] = hc_byte_perm (w[17], w[18], selector); - w[23] = hc_byte_perm (w[16], w[17], selector); - w[22] = hc_byte_perm (w[15], w[16], selector); - w[21] = hc_byte_perm (w[14], w[15], selector); - w[20] = hc_byte_perm (w[13], w[14], selector); - w[19] = hc_byte_perm (w[12], w[13], selector); - w[18] = hc_byte_perm (w[11], w[12], selector); - w[17] = hc_byte_perm (w[10], w[11], selector); - w[16] = hc_byte_perm (w[ 9], w[10], selector); - w[15] = hc_byte_perm (w[ 8], w[ 9], selector); - w[14] = hc_byte_perm (w[ 7], w[ 8], selector); - w[13] = hc_byte_perm (w[ 6], w[ 7], selector); - w[12] = hc_byte_perm (w[ 5], w[ 6], selector); - w[11] = hc_byte_perm (w[ 4], w[ 5], selector); - w[10] = hc_byte_perm (w[ 3], w[ 4], selector); - w[ 9] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 8] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 7] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 6] = hc_byte_perm ( 0, w[ 0], selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm (w[55], w[56], selector); - w[62] = hc_byte_perm (w[54], w[55], selector); - w[61] = hc_byte_perm (w[53], w[54], selector); - w[60] = hc_byte_perm (w[52], w[53], selector); - w[59] = hc_byte_perm (w[51], w[52], selector); - w[58] = hc_byte_perm (w[50], w[51], selector); - w[57] = hc_byte_perm 
(w[49], w[50], selector); - w[56] = hc_byte_perm (w[48], w[49], selector); - w[55] = hc_byte_perm (w[47], w[48], selector); - w[54] = hc_byte_perm (w[46], w[47], selector); - w[53] = hc_byte_perm (w[45], w[46], selector); - w[52] = hc_byte_perm (w[44], w[45], selector); - w[51] = hc_byte_perm (w[43], w[44], selector); - w[50] = hc_byte_perm (w[42], w[43], selector); - w[49] = hc_byte_perm (w[41], w[42], selector); - w[48] = hc_byte_perm (w[40], w[41], selector); - w[47] = hc_byte_perm (w[39], w[40], selector); - w[46] = hc_byte_perm (w[38], w[39], selector); - w[45] = hc_byte_perm (w[37], w[38], selector); - w[44] = hc_byte_perm (w[36], w[37], selector); - w[43] = hc_byte_perm (w[35], w[36], selector); - w[42] = hc_byte_perm (w[34], w[35], selector); - w[41] = hc_byte_perm (w[33], w[34], selector); - w[40] = hc_byte_perm (w[32], w[33], selector); - w[39] = hc_byte_perm (w[31], w[32], selector); - w[38] = hc_byte_perm (w[30], w[31], selector); - w[37] = hc_byte_perm (w[29], w[30], selector); - w[36] = hc_byte_perm (w[28], w[29], selector); - w[35] = hc_byte_perm (w[27], w[28], selector); - w[34] = hc_byte_perm (w[26], w[27], selector); - w[33] = hc_byte_perm (w[25], w[26], selector); - w[32] = hc_byte_perm (w[24], w[25], selector); - w[31] = hc_byte_perm (w[23], w[24], selector); - w[30] = hc_byte_perm (w[22], w[23], selector); - w[29] = hc_byte_perm (w[21], w[22], selector); - w[28] = hc_byte_perm (w[20], w[21], selector); - w[27] = hc_byte_perm (w[19], w[20], selector); - w[26] = hc_byte_perm (w[18], w[19], selector); - w[25] = hc_byte_perm (w[17], w[18], selector); - w[24] = hc_byte_perm (w[16], w[17], selector); - w[23] = hc_byte_perm (w[15], w[16], selector); - w[22] = hc_byte_perm (w[14], w[15], selector); - w[21] = hc_byte_perm (w[13], w[14], selector); - w[20] = hc_byte_perm (w[12], w[13], selector); - w[19] = hc_byte_perm (w[11], w[12], selector); - w[18] = hc_byte_perm (w[10], w[11], selector); - w[17] = hc_byte_perm (w[ 9], w[10], selector); - w[16] = 
hc_byte_perm (w[ 8], w[ 9], selector); - w[15] = hc_byte_perm (w[ 7], w[ 8], selector); - w[14] = hc_byte_perm (w[ 6], w[ 7], selector); - w[13] = hc_byte_perm (w[ 5], w[ 6], selector); - w[12] = hc_byte_perm (w[ 4], w[ 5], selector); - w[11] = hc_byte_perm (w[ 3], w[ 4], selector); - w[10] = hc_byte_perm (w[ 2], w[ 3], selector); - w[ 9] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 8] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 7] = hc_byte_perm ( 0, w[ 0], selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm (w[54], w[55], selector); - w[62] = hc_byte_perm (w[53], w[54], selector); - w[61] = hc_byte_perm (w[52], w[53], selector); - w[60] = hc_byte_perm (w[51], w[52], selector); - w[59] = hc_byte_perm (w[50], w[51], selector); - w[58] = hc_byte_perm (w[49], w[50], selector); - w[57] = hc_byte_perm (w[48], w[49], selector); - w[56] = hc_byte_perm (w[47], w[48], selector); - w[55] = hc_byte_perm (w[46], w[47], selector); - w[54] = hc_byte_perm (w[45], w[46], selector); - w[53] = hc_byte_perm (w[44], w[45], selector); - w[52] = hc_byte_perm (w[43], w[44], selector); - w[51] = hc_byte_perm (w[42], w[43], selector); - w[50] = hc_byte_perm (w[41], w[42], selector); - w[49] = hc_byte_perm (w[40], w[41], selector); - w[48] = hc_byte_perm (w[39], w[40], selector); - w[47] = hc_byte_perm (w[38], w[39], selector); - w[46] = hc_byte_perm (w[37], w[38], selector); - w[45] = hc_byte_perm (w[36], w[37], selector); - w[44] = hc_byte_perm (w[35], w[36], selector); - w[43] = hc_byte_perm (w[34], w[35], selector); - w[42] = hc_byte_perm (w[33], w[34], selector); - w[41] = hc_byte_perm (w[32], w[33], selector); - w[40] = hc_byte_perm (w[31], w[32], selector); - w[39] = hc_byte_perm (w[30], w[31], selector); - w[38] = hc_byte_perm (w[29], w[30], selector); - w[37] = hc_byte_perm (w[28], w[29], selector); - w[36] = hc_byte_perm (w[27], w[28], selector); - w[35] = hc_byte_perm (w[26], 
w[27], selector); - w[34] = hc_byte_perm (w[25], w[26], selector); - w[33] = hc_byte_perm (w[24], w[25], selector); - w[32] = hc_byte_perm (w[23], w[24], selector); - w[31] = hc_byte_perm (w[22], w[23], selector); - w[30] = hc_byte_perm (w[21], w[22], selector); - w[29] = hc_byte_perm (w[20], w[21], selector); - w[28] = hc_byte_perm (w[19], w[20], selector); - w[27] = hc_byte_perm (w[18], w[19], selector); - w[26] = hc_byte_perm (w[17], w[18], selector); - w[25] = hc_byte_perm (w[16], w[17], selector); - w[24] = hc_byte_perm (w[15], w[16], selector); - w[23] = hc_byte_perm (w[14], w[15], selector); - w[22] = hc_byte_perm (w[13], w[14], selector); - w[21] = hc_byte_perm (w[12], w[13], selector); - w[20] = hc_byte_perm (w[11], w[12], selector); - w[19] = hc_byte_perm (w[10], w[11], selector); - w[18] = hc_byte_perm (w[ 9], w[10], selector); - w[17] = hc_byte_perm (w[ 8], w[ 9], selector); - w[16] = hc_byte_perm (w[ 7], w[ 8], selector); - w[15] = hc_byte_perm (w[ 6], w[ 7], selector); - w[14] = hc_byte_perm (w[ 5], w[ 6], selector); - w[13] = hc_byte_perm (w[ 4], w[ 5], selector); - w[12] = hc_byte_perm (w[ 3], w[ 4], selector); - w[11] = hc_byte_perm (w[ 2], w[ 3], selector); - w[10] = hc_byte_perm (w[ 1], w[ 2], selector); - w[ 9] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 8] = hc_byte_perm ( 0, w[ 0], selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm (w[53], w[54], selector); - w[62] = hc_byte_perm (w[52], w[53], selector); - w[61] = hc_byte_perm (w[51], w[52], selector); - w[60] = hc_byte_perm (w[50], w[51], selector); - w[59] = hc_byte_perm (w[49], w[50], selector); - w[58] = hc_byte_perm (w[48], w[49], selector); - w[57] = hc_byte_perm (w[47], w[48], selector); - w[56] = hc_byte_perm (w[46], w[47], selector); - w[55] = hc_byte_perm (w[45], w[46], selector); - w[54] = hc_byte_perm (w[44], w[45], selector); - w[53] = hc_byte_perm (w[43], w[44], 
selector); - w[52] = hc_byte_perm (w[42], w[43], selector); - w[51] = hc_byte_perm (w[41], w[42], selector); - w[50] = hc_byte_perm (w[40], w[41], selector); - w[49] = hc_byte_perm (w[39], w[40], selector); - w[48] = hc_byte_perm (w[38], w[39], selector); - w[47] = hc_byte_perm (w[37], w[38], selector); - w[46] = hc_byte_perm (w[36], w[37], selector); - w[45] = hc_byte_perm (w[35], w[36], selector); - w[44] = hc_byte_perm (w[34], w[35], selector); - w[43] = hc_byte_perm (w[33], w[34], selector); - w[42] = hc_byte_perm (w[32], w[33], selector); - w[41] = hc_byte_perm (w[31], w[32], selector); - w[40] = hc_byte_perm (w[30], w[31], selector); - w[39] = hc_byte_perm (w[29], w[30], selector); - w[38] = hc_byte_perm (w[28], w[29], selector); - w[37] = hc_byte_perm (w[27], w[28], selector); - w[36] = hc_byte_perm (w[26], w[27], selector); - w[35] = hc_byte_perm (w[25], w[26], selector); - w[34] = hc_byte_perm (w[24], w[25], selector); - w[33] = hc_byte_perm (w[23], w[24], selector); - w[32] = hc_byte_perm (w[22], w[23], selector); - w[31] = hc_byte_perm (w[21], w[22], selector); - w[30] = hc_byte_perm (w[20], w[21], selector); - w[29] = hc_byte_perm (w[19], w[20], selector); - w[28] = hc_byte_perm (w[18], w[19], selector); - w[27] = hc_byte_perm (w[17], w[18], selector); - w[26] = hc_byte_perm (w[16], w[17], selector); - w[25] = hc_byte_perm (w[15], w[16], selector); - w[24] = hc_byte_perm (w[14], w[15], selector); - w[23] = hc_byte_perm (w[13], w[14], selector); - w[22] = hc_byte_perm (w[12], w[13], selector); - w[21] = hc_byte_perm (w[11], w[12], selector); - w[20] = hc_byte_perm (w[10], w[11], selector); - w[19] = hc_byte_perm (w[ 9], w[10], selector); - w[18] = hc_byte_perm (w[ 8], w[ 9], selector); - w[17] = hc_byte_perm (w[ 7], w[ 8], selector); - w[16] = hc_byte_perm (w[ 6], w[ 7], selector); - w[15] = hc_byte_perm (w[ 5], w[ 6], selector); - w[14] = hc_byte_perm (w[ 4], w[ 5], selector); - w[13] = hc_byte_perm (w[ 3], w[ 4], selector); - w[12] = hc_byte_perm (w[ 
2], w[ 3], selector); - w[11] = hc_byte_perm (w[ 1], w[ 2], selector); - w[10] = hc_byte_perm (w[ 0], w[ 1], selector); - w[ 9] = hc_byte_perm ( 0, w[ 0], selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm (w[52], w[53], selector); - w[62] = hc_byte_perm (w[51], w[52], selector); - w[61] = hc_byte_perm (w[50], w[51], selector); - w[60] = hc_byte_perm (w[49], w[50], selector); - w[59] = hc_byte_perm (w[48], w[49], selector); - w[58] = hc_byte_perm (w[47], w[48], selector); - w[57] = hc_byte_perm (w[46], w[47], selector); - w[56] = hc_byte_perm (w[45], w[46], selector); - w[55] = hc_byte_perm (w[44], w[45], selector); - w[54] = hc_byte_perm (w[43], w[44], selector); - w[53] = hc_byte_perm (w[42], w[43], selector); - w[52] = hc_byte_perm (w[41], w[42], selector); - w[51] = hc_byte_perm (w[40], w[41], selector); - w[50] = hc_byte_perm (w[39], w[40], selector); - w[49] = hc_byte_perm (w[38], w[39], selector); - w[48] = hc_byte_perm (w[37], w[38], selector); - w[47] = hc_byte_perm (w[36], w[37], selector); - w[46] = hc_byte_perm (w[35], w[36], selector); - w[45] = hc_byte_perm (w[34], w[35], selector); - w[44] = hc_byte_perm (w[33], w[34], selector); - w[43] = hc_byte_perm (w[32], w[33], selector); - w[42] = hc_byte_perm (w[31], w[32], selector); - w[41] = hc_byte_perm (w[30], w[31], selector); - w[40] = hc_byte_perm (w[29], w[30], selector); - w[39] = hc_byte_perm (w[28], w[29], selector); - w[38] = hc_byte_perm (w[27], w[28], selector); - w[37] = hc_byte_perm (w[26], w[27], selector); - w[36] = hc_byte_perm (w[25], w[26], selector); - w[35] = hc_byte_perm (w[24], w[25], selector); - w[34] = hc_byte_perm (w[23], w[24], selector); - w[33] = hc_byte_perm (w[22], w[23], selector); - w[32] = hc_byte_perm (w[21], w[22], selector); - w[31] = hc_byte_perm (w[20], w[21], selector); - w[30] = hc_byte_perm (w[19], w[20], selector); - w[29] = hc_byte_perm 
(w[18], w[19], selector); - w[28] = hc_byte_perm (w[17], w[18], selector); - w[27] = hc_byte_perm (w[16], w[17], selector); - w[26] = hc_byte_perm (w[15], w[16], selector); - w[25] = hc_byte_perm (w[14], w[15], selector); - w[24] = hc_byte_perm (w[13], w[14], selector); - w[23] = hc_byte_perm (w[12], w[13], selector); - w[22] = hc_byte_perm (w[11], w[12], selector); - w[21] = hc_byte_perm (w[10], w[11], selector); - w[20] = hc_byte_perm (w[ 9], w[10], selector); - w[19] = hc_byte_perm (w[ 8], w[ 9], selector); - w[18] = hc_byte_perm (w[ 7], w[ 8], selector); - w[17] = hc_byte_perm (w[ 6], w[ 7], selector); - w[16] = hc_byte_perm (w[ 5], w[ 6], selector); - w[15] = hc_byte_perm (w[ 4], w[ 5], selector); - w[14] = hc_byte_perm (w[ 3], w[ 4], selector); - w[13] = hc_byte_perm (w[ 2], w[ 3], selector); - w[12] = hc_byte_perm (w[ 1], w[ 2], selector); - w[11] = hc_byte_perm (w[ 0], w[ 1], selector); - w[10] = hc_byte_perm ( 0, w[ 0], selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm (w[51], w[52], selector); - w[62] = hc_byte_perm (w[50], w[51], selector); - w[61] = hc_byte_perm (w[49], w[50], selector); - w[60] = hc_byte_perm (w[48], w[49], selector); - w[59] = hc_byte_perm (w[47], w[48], selector); - w[58] = hc_byte_perm (w[46], w[47], selector); - w[57] = hc_byte_perm (w[45], w[46], selector); - w[56] = hc_byte_perm (w[44], w[45], selector); - w[55] = hc_byte_perm (w[43], w[44], selector); - w[54] = hc_byte_perm (w[42], w[43], selector); - w[53] = hc_byte_perm (w[41], w[42], selector); - w[52] = hc_byte_perm (w[40], w[41], selector); - w[51] = hc_byte_perm (w[39], w[40], selector); - w[50] = hc_byte_perm (w[38], w[39], selector); - w[49] = hc_byte_perm (w[37], w[38], selector); - w[48] = hc_byte_perm (w[36], w[37], selector); - w[47] = hc_byte_perm (w[35], w[36], selector); - w[46] = hc_byte_perm (w[34], w[35], selector); - w[45] = 
hc_byte_perm (w[33], w[34], selector); - w[44] = hc_byte_perm (w[32], w[33], selector); - w[43] = hc_byte_perm (w[31], w[32], selector); - w[42] = hc_byte_perm (w[30], w[31], selector); - w[41] = hc_byte_perm (w[29], w[30], selector); - w[40] = hc_byte_perm (w[28], w[29], selector); - w[39] = hc_byte_perm (w[27], w[28], selector); - w[38] = hc_byte_perm (w[26], w[27], selector); - w[37] = hc_byte_perm (w[25], w[26], selector); - w[36] = hc_byte_perm (w[24], w[25], selector); - w[35] = hc_byte_perm (w[23], w[24], selector); - w[34] = hc_byte_perm (w[22], w[23], selector); - w[33] = hc_byte_perm (w[21], w[22], selector); - w[32] = hc_byte_perm (w[20], w[21], selector); - w[31] = hc_byte_perm (w[19], w[20], selector); - w[30] = hc_byte_perm (w[18], w[19], selector); - w[29] = hc_byte_perm (w[17], w[18], selector); - w[28] = hc_byte_perm (w[16], w[17], selector); - w[27] = hc_byte_perm (w[15], w[16], selector); - w[26] = hc_byte_perm (w[14], w[15], selector); - w[25] = hc_byte_perm (w[13], w[14], selector); - w[24] = hc_byte_perm (w[12], w[13], selector); - w[23] = hc_byte_perm (w[11], w[12], selector); - w[22] = hc_byte_perm (w[10], w[11], selector); - w[21] = hc_byte_perm (w[ 9], w[10], selector); - w[20] = hc_byte_perm (w[ 8], w[ 9], selector); - w[19] = hc_byte_perm (w[ 7], w[ 8], selector); - w[18] = hc_byte_perm (w[ 6], w[ 7], selector); - w[17] = hc_byte_perm (w[ 5], w[ 6], selector); - w[16] = hc_byte_perm (w[ 4], w[ 5], selector); - w[15] = hc_byte_perm (w[ 3], w[ 4], selector); - w[14] = hc_byte_perm (w[ 2], w[ 3], selector); - w[13] = hc_byte_perm (w[ 1], w[ 2], selector); - w[12] = hc_byte_perm (w[ 0], w[ 1], selector); - w[11] = hc_byte_perm ( 0, w[ 0], selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm (w[50], w[51], selector); - w[62] = hc_byte_perm (w[49], w[50], selector); - w[61] = hc_byte_perm (w[48], 
w[49], selector); - w[60] = hc_byte_perm (w[47], w[48], selector); - w[59] = hc_byte_perm (w[46], w[47], selector); - w[58] = hc_byte_perm (w[45], w[46], selector); - w[57] = hc_byte_perm (w[44], w[45], selector); - w[56] = hc_byte_perm (w[43], w[44], selector); - w[55] = hc_byte_perm (w[42], w[43], selector); - w[54] = hc_byte_perm (w[41], w[42], selector); - w[53] = hc_byte_perm (w[40], w[41], selector); - w[52] = hc_byte_perm (w[39], w[40], selector); - w[51] = hc_byte_perm (w[38], w[39], selector); - w[50] = hc_byte_perm (w[37], w[38], selector); - w[49] = hc_byte_perm (w[36], w[37], selector); - w[48] = hc_byte_perm (w[35], w[36], selector); - w[47] = hc_byte_perm (w[34], w[35], selector); - w[46] = hc_byte_perm (w[33], w[34], selector); - w[45] = hc_byte_perm (w[32], w[33], selector); - w[44] = hc_byte_perm (w[31], w[32], selector); - w[43] = hc_byte_perm (w[30], w[31], selector); - w[42] = hc_byte_perm (w[29], w[30], selector); - w[41] = hc_byte_perm (w[28], w[29], selector); - w[40] = hc_byte_perm (w[27], w[28], selector); - w[39] = hc_byte_perm (w[26], w[27], selector); - w[38] = hc_byte_perm (w[25], w[26], selector); - w[37] = hc_byte_perm (w[24], w[25], selector); - w[36] = hc_byte_perm (w[23], w[24], selector); - w[35] = hc_byte_perm (w[22], w[23], selector); - w[34] = hc_byte_perm (w[21], w[22], selector); - w[33] = hc_byte_perm (w[20], w[21], selector); - w[32] = hc_byte_perm (w[19], w[20], selector); - w[31] = hc_byte_perm (w[18], w[19], selector); - w[30] = hc_byte_perm (w[17], w[18], selector); - w[29] = hc_byte_perm (w[16], w[17], selector); - w[28] = hc_byte_perm (w[15], w[16], selector); - w[27] = hc_byte_perm (w[14], w[15], selector); - w[26] = hc_byte_perm (w[13], w[14], selector); - w[25] = hc_byte_perm (w[12], w[13], selector); - w[24] = hc_byte_perm (w[11], w[12], selector); - w[23] = hc_byte_perm (w[10], w[11], selector); - w[22] = hc_byte_perm (w[ 9], w[10], selector); - w[21] = hc_byte_perm (w[ 8], w[ 9], selector); - w[20] = 
hc_byte_perm (w[ 7], w[ 8], selector); - w[19] = hc_byte_perm (w[ 6], w[ 7], selector); - w[18] = hc_byte_perm (w[ 5], w[ 6], selector); - w[17] = hc_byte_perm (w[ 4], w[ 5], selector); - w[16] = hc_byte_perm (w[ 3], w[ 4], selector); - w[15] = hc_byte_perm (w[ 2], w[ 3], selector); - w[14] = hc_byte_perm (w[ 1], w[ 2], selector); - w[13] = hc_byte_perm (w[ 0], w[ 1], selector); - w[12] = hc_byte_perm ( 0, w[ 0], selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm (w[49], w[50], selector); - w[62] = hc_byte_perm (w[48], w[49], selector); - w[61] = hc_byte_perm (w[47], w[48], selector); - w[60] = hc_byte_perm (w[46], w[47], selector); - w[59] = hc_byte_perm (w[45], w[46], selector); - w[58] = hc_byte_perm (w[44], w[45], selector); - w[57] = hc_byte_perm (w[43], w[44], selector); - w[56] = hc_byte_perm (w[42], w[43], selector); - w[55] = hc_byte_perm (w[41], w[42], selector); - w[54] = hc_byte_perm (w[40], w[41], selector); - w[53] = hc_byte_perm (w[39], w[40], selector); - w[52] = hc_byte_perm (w[38], w[39], selector); - w[51] = hc_byte_perm (w[37], w[38], selector); - w[50] = hc_byte_perm (w[36], w[37], selector); - w[49] = hc_byte_perm (w[35], w[36], selector); - w[48] = hc_byte_perm (w[34], w[35], selector); - w[47] = hc_byte_perm (w[33], w[34], selector); - w[46] = hc_byte_perm (w[32], w[33], selector); - w[45] = hc_byte_perm (w[31], w[32], selector); - w[44] = hc_byte_perm (w[30], w[31], selector); - w[43] = hc_byte_perm (w[29], w[30], selector); - w[42] = hc_byte_perm (w[28], w[29], selector); - w[41] = hc_byte_perm (w[27], w[28], selector); - w[40] = hc_byte_perm (w[26], w[27], selector); - w[39] = hc_byte_perm (w[25], w[26], selector); - w[38] = hc_byte_perm (w[24], w[25], selector); - w[37] = hc_byte_perm (w[23], w[24], selector); - w[36] = hc_byte_perm (w[22], w[23], selector); - w[35] = 
hc_byte_perm (w[21], w[22], selector); - w[34] = hc_byte_perm (w[20], w[21], selector); - w[33] = hc_byte_perm (w[19], w[20], selector); - w[32] = hc_byte_perm (w[18], w[19], selector); - w[31] = hc_byte_perm (w[17], w[18], selector); - w[30] = hc_byte_perm (w[16], w[17], selector); - w[29] = hc_byte_perm (w[15], w[16], selector); - w[28] = hc_byte_perm (w[14], w[15], selector); - w[27] = hc_byte_perm (w[13], w[14], selector); - w[26] = hc_byte_perm (w[12], w[13], selector); - w[25] = hc_byte_perm (w[11], w[12], selector); - w[24] = hc_byte_perm (w[10], w[11], selector); - w[23] = hc_byte_perm (w[ 9], w[10], selector); - w[22] = hc_byte_perm (w[ 8], w[ 9], selector); - w[21] = hc_byte_perm (w[ 7], w[ 8], selector); - w[20] = hc_byte_perm (w[ 6], w[ 7], selector); - w[19] = hc_byte_perm (w[ 5], w[ 6], selector); - w[18] = hc_byte_perm (w[ 4], w[ 5], selector); - w[17] = hc_byte_perm (w[ 3], w[ 4], selector); - w[16] = hc_byte_perm (w[ 2], w[ 3], selector); - w[15] = hc_byte_perm (w[ 1], w[ 2], selector); - w[14] = hc_byte_perm (w[ 0], w[ 1], selector); - w[13] = hc_byte_perm ( 0, w[ 0], selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm (w[48], w[49], selector); - w[62] = hc_byte_perm (w[47], w[48], selector); - w[61] = hc_byte_perm (w[46], w[47], selector); - w[60] = hc_byte_perm (w[45], w[46], selector); - w[59] = hc_byte_perm (w[44], w[45], selector); - w[58] = hc_byte_perm (w[43], w[44], selector); - w[57] = hc_byte_perm (w[42], w[43], selector); - w[56] = hc_byte_perm (w[41], w[42], selector); - w[55] = hc_byte_perm (w[40], w[41], selector); - w[54] = hc_byte_perm (w[39], w[40], selector); - w[53] = hc_byte_perm (w[38], w[39], selector); - w[52] = hc_byte_perm (w[37], w[38], selector); - w[51] = hc_byte_perm (w[36], w[37], selector); - w[50] = hc_byte_perm (w[35], w[36], selector); - 
w[49] = hc_byte_perm (w[34], w[35], selector); - w[48] = hc_byte_perm (w[33], w[34], selector); - w[47] = hc_byte_perm (w[32], w[33], selector); - w[46] = hc_byte_perm (w[31], w[32], selector); - w[45] = hc_byte_perm (w[30], w[31], selector); - w[44] = hc_byte_perm (w[29], w[30], selector); - w[43] = hc_byte_perm (w[28], w[29], selector); - w[42] = hc_byte_perm (w[27], w[28], selector); - w[41] = hc_byte_perm (w[26], w[27], selector); - w[40] = hc_byte_perm (w[25], w[26], selector); - w[39] = hc_byte_perm (w[24], w[25], selector); - w[38] = hc_byte_perm (w[23], w[24], selector); - w[37] = hc_byte_perm (w[22], w[23], selector); - w[36] = hc_byte_perm (w[21], w[22], selector); - w[35] = hc_byte_perm (w[20], w[21], selector); - w[34] = hc_byte_perm (w[19], w[20], selector); - w[33] = hc_byte_perm (w[18], w[19], selector); - w[32] = hc_byte_perm (w[17], w[18], selector); - w[31] = hc_byte_perm (w[16], w[17], selector); - w[30] = hc_byte_perm (w[15], w[16], selector); - w[29] = hc_byte_perm (w[14], w[15], selector); - w[28] = hc_byte_perm (w[13], w[14], selector); - w[27] = hc_byte_perm (w[12], w[13], selector); - w[26] = hc_byte_perm (w[11], w[12], selector); - w[25] = hc_byte_perm (w[10], w[11], selector); - w[24] = hc_byte_perm (w[ 9], w[10], selector); - w[23] = hc_byte_perm (w[ 8], w[ 9], selector); - w[22] = hc_byte_perm (w[ 7], w[ 8], selector); - w[21] = hc_byte_perm (w[ 6], w[ 7], selector); - w[20] = hc_byte_perm (w[ 5], w[ 6], selector); - w[19] = hc_byte_perm (w[ 4], w[ 5], selector); - w[18] = hc_byte_perm (w[ 3], w[ 4], selector); - w[17] = hc_byte_perm (w[ 2], w[ 3], selector); - w[16] = hc_byte_perm (w[ 1], w[ 2], selector); - w[15] = hc_byte_perm (w[ 0], w[ 1], selector); - w[14] = hc_byte_perm ( 0, w[ 0], selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm (w[47], 
w[48], selector); - w[62] = hc_byte_perm (w[46], w[47], selector); - w[61] = hc_byte_perm (w[45], w[46], selector); - w[60] = hc_byte_perm (w[44], w[45], selector); - w[59] = hc_byte_perm (w[43], w[44], selector); - w[58] = hc_byte_perm (w[42], w[43], selector); - w[57] = hc_byte_perm (w[41], w[42], selector); - w[56] = hc_byte_perm (w[40], w[41], selector); - w[55] = hc_byte_perm (w[39], w[40], selector); - w[54] = hc_byte_perm (w[38], w[39], selector); - w[53] = hc_byte_perm (w[37], w[38], selector); - w[52] = hc_byte_perm (w[36], w[37], selector); - w[51] = hc_byte_perm (w[35], w[36], selector); - w[50] = hc_byte_perm (w[34], w[35], selector); - w[49] = hc_byte_perm (w[33], w[34], selector); - w[48] = hc_byte_perm (w[32], w[33], selector); - w[47] = hc_byte_perm (w[31], w[32], selector); - w[46] = hc_byte_perm (w[30], w[31], selector); - w[45] = hc_byte_perm (w[29], w[30], selector); - w[44] = hc_byte_perm (w[28], w[29], selector); - w[43] = hc_byte_perm (w[27], w[28], selector); - w[42] = hc_byte_perm (w[26], w[27], selector); - w[41] = hc_byte_perm (w[25], w[26], selector); - w[40] = hc_byte_perm (w[24], w[25], selector); - w[39] = hc_byte_perm (w[23], w[24], selector); - w[38] = hc_byte_perm (w[22], w[23], selector); - w[37] = hc_byte_perm (w[21], w[22], selector); - w[36] = hc_byte_perm (w[20], w[21], selector); - w[35] = hc_byte_perm (w[19], w[20], selector); - w[34] = hc_byte_perm (w[18], w[19], selector); - w[33] = hc_byte_perm (w[17], w[18], selector); - w[32] = hc_byte_perm (w[16], w[17], selector); - w[31] = hc_byte_perm (w[15], w[16], selector); - w[30] = hc_byte_perm (w[14], w[15], selector); - w[29] = hc_byte_perm (w[13], w[14], selector); - w[28] = hc_byte_perm (w[12], w[13], selector); - w[27] = hc_byte_perm (w[11], w[12], selector); - w[26] = hc_byte_perm (w[10], w[11], selector); - w[25] = hc_byte_perm (w[ 9], w[10], selector); - w[24] = hc_byte_perm (w[ 8], w[ 9], selector); - w[23] = hc_byte_perm (w[ 7], w[ 8], selector); - w[22] = 
hc_byte_perm (w[ 6], w[ 7], selector); - w[21] = hc_byte_perm (w[ 5], w[ 6], selector); - w[20] = hc_byte_perm (w[ 4], w[ 5], selector); - w[19] = hc_byte_perm (w[ 3], w[ 4], selector); - w[18] = hc_byte_perm (w[ 2], w[ 3], selector); - w[17] = hc_byte_perm (w[ 1], w[ 2], selector); - w[16] = hc_byte_perm (w[ 0], w[ 1], selector); - w[15] = hc_byte_perm ( 0, w[ 0], selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm (w[46], w[47], selector); - w[62] = hc_byte_perm (w[45], w[46], selector); - w[61] = hc_byte_perm (w[44], w[45], selector); - w[60] = hc_byte_perm (w[43], w[44], selector); - w[59] = hc_byte_perm (w[42], w[43], selector); - w[58] = hc_byte_perm (w[41], w[42], selector); - w[57] = hc_byte_perm (w[40], w[41], selector); - w[56] = hc_byte_perm (w[39], w[40], selector); - w[55] = hc_byte_perm (w[38], w[39], selector); - w[54] = hc_byte_perm (w[37], w[38], selector); - w[53] = hc_byte_perm (w[36], w[37], selector); - w[52] = hc_byte_perm (w[35], w[36], selector); - w[51] = hc_byte_perm (w[34], w[35], selector); - w[50] = hc_byte_perm (w[33], w[34], selector); - w[49] = hc_byte_perm (w[32], w[33], selector); - w[48] = hc_byte_perm (w[31], w[32], selector); - w[47] = hc_byte_perm (w[30], w[31], selector); - w[46] = hc_byte_perm (w[29], w[30], selector); - w[45] = hc_byte_perm (w[28], w[29], selector); - w[44] = hc_byte_perm (w[27], w[28], selector); - w[43] = hc_byte_perm (w[26], w[27], selector); - w[42] = hc_byte_perm (w[25], w[26], selector); - w[41] = hc_byte_perm (w[24], w[25], selector); - w[40] = hc_byte_perm (w[23], w[24], selector); - w[39] = hc_byte_perm (w[22], w[23], selector); - w[38] = hc_byte_perm (w[21], w[22], selector); - w[37] = hc_byte_perm (w[20], w[21], selector); - w[36] = hc_byte_perm (w[19], w[20], selector); - w[35] = hc_byte_perm 
(w[18], w[19], selector); - w[34] = hc_byte_perm (w[17], w[18], selector); - w[33] = hc_byte_perm (w[16], w[17], selector); - w[32] = hc_byte_perm (w[15], w[16], selector); - w[31] = hc_byte_perm (w[14], w[15], selector); - w[30] = hc_byte_perm (w[13], w[14], selector); - w[29] = hc_byte_perm (w[12], w[13], selector); - w[28] = hc_byte_perm (w[11], w[12], selector); - w[27] = hc_byte_perm (w[10], w[11], selector); - w[26] = hc_byte_perm (w[ 9], w[10], selector); - w[25] = hc_byte_perm (w[ 8], w[ 9], selector); - w[24] = hc_byte_perm (w[ 7], w[ 8], selector); - w[23] = hc_byte_perm (w[ 6], w[ 7], selector); - w[22] = hc_byte_perm (w[ 5], w[ 6], selector); - w[21] = hc_byte_perm (w[ 4], w[ 5], selector); - w[20] = hc_byte_perm (w[ 3], w[ 4], selector); - w[19] = hc_byte_perm (w[ 2], w[ 3], selector); - w[18] = hc_byte_perm (w[ 1], w[ 2], selector); - w[17] = hc_byte_perm (w[ 0], w[ 1], selector); - w[16] = hc_byte_perm ( 0, w[ 0], selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm (w[45], w[46], selector); - w[62] = hc_byte_perm (w[44], w[45], selector); - w[61] = hc_byte_perm (w[43], w[44], selector); - w[60] = hc_byte_perm (w[42], w[43], selector); - w[59] = hc_byte_perm (w[41], w[42], selector); - w[58] = hc_byte_perm (w[40], w[41], selector); - w[57] = hc_byte_perm (w[39], w[40], selector); - w[56] = hc_byte_perm (w[38], w[39], selector); - w[55] = hc_byte_perm (w[37], w[38], selector); - w[54] = hc_byte_perm (w[36], w[37], selector); - w[53] = hc_byte_perm (w[35], w[36], selector); - w[52] = hc_byte_perm (w[34], w[35], selector); - w[51] = hc_byte_perm (w[33], w[34], selector); - w[50] = hc_byte_perm (w[32], w[33], selector); - w[49] = hc_byte_perm (w[31], w[32], selector); - w[48] = hc_byte_perm (w[30], w[31], selector); - w[47] = hc_byte_perm 
(w[29], w[30], selector); - w[46] = hc_byte_perm (w[28], w[29], selector); - w[45] = hc_byte_perm (w[27], w[28], selector); - w[44] = hc_byte_perm (w[26], w[27], selector); - w[43] = hc_byte_perm (w[25], w[26], selector); - w[42] = hc_byte_perm (w[24], w[25], selector); - w[41] = hc_byte_perm (w[23], w[24], selector); - w[40] = hc_byte_perm (w[22], w[23], selector); - w[39] = hc_byte_perm (w[21], w[22], selector); - w[38] = hc_byte_perm (w[20], w[21], selector); - w[37] = hc_byte_perm (w[19], w[20], selector); - w[36] = hc_byte_perm (w[18], w[19], selector); - w[35] = hc_byte_perm (w[17], w[18], selector); - w[34] = hc_byte_perm (w[16], w[17], selector); - w[33] = hc_byte_perm (w[15], w[16], selector); - w[32] = hc_byte_perm (w[14], w[15], selector); - w[31] = hc_byte_perm (w[13], w[14], selector); - w[30] = hc_byte_perm (w[12], w[13], selector); - w[29] = hc_byte_perm (w[11], w[12], selector); - w[28] = hc_byte_perm (w[10], w[11], selector); - w[27] = hc_byte_perm (w[ 9], w[10], selector); - w[26] = hc_byte_perm (w[ 8], w[ 9], selector); - w[25] = hc_byte_perm (w[ 7], w[ 8], selector); - w[24] = hc_byte_perm (w[ 6], w[ 7], selector); - w[23] = hc_byte_perm (w[ 5], w[ 6], selector); - w[22] = hc_byte_perm (w[ 4], w[ 5], selector); - w[21] = hc_byte_perm (w[ 3], w[ 4], selector); - w[20] = hc_byte_perm (w[ 2], w[ 3], selector); - w[19] = hc_byte_perm (w[ 1], w[ 2], selector); - w[18] = hc_byte_perm (w[ 0], w[ 1], selector); - w[17] = hc_byte_perm ( 0, w[ 0], selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm (w[44], w[45], selector); - w[62] = hc_byte_perm (w[43], w[44], selector); - w[61] = hc_byte_perm (w[42], w[43], selector); - w[60] = hc_byte_perm (w[41], w[42], selector); - w[59] = hc_byte_perm (w[40], w[41], selector); - w[58] = 
hc_byte_perm (w[39], w[40], selector); - w[57] = hc_byte_perm (w[38], w[39], selector); - w[56] = hc_byte_perm (w[37], w[38], selector); - w[55] = hc_byte_perm (w[36], w[37], selector); - w[54] = hc_byte_perm (w[35], w[36], selector); - w[53] = hc_byte_perm (w[34], w[35], selector); - w[52] = hc_byte_perm (w[33], w[34], selector); - w[51] = hc_byte_perm (w[32], w[33], selector); - w[50] = hc_byte_perm (w[31], w[32], selector); - w[49] = hc_byte_perm (w[30], w[31], selector); - w[48] = hc_byte_perm (w[29], w[30], selector); - w[47] = hc_byte_perm (w[28], w[29], selector); - w[46] = hc_byte_perm (w[27], w[28], selector); - w[45] = hc_byte_perm (w[26], w[27], selector); - w[44] = hc_byte_perm (w[25], w[26], selector); - w[43] = hc_byte_perm (w[24], w[25], selector); - w[42] = hc_byte_perm (w[23], w[24], selector); - w[41] = hc_byte_perm (w[22], w[23], selector); - w[40] = hc_byte_perm (w[21], w[22], selector); - w[39] = hc_byte_perm (w[20], w[21], selector); - w[38] = hc_byte_perm (w[19], w[20], selector); - w[37] = hc_byte_perm (w[18], w[19], selector); - w[36] = hc_byte_perm (w[17], w[18], selector); - w[35] = hc_byte_perm (w[16], w[17], selector); - w[34] = hc_byte_perm (w[15], w[16], selector); - w[33] = hc_byte_perm (w[14], w[15], selector); - w[32] = hc_byte_perm (w[13], w[14], selector); - w[31] = hc_byte_perm (w[12], w[13], selector); - w[30] = hc_byte_perm (w[11], w[12], selector); - w[29] = hc_byte_perm (w[10], w[11], selector); - w[28] = hc_byte_perm (w[ 9], w[10], selector); - w[27] = hc_byte_perm (w[ 8], w[ 9], selector); - w[26] = hc_byte_perm (w[ 7], w[ 8], selector); - w[25] = hc_byte_perm (w[ 6], w[ 7], selector); - w[24] = hc_byte_perm (w[ 5], w[ 6], selector); - w[23] = hc_byte_perm (w[ 4], w[ 5], selector); - w[22] = hc_byte_perm (w[ 3], w[ 4], selector); - w[21] = hc_byte_perm (w[ 2], w[ 3], selector); - w[20] = hc_byte_perm (w[ 1], w[ 2], selector); - w[19] = hc_byte_perm (w[ 0], w[ 1], selector); - w[18] = hc_byte_perm ( 0, w[ 0], selector); - 
w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm (w[43], w[44], selector); - w[62] = hc_byte_perm (w[42], w[43], selector); - w[61] = hc_byte_perm (w[41], w[42], selector); - w[60] = hc_byte_perm (w[40], w[41], selector); - w[59] = hc_byte_perm (w[39], w[40], selector); - w[58] = hc_byte_perm (w[38], w[39], selector); - w[57] = hc_byte_perm (w[37], w[38], selector); - w[56] = hc_byte_perm (w[36], w[37], selector); - w[55] = hc_byte_perm (w[35], w[36], selector); - w[54] = hc_byte_perm (w[34], w[35], selector); - w[53] = hc_byte_perm (w[33], w[34], selector); - w[52] = hc_byte_perm (w[32], w[33], selector); - w[51] = hc_byte_perm (w[31], w[32], selector); - w[50] = hc_byte_perm (w[30], w[31], selector); - w[49] = hc_byte_perm (w[29], w[30], selector); - w[48] = hc_byte_perm (w[28], w[29], selector); - w[47] = hc_byte_perm (w[27], w[28], selector); - w[46] = hc_byte_perm (w[26], w[27], selector); - w[45] = hc_byte_perm (w[25], w[26], selector); - w[44] = hc_byte_perm (w[24], w[25], selector); - w[43] = hc_byte_perm (w[23], w[24], selector); - w[42] = hc_byte_perm (w[22], w[23], selector); - w[41] = hc_byte_perm (w[21], w[22], selector); - w[40] = hc_byte_perm (w[20], w[21], selector); - w[39] = hc_byte_perm (w[19], w[20], selector); - w[38] = hc_byte_perm (w[18], w[19], selector); - w[37] = hc_byte_perm (w[17], w[18], selector); - w[36] = hc_byte_perm (w[16], w[17], selector); - w[35] = hc_byte_perm (w[15], w[16], selector); - w[34] = hc_byte_perm (w[14], w[15], selector); - w[33] = hc_byte_perm (w[13], w[14], selector); - w[32] = hc_byte_perm (w[12], w[13], selector); - w[31] = hc_byte_perm (w[11], w[12], selector); - w[30] = hc_byte_perm (w[10], w[11], selector); - w[29] = hc_byte_perm (w[ 9], w[10], selector); - w[28] = hc_byte_perm (w[ 
8], w[ 9], selector); - w[27] = hc_byte_perm (w[ 7], w[ 8], selector); - w[26] = hc_byte_perm (w[ 6], w[ 7], selector); - w[25] = hc_byte_perm (w[ 5], w[ 6], selector); - w[24] = hc_byte_perm (w[ 4], w[ 5], selector); - w[23] = hc_byte_perm (w[ 3], w[ 4], selector); - w[22] = hc_byte_perm (w[ 2], w[ 3], selector); - w[21] = hc_byte_perm (w[ 1], w[ 2], selector); - w[20] = hc_byte_perm (w[ 0], w[ 1], selector); - w[19] = hc_byte_perm ( 0, w[ 0], selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm (w[42], w[43], selector); - w[62] = hc_byte_perm (w[41], w[42], selector); - w[61] = hc_byte_perm (w[40], w[41], selector); - w[60] = hc_byte_perm (w[39], w[40], selector); - w[59] = hc_byte_perm (w[38], w[39], selector); - w[58] = hc_byte_perm (w[37], w[38], selector); - w[57] = hc_byte_perm (w[36], w[37], selector); - w[56] = hc_byte_perm (w[35], w[36], selector); - w[55] = hc_byte_perm (w[34], w[35], selector); - w[54] = hc_byte_perm (w[33], w[34], selector); - w[53] = hc_byte_perm (w[32], w[33], selector); - w[52] = hc_byte_perm (w[31], w[32], selector); - w[51] = hc_byte_perm (w[30], w[31], selector); - w[50] = hc_byte_perm (w[29], w[30], selector); - w[49] = hc_byte_perm (w[28], w[29], selector); - w[48] = hc_byte_perm (w[27], w[28], selector); - w[47] = hc_byte_perm (w[26], w[27], selector); - w[46] = hc_byte_perm (w[25], w[26], selector); - w[45] = hc_byte_perm (w[24], w[25], selector); - w[44] = hc_byte_perm (w[23], w[24], selector); - w[43] = hc_byte_perm (w[22], w[23], selector); - w[42] = hc_byte_perm (w[21], w[22], selector); - w[41] = hc_byte_perm (w[20], w[21], selector); - w[40] = hc_byte_perm (w[19], w[20], selector); - w[39] = hc_byte_perm (w[18], w[19], selector); - w[38] = hc_byte_perm (w[17], w[18], 
selector); - w[37] = hc_byte_perm (w[16], w[17], selector); - w[36] = hc_byte_perm (w[15], w[16], selector); - w[35] = hc_byte_perm (w[14], w[15], selector); - w[34] = hc_byte_perm (w[13], w[14], selector); - w[33] = hc_byte_perm (w[12], w[13], selector); - w[32] = hc_byte_perm (w[11], w[12], selector); - w[31] = hc_byte_perm (w[10], w[11], selector); - w[30] = hc_byte_perm (w[ 9], w[10], selector); - w[29] = hc_byte_perm (w[ 8], w[ 9], selector); - w[28] = hc_byte_perm (w[ 7], w[ 8], selector); - w[27] = hc_byte_perm (w[ 6], w[ 7], selector); - w[26] = hc_byte_perm (w[ 5], w[ 6], selector); - w[25] = hc_byte_perm (w[ 4], w[ 5], selector); - w[24] = hc_byte_perm (w[ 3], w[ 4], selector); - w[23] = hc_byte_perm (w[ 2], w[ 3], selector); - w[22] = hc_byte_perm (w[ 1], w[ 2], selector); - w[21] = hc_byte_perm (w[ 0], w[ 1], selector); - w[20] = hc_byte_perm ( 0, w[ 0], selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm (w[41], w[42], selector); - w[62] = hc_byte_perm (w[40], w[41], selector); - w[61] = hc_byte_perm (w[39], w[40], selector); - w[60] = hc_byte_perm (w[38], w[39], selector); - w[59] = hc_byte_perm (w[37], w[38], selector); - w[58] = hc_byte_perm (w[36], w[37], selector); - w[57] = hc_byte_perm (w[35], w[36], selector); - w[56] = hc_byte_perm (w[34], w[35], selector); - w[55] = hc_byte_perm (w[33], w[34], selector); - w[54] = hc_byte_perm (w[32], w[33], selector); - w[53] = hc_byte_perm (w[31], w[32], selector); - w[52] = hc_byte_perm (w[30], w[31], selector); - w[51] = hc_byte_perm (w[29], w[30], selector); - w[50] = hc_byte_perm (w[28], w[29], selector); - w[49] = hc_byte_perm (w[27], w[28], selector); - w[48] = hc_byte_perm (w[26], w[27], selector); - w[47] = hc_byte_perm (w[25], w[26], 
selector); - w[46] = hc_byte_perm (w[24], w[25], selector); - w[45] = hc_byte_perm (w[23], w[24], selector); - w[44] = hc_byte_perm (w[22], w[23], selector); - w[43] = hc_byte_perm (w[21], w[22], selector); - w[42] = hc_byte_perm (w[20], w[21], selector); - w[41] = hc_byte_perm (w[19], w[20], selector); - w[40] = hc_byte_perm (w[18], w[19], selector); - w[39] = hc_byte_perm (w[17], w[18], selector); - w[38] = hc_byte_perm (w[16], w[17], selector); - w[37] = hc_byte_perm (w[15], w[16], selector); - w[36] = hc_byte_perm (w[14], w[15], selector); - w[35] = hc_byte_perm (w[13], w[14], selector); - w[34] = hc_byte_perm (w[12], w[13], selector); - w[33] = hc_byte_perm (w[11], w[12], selector); - w[32] = hc_byte_perm (w[10], w[11], selector); - w[31] = hc_byte_perm (w[ 9], w[10], selector); - w[30] = hc_byte_perm (w[ 8], w[ 9], selector); - w[29] = hc_byte_perm (w[ 7], w[ 8], selector); - w[28] = hc_byte_perm (w[ 6], w[ 7], selector); - w[27] = hc_byte_perm (w[ 5], w[ 6], selector); - w[26] = hc_byte_perm (w[ 4], w[ 5], selector); - w[25] = hc_byte_perm (w[ 3], w[ 4], selector); - w[24] = hc_byte_perm (w[ 2], w[ 3], selector); - w[23] = hc_byte_perm (w[ 1], w[ 2], selector); - w[22] = hc_byte_perm (w[ 0], w[ 1], selector); - w[21] = hc_byte_perm ( 0, w[ 0], selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm (w[40], w[41], selector); - w[62] = hc_byte_perm (w[39], w[40], selector); - w[61] = hc_byte_perm (w[38], w[39], selector); - w[60] = hc_byte_perm (w[37], w[38], selector); - w[59] = hc_byte_perm (w[36], w[37], selector); - w[58] = hc_byte_perm (w[35], w[36], selector); - w[57] = hc_byte_perm (w[34], w[35], selector); - w[56] = hc_byte_perm (w[33], w[34], selector); - w[55] = hc_byte_perm 
(w[32], w[33], selector); - w[54] = hc_byte_perm (w[31], w[32], selector); - w[53] = hc_byte_perm (w[30], w[31], selector); - w[52] = hc_byte_perm (w[29], w[30], selector); - w[51] = hc_byte_perm (w[28], w[29], selector); - w[50] = hc_byte_perm (w[27], w[28], selector); - w[49] = hc_byte_perm (w[26], w[27], selector); - w[48] = hc_byte_perm (w[25], w[26], selector); - w[47] = hc_byte_perm (w[24], w[25], selector); - w[46] = hc_byte_perm (w[23], w[24], selector); - w[45] = hc_byte_perm (w[22], w[23], selector); - w[44] = hc_byte_perm (w[21], w[22], selector); - w[43] = hc_byte_perm (w[20], w[21], selector); - w[42] = hc_byte_perm (w[19], w[20], selector); - w[41] = hc_byte_perm (w[18], w[19], selector); - w[40] = hc_byte_perm (w[17], w[18], selector); - w[39] = hc_byte_perm (w[16], w[17], selector); - w[38] = hc_byte_perm (w[15], w[16], selector); - w[37] = hc_byte_perm (w[14], w[15], selector); - w[36] = hc_byte_perm (w[13], w[14], selector); - w[35] = hc_byte_perm (w[12], w[13], selector); - w[34] = hc_byte_perm (w[11], w[12], selector); - w[33] = hc_byte_perm (w[10], w[11], selector); - w[32] = hc_byte_perm (w[ 9], w[10], selector); - w[31] = hc_byte_perm (w[ 8], w[ 9], selector); - w[30] = hc_byte_perm (w[ 7], w[ 8], selector); - w[29] = hc_byte_perm (w[ 6], w[ 7], selector); - w[28] = hc_byte_perm (w[ 5], w[ 6], selector); - w[27] = hc_byte_perm (w[ 4], w[ 5], selector); - w[26] = hc_byte_perm (w[ 3], w[ 4], selector); - w[25] = hc_byte_perm (w[ 2], w[ 3], selector); - w[24] = hc_byte_perm (w[ 1], w[ 2], selector); - w[23] = hc_byte_perm (w[ 0], w[ 1], selector); - w[22] = hc_byte_perm ( 0, w[ 0], selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm (w[39], w[40], selector); 
- w[62] = hc_byte_perm (w[38], w[39], selector); - w[61] = hc_byte_perm (w[37], w[38], selector); - w[60] = hc_byte_perm (w[36], w[37], selector); - w[59] = hc_byte_perm (w[35], w[36], selector); - w[58] = hc_byte_perm (w[34], w[35], selector); - w[57] = hc_byte_perm (w[33], w[34], selector); - w[56] = hc_byte_perm (w[32], w[33], selector); - w[55] = hc_byte_perm (w[31], w[32], selector); - w[54] = hc_byte_perm (w[30], w[31], selector); - w[53] = hc_byte_perm (w[29], w[30], selector); - w[52] = hc_byte_perm (w[28], w[29], selector); - w[51] = hc_byte_perm (w[27], w[28], selector); - w[50] = hc_byte_perm (w[26], w[27], selector); - w[49] = hc_byte_perm (w[25], w[26], selector); - w[48] = hc_byte_perm (w[24], w[25], selector); - w[47] = hc_byte_perm (w[23], w[24], selector); - w[46] = hc_byte_perm (w[22], w[23], selector); - w[45] = hc_byte_perm (w[21], w[22], selector); - w[44] = hc_byte_perm (w[20], w[21], selector); - w[43] = hc_byte_perm (w[19], w[20], selector); - w[42] = hc_byte_perm (w[18], w[19], selector); - w[41] = hc_byte_perm (w[17], w[18], selector); - w[40] = hc_byte_perm (w[16], w[17], selector); - w[39] = hc_byte_perm (w[15], w[16], selector); - w[38] = hc_byte_perm (w[14], w[15], selector); - w[37] = hc_byte_perm (w[13], w[14], selector); - w[36] = hc_byte_perm (w[12], w[13], selector); - w[35] = hc_byte_perm (w[11], w[12], selector); - w[34] = hc_byte_perm (w[10], w[11], selector); - w[33] = hc_byte_perm (w[ 9], w[10], selector); - w[32] = hc_byte_perm (w[ 8], w[ 9], selector); - w[31] = hc_byte_perm (w[ 7], w[ 8], selector); - w[30] = hc_byte_perm (w[ 6], w[ 7], selector); - w[29] = hc_byte_perm (w[ 5], w[ 6], selector); - w[28] = hc_byte_perm (w[ 4], w[ 5], selector); - w[27] = hc_byte_perm (w[ 3], w[ 4], selector); - w[26] = hc_byte_perm (w[ 2], w[ 3], selector); - w[25] = hc_byte_perm (w[ 1], w[ 2], selector); - w[24] = hc_byte_perm (w[ 0], w[ 1], selector); - w[23] = hc_byte_perm ( 0, w[ 0], selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - 
w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm (w[38], w[39], selector); - w[62] = hc_byte_perm (w[37], w[38], selector); - w[61] = hc_byte_perm (w[36], w[37], selector); - w[60] = hc_byte_perm (w[35], w[36], selector); - w[59] = hc_byte_perm (w[34], w[35], selector); - w[58] = hc_byte_perm (w[33], w[34], selector); - w[57] = hc_byte_perm (w[32], w[33], selector); - w[56] = hc_byte_perm (w[31], w[32], selector); - w[55] = hc_byte_perm (w[30], w[31], selector); - w[54] = hc_byte_perm (w[29], w[30], selector); - w[53] = hc_byte_perm (w[28], w[29], selector); - w[52] = hc_byte_perm (w[27], w[28], selector); - w[51] = hc_byte_perm (w[26], w[27], selector); - w[50] = hc_byte_perm (w[25], w[26], selector); - w[49] = hc_byte_perm (w[24], w[25], selector); - w[48] = hc_byte_perm (w[23], w[24], selector); - w[47] = hc_byte_perm (w[22], w[23], selector); - w[46] = hc_byte_perm (w[21], w[22], selector); - w[45] = hc_byte_perm (w[20], w[21], selector); - w[44] = hc_byte_perm (w[19], w[20], selector); - w[43] = hc_byte_perm (w[18], w[19], selector); - w[42] = hc_byte_perm (w[17], w[18], selector); - w[41] = hc_byte_perm (w[16], w[17], selector); - w[40] = hc_byte_perm (w[15], w[16], selector); - w[39] = hc_byte_perm (w[14], w[15], selector); - w[38] = hc_byte_perm (w[13], w[14], selector); - w[37] = hc_byte_perm (w[12], w[13], selector); - w[36] = hc_byte_perm (w[11], w[12], selector); - w[35] = hc_byte_perm (w[10], w[11], selector); - w[34] = hc_byte_perm (w[ 9], w[10], selector); - w[33] = hc_byte_perm (w[ 8], w[ 9], selector); - w[32] = hc_byte_perm (w[ 7], w[ 8], selector); - w[31] = hc_byte_perm (w[ 6], w[ 7], selector); - w[30] = hc_byte_perm (w[ 5], w[ 6], selector); - w[29] = hc_byte_perm (w[ 4], w[ 5], selector); - 
w[28] = hc_byte_perm (w[ 3], w[ 4], selector); - w[27] = hc_byte_perm (w[ 2], w[ 3], selector); - w[26] = hc_byte_perm (w[ 1], w[ 2], selector); - w[25] = hc_byte_perm (w[ 0], w[ 1], selector); - w[24] = hc_byte_perm ( 0, w[ 0], selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm (w[37], w[38], selector); - w[62] = hc_byte_perm (w[36], w[37], selector); - w[61] = hc_byte_perm (w[35], w[36], selector); - w[60] = hc_byte_perm (w[34], w[35], selector); - w[59] = hc_byte_perm (w[33], w[34], selector); - w[58] = hc_byte_perm (w[32], w[33], selector); - w[57] = hc_byte_perm (w[31], w[32], selector); - w[56] = hc_byte_perm (w[30], w[31], selector); - w[55] = hc_byte_perm (w[29], w[30], selector); - w[54] = hc_byte_perm (w[28], w[29], selector); - w[53] = hc_byte_perm (w[27], w[28], selector); - w[52] = hc_byte_perm (w[26], w[27], selector); - w[51] = hc_byte_perm (w[25], w[26], selector); - w[50] = hc_byte_perm (w[24], w[25], selector); - w[49] = hc_byte_perm (w[23], w[24], selector); - w[48] = hc_byte_perm (w[22], w[23], selector); - w[47] = hc_byte_perm (w[21], w[22], selector); - w[46] = hc_byte_perm (w[20], w[21], selector); - w[45] = hc_byte_perm (w[19], w[20], selector); - w[44] = hc_byte_perm (w[18], w[19], selector); - w[43] = hc_byte_perm (w[17], w[18], selector); - w[42] = hc_byte_perm (w[16], w[17], selector); - w[41] = hc_byte_perm (w[15], w[16], selector); - w[40] = hc_byte_perm (w[14], w[15], selector); - w[39] = hc_byte_perm (w[13], w[14], selector); - w[38] = hc_byte_perm (w[12], w[13], selector); - w[37] = hc_byte_perm (w[11], w[12], selector); - w[36] = hc_byte_perm (w[10], w[11], selector); - w[35] = hc_byte_perm (w[ 9], w[10], selector); - 
w[34] = hc_byte_perm (w[ 8], w[ 9], selector); - w[33] = hc_byte_perm (w[ 7], w[ 8], selector); - w[32] = hc_byte_perm (w[ 6], w[ 7], selector); - w[31] = hc_byte_perm (w[ 5], w[ 6], selector); - w[30] = hc_byte_perm (w[ 4], w[ 5], selector); - w[29] = hc_byte_perm (w[ 3], w[ 4], selector); - w[28] = hc_byte_perm (w[ 2], w[ 3], selector); - w[27] = hc_byte_perm (w[ 1], w[ 2], selector); - w[26] = hc_byte_perm (w[ 0], w[ 1], selector); - w[25] = hc_byte_perm ( 0, w[ 0], selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm (w[36], w[37], selector); - w[62] = hc_byte_perm (w[35], w[36], selector); - w[61] = hc_byte_perm (w[34], w[35], selector); - w[60] = hc_byte_perm (w[33], w[34], selector); - w[59] = hc_byte_perm (w[32], w[33], selector); - w[58] = hc_byte_perm (w[31], w[32], selector); - w[57] = hc_byte_perm (w[30], w[31], selector); - w[56] = hc_byte_perm (w[29], w[30], selector); - w[55] = hc_byte_perm (w[28], w[29], selector); - w[54] = hc_byte_perm (w[27], w[28], selector); - w[53] = hc_byte_perm (w[26], w[27], selector); - w[52] = hc_byte_perm (w[25], w[26], selector); - w[51] = hc_byte_perm (w[24], w[25], selector); - w[50] = hc_byte_perm (w[23], w[24], selector); - w[49] = hc_byte_perm (w[22], w[23], selector); - w[48] = hc_byte_perm (w[21], w[22], selector); - w[47] = hc_byte_perm (w[20], w[21], selector); - w[46] = hc_byte_perm (w[19], w[20], selector); - w[45] = hc_byte_perm (w[18], w[19], selector); - w[44] = hc_byte_perm (w[17], w[18], selector); - w[43] = hc_byte_perm (w[16], w[17], selector); - w[42] = hc_byte_perm (w[15], w[16], selector); - w[41] = hc_byte_perm (w[14], w[15], selector); - w[40] = hc_byte_perm (w[13], w[14], 
selector); - w[39] = hc_byte_perm (w[12], w[13], selector); - w[38] = hc_byte_perm (w[11], w[12], selector); - w[37] = hc_byte_perm (w[10], w[11], selector); - w[36] = hc_byte_perm (w[ 9], w[10], selector); - w[35] = hc_byte_perm (w[ 8], w[ 9], selector); - w[34] = hc_byte_perm (w[ 7], w[ 8], selector); - w[33] = hc_byte_perm (w[ 6], w[ 7], selector); - w[32] = hc_byte_perm (w[ 5], w[ 6], selector); - w[31] = hc_byte_perm (w[ 4], w[ 5], selector); - w[30] = hc_byte_perm (w[ 3], w[ 4], selector); - w[29] = hc_byte_perm (w[ 2], w[ 3], selector); - w[28] = hc_byte_perm (w[ 1], w[ 2], selector); - w[27] = hc_byte_perm (w[ 0], w[ 1], selector); - w[26] = hc_byte_perm ( 0, w[ 0], selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm (w[35], w[36], selector); - w[62] = hc_byte_perm (w[34], w[35], selector); - w[61] = hc_byte_perm (w[33], w[34], selector); - w[60] = hc_byte_perm (w[32], w[33], selector); - w[59] = hc_byte_perm (w[31], w[32], selector); - w[58] = hc_byte_perm (w[30], w[31], selector); - w[57] = hc_byte_perm (w[29], w[30], selector); - w[56] = hc_byte_perm (w[28], w[29], selector); - w[55] = hc_byte_perm (w[27], w[28], selector); - w[54] = hc_byte_perm (w[26], w[27], selector); - w[53] = hc_byte_perm (w[25], w[26], selector); - w[52] = hc_byte_perm (w[24], w[25], selector); - w[51] = hc_byte_perm (w[23], w[24], selector); - w[50] = hc_byte_perm (w[22], w[23], selector); - w[49] = hc_byte_perm (w[21], w[22], selector); - w[48] = hc_byte_perm (w[20], w[21], selector); - w[47] = hc_byte_perm (w[19], w[20], selector); - w[46] = hc_byte_perm (w[18], w[19], selector); - w[45] = hc_byte_perm (w[17], w[18], selector); - w[44] = 
hc_byte_perm (w[16], w[17], selector); - w[43] = hc_byte_perm (w[15], w[16], selector); - w[42] = hc_byte_perm (w[14], w[15], selector); - w[41] = hc_byte_perm (w[13], w[14], selector); - w[40] = hc_byte_perm (w[12], w[13], selector); - w[39] = hc_byte_perm (w[11], w[12], selector); - w[38] = hc_byte_perm (w[10], w[11], selector); - w[37] = hc_byte_perm (w[ 9], w[10], selector); - w[36] = hc_byte_perm (w[ 8], w[ 9], selector); - w[35] = hc_byte_perm (w[ 7], w[ 8], selector); - w[34] = hc_byte_perm (w[ 6], w[ 7], selector); - w[33] = hc_byte_perm (w[ 5], w[ 6], selector); - w[32] = hc_byte_perm (w[ 4], w[ 5], selector); - w[31] = hc_byte_perm (w[ 3], w[ 4], selector); - w[30] = hc_byte_perm (w[ 2], w[ 3], selector); - w[29] = hc_byte_perm (w[ 1], w[ 2], selector); - w[28] = hc_byte_perm (w[ 0], w[ 1], selector); - w[27] = hc_byte_perm ( 0, w[ 0], selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm (w[34], w[35], selector); - w[62] = hc_byte_perm (w[33], w[34], selector); - w[61] = hc_byte_perm (w[32], w[33], selector); - w[60] = hc_byte_perm (w[31], w[32], selector); - w[59] = hc_byte_perm (w[30], w[31], selector); - w[58] = hc_byte_perm (w[29], w[30], selector); - w[57] = hc_byte_perm (w[28], w[29], selector); - w[56] = hc_byte_perm (w[27], w[28], selector); - w[55] = hc_byte_perm (w[26], w[27], selector); - w[54] = hc_byte_perm (w[25], w[26], selector); - w[53] = hc_byte_perm (w[24], w[25], selector); - w[52] = hc_byte_perm (w[23], w[24], selector); - w[51] = hc_byte_perm (w[22], w[23], selector); - w[50] = hc_byte_perm (w[21], w[22], selector); - w[49] = hc_byte_perm (w[20], w[21], selector); - w[48] = 
hc_byte_perm (w[19], w[20], selector); - w[47] = hc_byte_perm (w[18], w[19], selector); - w[46] = hc_byte_perm (w[17], w[18], selector); - w[45] = hc_byte_perm (w[16], w[17], selector); - w[44] = hc_byte_perm (w[15], w[16], selector); - w[43] = hc_byte_perm (w[14], w[15], selector); - w[42] = hc_byte_perm (w[13], w[14], selector); - w[41] = hc_byte_perm (w[12], w[13], selector); - w[40] = hc_byte_perm (w[11], w[12], selector); - w[39] = hc_byte_perm (w[10], w[11], selector); - w[38] = hc_byte_perm (w[ 9], w[10], selector); - w[37] = hc_byte_perm (w[ 8], w[ 9], selector); - w[36] = hc_byte_perm (w[ 7], w[ 8], selector); - w[35] = hc_byte_perm (w[ 6], w[ 7], selector); - w[34] = hc_byte_perm (w[ 5], w[ 6], selector); - w[33] = hc_byte_perm (w[ 4], w[ 5], selector); - w[32] = hc_byte_perm (w[ 3], w[ 4], selector); - w[31] = hc_byte_perm (w[ 2], w[ 3], selector); - w[30] = hc_byte_perm (w[ 1], w[ 2], selector); - w[29] = hc_byte_perm (w[ 0], w[ 1], selector); - w[28] = hc_byte_perm ( 0, w[ 0], selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm (w[33], w[34], selector); - w[62] = hc_byte_perm (w[32], w[33], selector); - w[61] = hc_byte_perm (w[31], w[32], selector); - w[60] = hc_byte_perm (w[30], w[31], selector); - w[59] = hc_byte_perm (w[29], w[30], selector); - w[58] = hc_byte_perm (w[28], w[29], selector); - w[57] = hc_byte_perm (w[27], w[28], selector); - w[56] = hc_byte_perm (w[26], w[27], selector); - w[55] = hc_byte_perm (w[25], w[26], selector); - w[54] = hc_byte_perm (w[24], w[25], selector); - w[53] = hc_byte_perm (w[23], w[24], selector); - w[52] = hc_byte_perm (w[22], w[23], selector); - w[51] 
= hc_byte_perm (w[21], w[22], selector); - w[50] = hc_byte_perm (w[20], w[21], selector); - w[49] = hc_byte_perm (w[19], w[20], selector); - w[48] = hc_byte_perm (w[18], w[19], selector); - w[47] = hc_byte_perm (w[17], w[18], selector); - w[46] = hc_byte_perm (w[16], w[17], selector); - w[45] = hc_byte_perm (w[15], w[16], selector); - w[44] = hc_byte_perm (w[14], w[15], selector); - w[43] = hc_byte_perm (w[13], w[14], selector); - w[42] = hc_byte_perm (w[12], w[13], selector); - w[41] = hc_byte_perm (w[11], w[12], selector); - w[40] = hc_byte_perm (w[10], w[11], selector); - w[39] = hc_byte_perm (w[ 9], w[10], selector); - w[38] = hc_byte_perm (w[ 8], w[ 9], selector); - w[37] = hc_byte_perm (w[ 7], w[ 8], selector); - w[36] = hc_byte_perm (w[ 6], w[ 7], selector); - w[35] = hc_byte_perm (w[ 5], w[ 6], selector); - w[34] = hc_byte_perm (w[ 4], w[ 5], selector); - w[33] = hc_byte_perm (w[ 3], w[ 4], selector); - w[32] = hc_byte_perm (w[ 2], w[ 3], selector); - w[31] = hc_byte_perm (w[ 1], w[ 2], selector); - w[30] = hc_byte_perm (w[ 0], w[ 1], selector); - w[29] = hc_byte_perm ( 0, w[ 0], selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm (w[32], w[33], selector); - w[62] = hc_byte_perm (w[31], w[32], selector); - w[61] = hc_byte_perm (w[30], w[31], selector); - w[60] = hc_byte_perm (w[29], w[30], selector); - w[59] = hc_byte_perm (w[28], w[29], selector); - w[58] = hc_byte_perm (w[27], w[28], selector); - w[57] = hc_byte_perm (w[26], w[27], selector); - w[56] = hc_byte_perm (w[25], w[26], selector); - w[55] = hc_byte_perm (w[24], w[25], selector); - w[54] = hc_byte_perm (w[23], w[24], 
selector); - w[53] = hc_byte_perm (w[22], w[23], selector); - w[52] = hc_byte_perm (w[21], w[22], selector); - w[51] = hc_byte_perm (w[20], w[21], selector); - w[50] = hc_byte_perm (w[19], w[20], selector); - w[49] = hc_byte_perm (w[18], w[19], selector); - w[48] = hc_byte_perm (w[17], w[18], selector); - w[47] = hc_byte_perm (w[16], w[17], selector); - w[46] = hc_byte_perm (w[15], w[16], selector); - w[45] = hc_byte_perm (w[14], w[15], selector); - w[44] = hc_byte_perm (w[13], w[14], selector); - w[43] = hc_byte_perm (w[12], w[13], selector); - w[42] = hc_byte_perm (w[11], w[12], selector); - w[41] = hc_byte_perm (w[10], w[11], selector); - w[40] = hc_byte_perm (w[ 9], w[10], selector); - w[39] = hc_byte_perm (w[ 8], w[ 9], selector); - w[38] = hc_byte_perm (w[ 7], w[ 8], selector); - w[37] = hc_byte_perm (w[ 6], w[ 7], selector); - w[36] = hc_byte_perm (w[ 5], w[ 6], selector); - w[35] = hc_byte_perm (w[ 4], w[ 5], selector); - w[34] = hc_byte_perm (w[ 3], w[ 4], selector); - w[33] = hc_byte_perm (w[ 2], w[ 3], selector); - w[32] = hc_byte_perm (w[ 1], w[ 2], selector); - w[31] = hc_byte_perm (w[ 0], w[ 1], selector); - w[30] = hc_byte_perm ( 0, w[ 0], selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm (w[31], w[32], selector); - w[62] = hc_byte_perm (w[30], w[31], selector); - w[61] = hc_byte_perm (w[29], w[30], selector); - w[60] = hc_byte_perm (w[28], w[29], selector); - w[59] = hc_byte_perm (w[27], w[28], selector); - w[58] = hc_byte_perm (w[26], w[27], selector); - w[57] = hc_byte_perm (w[25], w[26], selector); - w[56] = hc_byte_perm (w[24], w[25], selector); - w[55] = 
hc_byte_perm (w[23], w[24], selector); - w[54] = hc_byte_perm (w[22], w[23], selector); - w[53] = hc_byte_perm (w[21], w[22], selector); - w[52] = hc_byte_perm (w[20], w[21], selector); - w[51] = hc_byte_perm (w[19], w[20], selector); - w[50] = hc_byte_perm (w[18], w[19], selector); - w[49] = hc_byte_perm (w[17], w[18], selector); - w[48] = hc_byte_perm (w[16], w[17], selector); - w[47] = hc_byte_perm (w[15], w[16], selector); - w[46] = hc_byte_perm (w[14], w[15], selector); - w[45] = hc_byte_perm (w[13], w[14], selector); - w[44] = hc_byte_perm (w[12], w[13], selector); - w[43] = hc_byte_perm (w[11], w[12], selector); - w[42] = hc_byte_perm (w[10], w[11], selector); - w[41] = hc_byte_perm (w[ 9], w[10], selector); - w[40] = hc_byte_perm (w[ 8], w[ 9], selector); - w[39] = hc_byte_perm (w[ 7], w[ 8], selector); - w[38] = hc_byte_perm (w[ 6], w[ 7], selector); - w[37] = hc_byte_perm (w[ 5], w[ 6], selector); - w[36] = hc_byte_perm (w[ 4], w[ 5], selector); - w[35] = hc_byte_perm (w[ 3], w[ 4], selector); - w[34] = hc_byte_perm (w[ 2], w[ 3], selector); - w[33] = hc_byte_perm (w[ 1], w[ 2], selector); - w[32] = hc_byte_perm (w[ 0], w[ 1], selector); - w[31] = hc_byte_perm ( 0, w[ 0], selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm (w[30], w[31], selector); - w[62] = hc_byte_perm (w[29], w[30], selector); - w[61] = hc_byte_perm (w[28], w[29], selector); - w[60] = hc_byte_perm (w[27], w[28], selector); - w[59] = hc_byte_perm (w[26], w[27], selector); - w[58] = hc_byte_perm (w[25], w[26], selector); - w[57] = hc_byte_perm (w[24], w[25], selector); - w[56] = 
hc_byte_perm (w[23], w[24], selector); - w[55] = hc_byte_perm (w[22], w[23], selector); - w[54] = hc_byte_perm (w[21], w[22], selector); - w[53] = hc_byte_perm (w[20], w[21], selector); - w[52] = hc_byte_perm (w[19], w[20], selector); - w[51] = hc_byte_perm (w[18], w[19], selector); - w[50] = hc_byte_perm (w[17], w[18], selector); - w[49] = hc_byte_perm (w[16], w[17], selector); - w[48] = hc_byte_perm (w[15], w[16], selector); - w[47] = hc_byte_perm (w[14], w[15], selector); - w[46] = hc_byte_perm (w[13], w[14], selector); - w[45] = hc_byte_perm (w[12], w[13], selector); - w[44] = hc_byte_perm (w[11], w[12], selector); - w[43] = hc_byte_perm (w[10], w[11], selector); - w[42] = hc_byte_perm (w[ 9], w[10], selector); - w[41] = hc_byte_perm (w[ 8], w[ 9], selector); - w[40] = hc_byte_perm (w[ 7], w[ 8], selector); - w[39] = hc_byte_perm (w[ 6], w[ 7], selector); - w[38] = hc_byte_perm (w[ 5], w[ 6], selector); - w[37] = hc_byte_perm (w[ 4], w[ 5], selector); - w[36] = hc_byte_perm (w[ 3], w[ 4], selector); - w[35] = hc_byte_perm (w[ 2], w[ 3], selector); - w[34] = hc_byte_perm (w[ 1], w[ 2], selector); - w[33] = hc_byte_perm (w[ 0], w[ 1], selector); - w[32] = hc_byte_perm ( 0, w[ 0], selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm (w[29], w[30], selector); - w[62] = hc_byte_perm (w[28], w[29], selector); - w[61] = hc_byte_perm (w[27], w[28], selector); - w[60] = hc_byte_perm (w[26], w[27], selector); - w[59] = hc_byte_perm (w[25], w[26], selector); - w[58] = hc_byte_perm (w[24], w[25], selector); - w[57] = hc_byte_perm (w[23], w[24], selector); - 
w[56] = hc_byte_perm (w[22], w[23], selector); - w[55] = hc_byte_perm (w[21], w[22], selector); - w[54] = hc_byte_perm (w[20], w[21], selector); - w[53] = hc_byte_perm (w[19], w[20], selector); - w[52] = hc_byte_perm (w[18], w[19], selector); - w[51] = hc_byte_perm (w[17], w[18], selector); - w[50] = hc_byte_perm (w[16], w[17], selector); - w[49] = hc_byte_perm (w[15], w[16], selector); - w[48] = hc_byte_perm (w[14], w[15], selector); - w[47] = hc_byte_perm (w[13], w[14], selector); - w[46] = hc_byte_perm (w[12], w[13], selector); - w[45] = hc_byte_perm (w[11], w[12], selector); - w[44] = hc_byte_perm (w[10], w[11], selector); - w[43] = hc_byte_perm (w[ 9], w[10], selector); - w[42] = hc_byte_perm (w[ 8], w[ 9], selector); - w[41] = hc_byte_perm (w[ 7], w[ 8], selector); - w[40] = hc_byte_perm (w[ 6], w[ 7], selector); - w[39] = hc_byte_perm (w[ 5], w[ 6], selector); - w[38] = hc_byte_perm (w[ 4], w[ 5], selector); - w[37] = hc_byte_perm (w[ 3], w[ 4], selector); - w[36] = hc_byte_perm (w[ 2], w[ 3], selector); - w[35] = hc_byte_perm (w[ 1], w[ 2], selector); - w[34] = hc_byte_perm (w[ 0], w[ 1], selector); - w[33] = hc_byte_perm ( 0, w[ 0], selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm (w[28], w[29], selector); - w[62] = hc_byte_perm (w[27], w[28], selector); - w[61] = hc_byte_perm (w[26], w[27], selector); - w[60] = hc_byte_perm (w[25], w[26], selector); - w[59] = hc_byte_perm (w[24], w[25], selector); - w[58] = hc_byte_perm (w[23], w[24], selector); - w[57] = hc_byte_perm (w[22], w[23], selector); - w[56] = hc_byte_perm (w[21], 
w[22], selector); - w[55] = hc_byte_perm (w[20], w[21], selector); - w[54] = hc_byte_perm (w[19], w[20], selector); - w[53] = hc_byte_perm (w[18], w[19], selector); - w[52] = hc_byte_perm (w[17], w[18], selector); - w[51] = hc_byte_perm (w[16], w[17], selector); - w[50] = hc_byte_perm (w[15], w[16], selector); - w[49] = hc_byte_perm (w[14], w[15], selector); - w[48] = hc_byte_perm (w[13], w[14], selector); - w[47] = hc_byte_perm (w[12], w[13], selector); - w[46] = hc_byte_perm (w[11], w[12], selector); - w[45] = hc_byte_perm (w[10], w[11], selector); - w[44] = hc_byte_perm (w[ 9], w[10], selector); - w[43] = hc_byte_perm (w[ 8], w[ 9], selector); - w[42] = hc_byte_perm (w[ 7], w[ 8], selector); - w[41] = hc_byte_perm (w[ 6], w[ 7], selector); - w[40] = hc_byte_perm (w[ 5], w[ 6], selector); - w[39] = hc_byte_perm (w[ 4], w[ 5], selector); - w[38] = hc_byte_perm (w[ 3], w[ 4], selector); - w[37] = hc_byte_perm (w[ 2], w[ 3], selector); - w[36] = hc_byte_perm (w[ 1], w[ 2], selector); - w[35] = hc_byte_perm (w[ 0], w[ 1], selector); - w[34] = hc_byte_perm ( 0, w[ 0], selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm (w[27], w[28], selector); - w[62] = hc_byte_perm (w[26], w[27], selector); - w[61] = hc_byte_perm (w[25], w[26], selector); - w[60] = hc_byte_perm (w[24], w[25], selector); - w[59] = hc_byte_perm (w[23], w[24], selector); - w[58] = hc_byte_perm (w[22], w[23], selector); - w[57] = hc_byte_perm (w[21], w[22], selector); - w[56] = hc_byte_perm (w[20], w[21], selector); - w[55] = hc_byte_perm (w[19], w[20], selector); 
- w[54] = hc_byte_perm (w[18], w[19], selector); - w[53] = hc_byte_perm (w[17], w[18], selector); - w[52] = hc_byte_perm (w[16], w[17], selector); - w[51] = hc_byte_perm (w[15], w[16], selector); - w[50] = hc_byte_perm (w[14], w[15], selector); - w[49] = hc_byte_perm (w[13], w[14], selector); - w[48] = hc_byte_perm (w[12], w[13], selector); - w[47] = hc_byte_perm (w[11], w[12], selector); - w[46] = hc_byte_perm (w[10], w[11], selector); - w[45] = hc_byte_perm (w[ 9], w[10], selector); - w[44] = hc_byte_perm (w[ 8], w[ 9], selector); - w[43] = hc_byte_perm (w[ 7], w[ 8], selector); - w[42] = hc_byte_perm (w[ 6], w[ 7], selector); - w[41] = hc_byte_perm (w[ 5], w[ 6], selector); - w[40] = hc_byte_perm (w[ 4], w[ 5], selector); - w[39] = hc_byte_perm (w[ 3], w[ 4], selector); - w[38] = hc_byte_perm (w[ 2], w[ 3], selector); - w[37] = hc_byte_perm (w[ 1], w[ 2], selector); - w[36] = hc_byte_perm (w[ 0], w[ 1], selector); - w[35] = hc_byte_perm ( 0, w[ 0], selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm (w[26], w[27], selector); - w[62] = hc_byte_perm (w[25], w[26], selector); - w[61] = hc_byte_perm (w[24], w[25], selector); - w[60] = hc_byte_perm (w[23], w[24], selector); - w[59] = hc_byte_perm (w[22], w[23], selector); - w[58] = hc_byte_perm (w[21], w[22], selector); - w[57] = hc_byte_perm (w[20], w[21], selector); - w[56] = hc_byte_perm (w[19], w[20], selector); - w[55] = hc_byte_perm (w[18], w[19], selector); - w[54] = hc_byte_perm (w[17], w[18], selector); - w[53] = hc_byte_perm (w[16], w[17], selector); - 
w[52] = hc_byte_perm (w[15], w[16], selector); - w[51] = hc_byte_perm (w[14], w[15], selector); - w[50] = hc_byte_perm (w[13], w[14], selector); - w[49] = hc_byte_perm (w[12], w[13], selector); - w[48] = hc_byte_perm (w[11], w[12], selector); - w[47] = hc_byte_perm (w[10], w[11], selector); - w[46] = hc_byte_perm (w[ 9], w[10], selector); - w[45] = hc_byte_perm (w[ 8], w[ 9], selector); - w[44] = hc_byte_perm (w[ 7], w[ 8], selector); - w[43] = hc_byte_perm (w[ 6], w[ 7], selector); - w[42] = hc_byte_perm (w[ 5], w[ 6], selector); - w[41] = hc_byte_perm (w[ 4], w[ 5], selector); - w[40] = hc_byte_perm (w[ 3], w[ 4], selector); - w[39] = hc_byte_perm (w[ 2], w[ 3], selector); - w[38] = hc_byte_perm (w[ 1], w[ 2], selector); - w[37] = hc_byte_perm (w[ 0], w[ 1], selector); - w[36] = hc_byte_perm ( 0, w[ 0], selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm (w[25], w[26], selector); - w[62] = hc_byte_perm (w[24], w[25], selector); - w[61] = hc_byte_perm (w[23], w[24], selector); - w[60] = hc_byte_perm (w[22], w[23], selector); - w[59] = hc_byte_perm (w[21], w[22], selector); - w[58] = hc_byte_perm (w[20], w[21], selector); - w[57] = hc_byte_perm (w[19], w[20], selector); - w[56] = hc_byte_perm (w[18], w[19], selector); - w[55] = hc_byte_perm (w[17], w[18], selector); - w[54] = hc_byte_perm (w[16], w[17], selector); - w[53] = hc_byte_perm (w[15], w[16], selector); - w[52] = hc_byte_perm (w[14], w[15], selector); - w[51] = hc_byte_perm (w[13], w[14], selector); - w[50] = hc_byte_perm (w[12], w[13], 
selector); - w[49] = hc_byte_perm (w[11], w[12], selector); - w[48] = hc_byte_perm (w[10], w[11], selector); - w[47] = hc_byte_perm (w[ 9], w[10], selector); - w[46] = hc_byte_perm (w[ 8], w[ 9], selector); - w[45] = hc_byte_perm (w[ 7], w[ 8], selector); - w[44] = hc_byte_perm (w[ 6], w[ 7], selector); - w[43] = hc_byte_perm (w[ 5], w[ 6], selector); - w[42] = hc_byte_perm (w[ 4], w[ 5], selector); - w[41] = hc_byte_perm (w[ 3], w[ 4], selector); - w[40] = hc_byte_perm (w[ 2], w[ 3], selector); - w[39] = hc_byte_perm (w[ 1], w[ 2], selector); - w[38] = hc_byte_perm (w[ 0], w[ 1], selector); - w[37] = hc_byte_perm ( 0, w[ 0], selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm (w[24], w[25], selector); - w[62] = hc_byte_perm (w[23], w[24], selector); - w[61] = hc_byte_perm (w[22], w[23], selector); - w[60] = hc_byte_perm (w[21], w[22], selector); - w[59] = hc_byte_perm (w[20], w[21], selector); - w[58] = hc_byte_perm (w[19], w[20], selector); - w[57] = hc_byte_perm (w[18], w[19], selector); - w[56] = hc_byte_perm (w[17], w[18], selector); - w[55] = hc_byte_perm (w[16], w[17], selector); - w[54] = hc_byte_perm (w[15], w[16], selector); - w[53] = hc_byte_perm (w[14], w[15], selector); - w[52] = hc_byte_perm (w[13], w[14], selector); - w[51] = hc_byte_perm (w[12], w[13], selector); - w[50] = hc_byte_perm (w[11], w[12], selector); - w[49] = hc_byte_perm (w[10], w[11], selector); - w[48] = hc_byte_perm (w[ 9], w[10], selector); - w[47] = hc_byte_perm (w[ 8], w[ 9], selector); - w[46] = 
hc_byte_perm (w[ 7], w[ 8], selector); - w[45] = hc_byte_perm (w[ 6], w[ 7], selector); - w[44] = hc_byte_perm (w[ 5], w[ 6], selector); - w[43] = hc_byte_perm (w[ 4], w[ 5], selector); - w[42] = hc_byte_perm (w[ 3], w[ 4], selector); - w[41] = hc_byte_perm (w[ 2], w[ 3], selector); - w[40] = hc_byte_perm (w[ 1], w[ 2], selector); - w[39] = hc_byte_perm (w[ 0], w[ 1], selector); - w[38] = hc_byte_perm ( 0, w[ 0], selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm (w[23], w[24], selector); - w[62] = hc_byte_perm (w[22], w[23], selector); - w[61] = hc_byte_perm (w[21], w[22], selector); - w[60] = hc_byte_perm (w[20], w[21], selector); - w[59] = hc_byte_perm (w[19], w[20], selector); - w[58] = hc_byte_perm (w[18], w[19], selector); - w[57] = hc_byte_perm (w[17], w[18], selector); - w[56] = hc_byte_perm (w[16], w[17], selector); - w[55] = hc_byte_perm (w[15], w[16], selector); - w[54] = hc_byte_perm (w[14], w[15], selector); - w[53] = hc_byte_perm (w[13], w[14], selector); - w[52] = hc_byte_perm (w[12], w[13], selector); - w[51] = hc_byte_perm (w[11], w[12], selector); - w[50] = hc_byte_perm (w[10], w[11], selector); - w[49] = hc_byte_perm (w[ 9], w[10], selector); - w[48] = hc_byte_perm (w[ 8], w[ 9], selector); - w[47] = hc_byte_perm (w[ 7], w[ 8], selector); - w[46] = hc_byte_perm (w[ 6], w[ 7], selector); - w[45] = hc_byte_perm (w[ 5], w[ 6], selector); - w[44] = hc_byte_perm (w[ 4], w[ 5], selector); - w[43] = hc_byte_perm (w[ 3], w[ 4], selector); - w[42] = hc_byte_perm 
(w[ 2], w[ 3], selector); - w[41] = hc_byte_perm (w[ 1], w[ 2], selector); - w[40] = hc_byte_perm (w[ 0], w[ 1], selector); - w[39] = hc_byte_perm ( 0, w[ 0], selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm (w[22], w[23], selector); - w[62] = hc_byte_perm (w[21], w[22], selector); - w[61] = hc_byte_perm (w[20], w[21], selector); - w[60] = hc_byte_perm (w[19], w[20], selector); - w[59] = hc_byte_perm (w[18], w[19], selector); - w[58] = hc_byte_perm (w[17], w[18], selector); - w[57] = hc_byte_perm (w[16], w[17], selector); - w[56] = hc_byte_perm (w[15], w[16], selector); - w[55] = hc_byte_perm (w[14], w[15], selector); - w[54] = hc_byte_perm (w[13], w[14], selector); - w[53] = hc_byte_perm (w[12], w[13], selector); - w[52] = hc_byte_perm (w[11], w[12], selector); - w[51] = hc_byte_perm (w[10], w[11], selector); - w[50] = hc_byte_perm (w[ 9], w[10], selector); - w[49] = hc_byte_perm (w[ 8], w[ 9], selector); - w[48] = hc_byte_perm (w[ 7], w[ 8], selector); - w[47] = hc_byte_perm (w[ 6], w[ 7], selector); - w[46] = hc_byte_perm (w[ 5], w[ 6], selector); - w[45] = hc_byte_perm (w[ 4], w[ 5], selector); - w[44] = hc_byte_perm (w[ 3], w[ 4], selector); - w[43] = hc_byte_perm (w[ 2], w[ 3], selector); - w[42] = hc_byte_perm (w[ 1], w[ 2], selector); - w[41] = hc_byte_perm (w[ 0], w[ 1], selector); - w[40] = hc_byte_perm ( 0, w[ 0], selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] 
= 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm (w[21], w[22], selector); - w[62] = hc_byte_perm (w[20], w[21], selector); - w[61] = hc_byte_perm (w[19], w[20], selector); - w[60] = hc_byte_perm (w[18], w[19], selector); - w[59] = hc_byte_perm (w[17], w[18], selector); - w[58] = hc_byte_perm (w[16], w[17], selector); - w[57] = hc_byte_perm (w[15], w[16], selector); - w[56] = hc_byte_perm (w[14], w[15], selector); - w[55] = hc_byte_perm (w[13], w[14], selector); - w[54] = hc_byte_perm (w[12], w[13], selector); - w[53] = hc_byte_perm (w[11], w[12], selector); - w[52] = hc_byte_perm (w[10], w[11], selector); - w[51] = hc_byte_perm (w[ 9], w[10], selector); - w[50] = hc_byte_perm (w[ 8], w[ 9], selector); - w[49] = hc_byte_perm (w[ 7], w[ 8], selector); - w[48] = hc_byte_perm (w[ 6], w[ 7], selector); - w[47] = hc_byte_perm (w[ 5], w[ 6], selector); - w[46] = hc_byte_perm (w[ 4], w[ 5], selector); - w[45] = hc_byte_perm (w[ 3], w[ 4], selector); - w[44] = hc_byte_perm (w[ 2], w[ 3], selector); - w[43] = hc_byte_perm (w[ 1], w[ 2], selector); - w[42] = hc_byte_perm (w[ 0], w[ 1], selector); - w[41] = hc_byte_perm ( 0, w[ 0], selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - 
w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm (w[20], w[21], selector); - w[62] = hc_byte_perm (w[19], w[20], selector); - w[61] = hc_byte_perm (w[18], w[19], selector); - w[60] = hc_byte_perm (w[17], w[18], selector); - w[59] = hc_byte_perm (w[16], w[17], selector); - w[58] = hc_byte_perm (w[15], w[16], selector); - w[57] = hc_byte_perm (w[14], w[15], selector); - w[56] = hc_byte_perm (w[13], w[14], selector); - w[55] = hc_byte_perm (w[12], w[13], selector); - w[54] = hc_byte_perm (w[11], w[12], selector); - w[53] = hc_byte_perm (w[10], w[11], selector); - w[52] = hc_byte_perm (w[ 9], w[10], selector); - w[51] = hc_byte_perm (w[ 8], w[ 9], selector); - w[50] = hc_byte_perm (w[ 7], w[ 8], selector); - w[49] = hc_byte_perm (w[ 6], w[ 7], selector); - w[48] = hc_byte_perm (w[ 5], w[ 6], selector); - w[47] = hc_byte_perm (w[ 4], w[ 5], selector); - w[46] = hc_byte_perm (w[ 3], w[ 4], selector); - w[45] = hc_byte_perm (w[ 2], w[ 3], selector); - w[44] = hc_byte_perm (w[ 1], w[ 2], selector); - w[43] = hc_byte_perm (w[ 0], w[ 1], selector); - w[42] = hc_byte_perm ( 0, w[ 0], selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm (w[19], w[20], selector); - w[62] = hc_byte_perm (w[18], w[19], selector); - w[61] = hc_byte_perm (w[17], w[18], selector); - w[60] = hc_byte_perm (w[16], w[17], selector); - w[59] = hc_byte_perm (w[15], w[16], selector); - w[58] = 
hc_byte_perm (w[14], w[15], selector); - w[57] = hc_byte_perm (w[13], w[14], selector); - w[56] = hc_byte_perm (w[12], w[13], selector); - w[55] = hc_byte_perm (w[11], w[12], selector); - w[54] = hc_byte_perm (w[10], w[11], selector); - w[53] = hc_byte_perm (w[ 9], w[10], selector); - w[52] = hc_byte_perm (w[ 8], w[ 9], selector); - w[51] = hc_byte_perm (w[ 7], w[ 8], selector); - w[50] = hc_byte_perm (w[ 6], w[ 7], selector); - w[49] = hc_byte_perm (w[ 5], w[ 6], selector); - w[48] = hc_byte_perm (w[ 4], w[ 5], selector); - w[47] = hc_byte_perm (w[ 3], w[ 4], selector); - w[46] = hc_byte_perm (w[ 2], w[ 3], selector); - w[45] = hc_byte_perm (w[ 1], w[ 2], selector); - w[44] = hc_byte_perm (w[ 0], w[ 1], selector); - w[43] = hc_byte_perm ( 0, w[ 0], selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm (w[18], w[19], selector); - w[62] = hc_byte_perm (w[17], w[18], selector); - w[61] = hc_byte_perm (w[16], w[17], selector); - w[60] = hc_byte_perm (w[15], w[16], selector); - w[59] = hc_byte_perm (w[14], w[15], selector); - w[58] = hc_byte_perm (w[13], w[14], selector); - w[57] = hc_byte_perm (w[12], w[13], selector); - w[56] = hc_byte_perm (w[11], w[12], selector); - w[55] = hc_byte_perm (w[10], w[11], selector); - w[54] = hc_byte_perm (w[ 9], w[10], selector); - w[53] = hc_byte_perm (w[ 8], w[ 9], selector); - w[52] = hc_byte_perm (w[ 7], w[ 8], selector); - w[51] = hc_byte_perm (w[ 6], w[ 7], selector); - w[50] 
= hc_byte_perm (w[ 5], w[ 6], selector); - w[49] = hc_byte_perm (w[ 4], w[ 5], selector); - w[48] = hc_byte_perm (w[ 3], w[ 4], selector); - w[47] = hc_byte_perm (w[ 2], w[ 3], selector); - w[46] = hc_byte_perm (w[ 1], w[ 2], selector); - w[45] = hc_byte_perm (w[ 0], w[ 1], selector); - w[44] = hc_byte_perm ( 0, w[ 0], selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm (w[17], w[18], selector); - w[62] = hc_byte_perm (w[16], w[17], selector); - w[61] = hc_byte_perm (w[15], w[16], selector); - w[60] = hc_byte_perm (w[14], w[15], selector); - w[59] = hc_byte_perm (w[13], w[14], selector); - w[58] = hc_byte_perm (w[12], w[13], selector); - w[57] = hc_byte_perm (w[11], w[12], selector); - w[56] = hc_byte_perm (w[10], w[11], selector); - w[55] = hc_byte_perm (w[ 9], w[10], selector); - w[54] = hc_byte_perm (w[ 8], w[ 9], selector); - w[53] = hc_byte_perm (w[ 7], w[ 8], selector); - w[52] = hc_byte_perm (w[ 6], w[ 7], selector); - w[51] = hc_byte_perm (w[ 5], w[ 6], selector); - w[50] = hc_byte_perm (w[ 4], w[ 5], selector); - w[49] = hc_byte_perm (w[ 3], w[ 4], selector); - w[48] = hc_byte_perm (w[ 2], w[ 3], selector); - w[47] = hc_byte_perm (w[ 1], w[ 2], selector); - w[46] = hc_byte_perm (w[ 0], w[ 1], selector); - w[45] = hc_byte_perm ( 0, w[ 0], selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; 
- w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm (w[16], w[17], selector); - w[62] = hc_byte_perm (w[15], w[16], selector); - w[61] = hc_byte_perm (w[14], w[15], selector); - w[60] = hc_byte_perm (w[13], w[14], selector); - w[59] = hc_byte_perm (w[12], w[13], selector); - w[58] = hc_byte_perm (w[11], w[12], selector); - w[57] = hc_byte_perm (w[10], w[11], selector); - w[56] = hc_byte_perm (w[ 9], w[10], selector); - w[55] = hc_byte_perm (w[ 8], w[ 9], selector); - w[54] = hc_byte_perm (w[ 7], w[ 8], selector); - w[53] = hc_byte_perm (w[ 6], w[ 7], selector); - w[52] = hc_byte_perm (w[ 5], w[ 6], selector); - w[51] = hc_byte_perm (w[ 4], w[ 5], selector); - w[50] = hc_byte_perm (w[ 3], w[ 4], selector); - w[49] = hc_byte_perm (w[ 2], w[ 3], selector); - w[48] = hc_byte_perm (w[ 1], w[ 2], selector); - w[47] = hc_byte_perm (w[ 0], w[ 1], selector); - w[46] = hc_byte_perm ( 0, w[ 0], selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm (w[15], 
w[16], selector); - w[62] = hc_byte_perm (w[14], w[15], selector); - w[61] = hc_byte_perm (w[13], w[14], selector); - w[60] = hc_byte_perm (w[12], w[13], selector); - w[59] = hc_byte_perm (w[11], w[12], selector); - w[58] = hc_byte_perm (w[10], w[11], selector); - w[57] = hc_byte_perm (w[ 9], w[10], selector); - w[56] = hc_byte_perm (w[ 8], w[ 9], selector); - w[55] = hc_byte_perm (w[ 7], w[ 8], selector); - w[54] = hc_byte_perm (w[ 6], w[ 7], selector); - w[53] = hc_byte_perm (w[ 5], w[ 6], selector); - w[52] = hc_byte_perm (w[ 4], w[ 5], selector); - w[51] = hc_byte_perm (w[ 3], w[ 4], selector); - w[50] = hc_byte_perm (w[ 2], w[ 3], selector); - w[49] = hc_byte_perm (w[ 1], w[ 2], selector); - w[48] = hc_byte_perm (w[ 0], w[ 1], selector); - w[47] = hc_byte_perm ( 0, w[ 0], selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm (w[14], w[15], selector); - w[62] = hc_byte_perm (w[13], w[14], selector); - w[61] = hc_byte_perm (w[12], w[13], selector); - w[60] = hc_byte_perm (w[11], w[12], selector); - w[59] = hc_byte_perm (w[10], w[11], selector); - w[58] = hc_byte_perm (w[ 9], w[10], selector); - w[57] = hc_byte_perm (w[ 8], w[ 9], selector); - w[56] = hc_byte_perm (w[ 7], w[ 8], selector); - w[55] = hc_byte_perm (w[ 6], w[ 7], selector); - w[54] = hc_byte_perm (w[ 5], w[ 6], selector); - w[53] = hc_byte_perm (w[ 4], w[ 5], selector); - w[52] = hc_byte_perm (w[ 
3], w[ 4], selector); - w[51] = hc_byte_perm (w[ 2], w[ 3], selector); - w[50] = hc_byte_perm (w[ 1], w[ 2], selector); - w[49] = hc_byte_perm (w[ 0], w[ 1], selector); - w[48] = hc_byte_perm ( 0, w[ 0], selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm (w[13], w[14], selector); - w[62] = hc_byte_perm (w[12], w[13], selector); - w[61] = hc_byte_perm (w[11], w[12], selector); - w[60] = hc_byte_perm (w[10], w[11], selector); - w[59] = hc_byte_perm (w[ 9], w[10], selector); - w[58] = hc_byte_perm (w[ 8], w[ 9], selector); - w[57] = hc_byte_perm (w[ 7], w[ 8], selector); - w[56] = hc_byte_perm (w[ 6], w[ 7], selector); - w[55] = hc_byte_perm (w[ 5], w[ 6], selector); - w[54] = hc_byte_perm (w[ 4], w[ 5], selector); - w[53] = hc_byte_perm (w[ 3], w[ 4], selector); - w[52] = hc_byte_perm (w[ 2], w[ 3], selector); - w[51] = hc_byte_perm (w[ 1], w[ 2], selector); - w[50] = hc_byte_perm (w[ 0], w[ 1], selector); - w[49] = hc_byte_perm ( 0, w[ 0], selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - 
w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm (w[12], w[13], selector); - w[62] = hc_byte_perm (w[11], w[12], selector); - w[61] = hc_byte_perm (w[10], w[11], selector); - w[60] = hc_byte_perm (w[ 9], w[10], selector); - w[59] = hc_byte_perm (w[ 8], w[ 9], selector); - w[58] = hc_byte_perm (w[ 7], w[ 8], selector); - w[57] = hc_byte_perm (w[ 6], w[ 7], selector); - w[56] = hc_byte_perm (w[ 5], w[ 6], selector); - w[55] = hc_byte_perm (w[ 4], w[ 5], selector); - w[54] = hc_byte_perm (w[ 3], w[ 4], selector); - w[53] = hc_byte_perm (w[ 2], w[ 3], selector); - w[52] = hc_byte_perm (w[ 1], w[ 2], selector); - w[51] = hc_byte_perm (w[ 0], w[ 1], selector); - w[50] = hc_byte_perm ( 0, w[ 0], selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm (w[11], w[12], selector); - w[62] = hc_byte_perm (w[10], w[11], selector); - w[61] = hc_byte_perm (w[ 9], w[10], selector); - w[60] = hc_byte_perm (w[ 8], w[ 9], selector); - w[59] = hc_byte_perm (w[ 7], w[ 8], selector); - w[58] = hc_byte_perm (w[ 6], w[ 7], selector); - w[57] = hc_byte_perm (w[ 5], w[ 6], selector); - w[56] = hc_byte_perm (w[ 4], w[ 5], 
selector); - w[55] = hc_byte_perm (w[ 3], w[ 4], selector); - w[54] = hc_byte_perm (w[ 2], w[ 3], selector); - w[53] = hc_byte_perm (w[ 1], w[ 2], selector); - w[52] = hc_byte_perm (w[ 0], w[ 1], selector); - w[51] = hc_byte_perm ( 0, w[ 0], selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm (w[10], w[11], selector); - w[62] = hc_byte_perm (w[ 9], w[10], selector); - w[61] = hc_byte_perm (w[ 8], w[ 9], selector); - w[60] = hc_byte_perm (w[ 7], w[ 8], selector); - w[59] = hc_byte_perm (w[ 6], w[ 7], selector); - w[58] = hc_byte_perm (w[ 5], w[ 6], selector); - w[57] = hc_byte_perm (w[ 4], w[ 5], selector); - w[56] = hc_byte_perm (w[ 3], w[ 4], selector); - w[55] = hc_byte_perm (w[ 2], w[ 3], selector); - w[54] = hc_byte_perm (w[ 1], w[ 2], selector); - w[53] = hc_byte_perm (w[ 0], w[ 1], selector); - w[52] = hc_byte_perm ( 0, w[ 0], selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - 
w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm (w[ 9], w[10], selector); - w[62] = hc_byte_perm (w[ 8], w[ 9], selector); - w[61] = hc_byte_perm (w[ 7], w[ 8], selector); - w[60] = hc_byte_perm (w[ 6], w[ 7], selector); - w[59] = hc_byte_perm (w[ 5], w[ 6], selector); - w[58] = hc_byte_perm (w[ 4], w[ 5], selector); - w[57] = hc_byte_perm (w[ 3], w[ 4], selector); - w[56] = hc_byte_perm (w[ 2], w[ 3], selector); - w[55] = hc_byte_perm (w[ 1], w[ 2], selector); - w[54] = hc_byte_perm (w[ 0], w[ 1], selector); - w[53] = hc_byte_perm ( 0, w[ 0], selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm (w[ 8], w[ 9], selector); - w[62] = hc_byte_perm (w[ 7], w[ 8], selector); - w[61] = hc_byte_perm (w[ 6], w[ 7], selector); - w[60] = hc_byte_perm (w[ 5], w[ 6], selector); - w[59] = hc_byte_perm (w[ 4], w[ 5], selector); - w[58] = hc_byte_perm (w[ 3], w[ 4], selector); - w[57] = hc_byte_perm (w[ 2], w[ 3], selector); - w[56] = hc_byte_perm (w[ 1], w[ 2], selector); - w[55] = hc_byte_perm (w[ 0], w[ 1], selector); - w[54] = hc_byte_perm ( 0, w[ 0], selector); - w[53] = 0; - w[52] = 0; - w[51] 
= 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm (w[ 7], w[ 8], selector); - w[62] = hc_byte_perm (w[ 6], w[ 7], selector); - w[61] = hc_byte_perm (w[ 5], w[ 6], selector); - w[60] = hc_byte_perm (w[ 4], w[ 5], selector); - w[59] = hc_byte_perm (w[ 3], w[ 4], selector); - w[58] = hc_byte_perm (w[ 2], w[ 3], selector); - w[57] = hc_byte_perm (w[ 1], w[ 2], selector); - w[56] = hc_byte_perm (w[ 0], w[ 1], selector); - w[55] = hc_byte_perm ( 0, w[ 0], selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm (w[ 6], w[ 7], selector); - w[62] = hc_byte_perm (w[ 5], w[ 6], selector); - w[61] = hc_byte_perm (w[ 4], 
w[ 5], selector); - w[60] = hc_byte_perm (w[ 3], w[ 4], selector); - w[59] = hc_byte_perm (w[ 2], w[ 3], selector); - w[58] = hc_byte_perm (w[ 1], w[ 2], selector); - w[57] = hc_byte_perm (w[ 0], w[ 1], selector); - w[56] = hc_byte_perm ( 0, w[ 0], selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm (w[ 5], w[ 6], selector); - w[62] = hc_byte_perm (w[ 4], w[ 5], selector); - w[61] = hc_byte_perm (w[ 3], w[ 4], selector); - w[60] = hc_byte_perm (w[ 2], w[ 3], selector); - w[59] = hc_byte_perm (w[ 1], w[ 2], selector); - w[58] = hc_byte_perm (w[ 0], w[ 1], selector); - w[57] = hc_byte_perm ( 0, w[ 0], selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 
0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm (w[ 4], w[ 5], selector); - w[62] = hc_byte_perm (w[ 3], w[ 4], selector); - w[61] = hc_byte_perm (w[ 2], w[ 3], selector); - w[60] = hc_byte_perm (w[ 1], w[ 2], selector); - w[59] = hc_byte_perm (w[ 0], w[ 1], selector); - w[58] = hc_byte_perm ( 0, w[ 0], selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm (w[ 3], w[ 4], selector); - w[62] = hc_byte_perm (w[ 2], w[ 3], selector); - w[61] = hc_byte_perm (w[ 1], w[ 2], selector); - w[60] = hc_byte_perm (w[ 0], w[ 1], selector); - w[59] = hc_byte_perm ( 0, w[ 0], selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 
0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm (w[ 2], w[ 3], selector); - w[62] = hc_byte_perm (w[ 1], w[ 2], selector); - w[61] = hc_byte_perm (w[ 0], w[ 1], selector); - w[60] = hc_byte_perm ( 0, w[ 0], selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm (w[ 1], w[ 2], selector); - w[62] = hc_byte_perm (w[ 0], w[ 1], selector); - w[61] = hc_byte_perm ( 0, w[ 0], selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; 
- w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm (w[ 0], w[ 1], selector); - w[62] = hc_byte_perm ( 0, w[ 0], selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm ( 0, w[ 0], selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } 
DECLSPEC void switch_buffer_by_offset_1x64_be (PRIVATE_AS u32x *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -30530,4373 +19518,6 @@ DECLSPEC void switch_buffer_by_offset_1x64_be (PRIVATE_AS u32x *w, const u32 off break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm (w[63], w[62], selector); - w[62] = hc_byte_perm (w[62], w[61], selector); - w[61] = hc_byte_perm (w[61], w[60], selector); - w[60] = hc_byte_perm (w[60], w[59], selector); - w[59] = hc_byte_perm (w[59], w[58], selector); - w[58] = hc_byte_perm (w[58], w[57], selector); - w[57] = hc_byte_perm (w[57], w[56], selector); - w[56] = hc_byte_perm (w[56], w[55], selector); - w[55] = hc_byte_perm (w[55], w[54], selector); - w[54] = hc_byte_perm (w[54], w[53], selector); - w[53] = hc_byte_perm (w[53], w[52], selector); - w[52] = hc_byte_perm (w[52], w[51], selector); - w[51] = hc_byte_perm (w[51], w[50], selector); - w[50] = hc_byte_perm (w[50], w[49], selector); - w[49] = hc_byte_perm (w[49], w[48], selector); - w[48] = hc_byte_perm (w[48], w[47], selector); - w[47] = hc_byte_perm (w[47], w[46], selector); - w[46] = hc_byte_perm (w[46], w[45], selector); - w[45] = hc_byte_perm (w[45], w[44], selector); - w[44] = hc_byte_perm (w[44], w[43], selector); - w[43] = hc_byte_perm (w[43], w[42], selector); - w[42] = hc_byte_perm (w[42], w[41], selector); - w[41] = hc_byte_perm (w[41], w[40], selector); - w[40] = hc_byte_perm (w[40], w[39], selector); - w[39] = hc_byte_perm (w[39], w[38], selector); - w[38] = hc_byte_perm (w[38], w[37], selector); - 
w[37] = hc_byte_perm (w[37], w[36], selector); - w[36] = hc_byte_perm (w[36], w[35], selector); - w[35] = hc_byte_perm (w[35], w[34], selector); - w[34] = hc_byte_perm (w[34], w[33], selector); - w[33] = hc_byte_perm (w[33], w[32], selector); - w[32] = hc_byte_perm (w[32], w[31], selector); - w[31] = hc_byte_perm (w[31], w[30], selector); - w[30] = hc_byte_perm (w[30], w[29], selector); - w[29] = hc_byte_perm (w[29], w[28], selector); - w[28] = hc_byte_perm (w[28], w[27], selector); - w[27] = hc_byte_perm (w[27], w[26], selector); - w[26] = hc_byte_perm (w[26], w[25], selector); - w[25] = hc_byte_perm (w[25], w[24], selector); - w[24] = hc_byte_perm (w[24], w[23], selector); - w[23] = hc_byte_perm (w[23], w[22], selector); - w[22] = hc_byte_perm (w[22], w[21], selector); - w[21] = hc_byte_perm (w[21], w[20], selector); - w[20] = hc_byte_perm (w[20], w[19], selector); - w[19] = hc_byte_perm (w[19], w[18], selector); - w[18] = hc_byte_perm (w[18], w[17], selector); - w[17] = hc_byte_perm (w[17], w[16], selector); - w[16] = hc_byte_perm (w[16], w[15], selector); - w[15] = hc_byte_perm (w[15], w[14], selector); - w[14] = hc_byte_perm (w[14], w[13], selector); - w[13] = hc_byte_perm (w[13], w[12], selector); - w[12] = hc_byte_perm (w[12], w[11], selector); - w[11] = hc_byte_perm (w[11], w[10], selector); - w[10] = hc_byte_perm (w[10], w[ 9], selector); - w[ 9] = hc_byte_perm (w[ 9], w[ 8], selector); - w[ 8] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 7] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 6] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 5] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 4] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 3] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 2] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 1] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 0] = hc_byte_perm (w[ 0], 0, selector); - - break; - - case 1: - w[63] = hc_byte_perm (w[62], w[61], selector); - w[62] = hc_byte_perm (w[61], w[60], selector); - w[61] = hc_byte_perm 
(w[60], w[59], selector); - w[60] = hc_byte_perm (w[59], w[58], selector); - w[59] = hc_byte_perm (w[58], w[57], selector); - w[58] = hc_byte_perm (w[57], w[56], selector); - w[57] = hc_byte_perm (w[56], w[55], selector); - w[56] = hc_byte_perm (w[55], w[54], selector); - w[55] = hc_byte_perm (w[54], w[53], selector); - w[54] = hc_byte_perm (w[53], w[52], selector); - w[53] = hc_byte_perm (w[52], w[51], selector); - w[52] = hc_byte_perm (w[51], w[50], selector); - w[51] = hc_byte_perm (w[50], w[49], selector); - w[50] = hc_byte_perm (w[49], w[48], selector); - w[49] = hc_byte_perm (w[48], w[47], selector); - w[48] = hc_byte_perm (w[47], w[46], selector); - w[47] = hc_byte_perm (w[46], w[45], selector); - w[46] = hc_byte_perm (w[45], w[44], selector); - w[45] = hc_byte_perm (w[44], w[43], selector); - w[44] = hc_byte_perm (w[43], w[42], selector); - w[43] = hc_byte_perm (w[42], w[41], selector); - w[42] = hc_byte_perm (w[41], w[40], selector); - w[41] = hc_byte_perm (w[40], w[39], selector); - w[40] = hc_byte_perm (w[39], w[38], selector); - w[39] = hc_byte_perm (w[38], w[37], selector); - w[38] = hc_byte_perm (w[37], w[36], selector); - w[37] = hc_byte_perm (w[36], w[35], selector); - w[36] = hc_byte_perm (w[35], w[34], selector); - w[35] = hc_byte_perm (w[34], w[33], selector); - w[34] = hc_byte_perm (w[33], w[32], selector); - w[33] = hc_byte_perm (w[32], w[31], selector); - w[32] = hc_byte_perm (w[31], w[30], selector); - w[31] = hc_byte_perm (w[30], w[29], selector); - w[30] = hc_byte_perm (w[29], w[28], selector); - w[29] = hc_byte_perm (w[28], w[27], selector); - w[28] = hc_byte_perm (w[27], w[26], selector); - w[27] = hc_byte_perm (w[26], w[25], selector); - w[26] = hc_byte_perm (w[25], w[24], selector); - w[25] = hc_byte_perm (w[24], w[23], selector); - w[24] = hc_byte_perm (w[23], w[22], selector); - w[23] = hc_byte_perm (w[22], w[21], selector); - w[22] = hc_byte_perm (w[21], w[20], selector); - w[21] = hc_byte_perm (w[20], w[19], selector); - w[20] = 
hc_byte_perm (w[19], w[18], selector); - w[19] = hc_byte_perm (w[18], w[17], selector); - w[18] = hc_byte_perm (w[17], w[16], selector); - w[17] = hc_byte_perm (w[16], w[15], selector); - w[16] = hc_byte_perm (w[15], w[14], selector); - w[15] = hc_byte_perm (w[14], w[13], selector); - w[14] = hc_byte_perm (w[13], w[12], selector); - w[13] = hc_byte_perm (w[12], w[11], selector); - w[12] = hc_byte_perm (w[11], w[10], selector); - w[11] = hc_byte_perm (w[10], w[ 9], selector); - w[10] = hc_byte_perm (w[ 9], w[ 8], selector); - w[ 9] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 8] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 7] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 6] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 5] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 4] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 3] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 2] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 1] = hc_byte_perm (w[ 0], 0, selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm (w[61], w[60], selector); - w[62] = hc_byte_perm (w[60], w[59], selector); - w[61] = hc_byte_perm (w[59], w[58], selector); - w[60] = hc_byte_perm (w[58], w[57], selector); - w[59] = hc_byte_perm (w[57], w[56], selector); - w[58] = hc_byte_perm (w[56], w[55], selector); - w[57] = hc_byte_perm (w[55], w[54], selector); - w[56] = hc_byte_perm (w[54], w[53], selector); - w[55] = hc_byte_perm (w[53], w[52], selector); - w[54] = hc_byte_perm (w[52], w[51], selector); - w[53] = hc_byte_perm (w[51], w[50], selector); - w[52] = hc_byte_perm (w[50], w[49], selector); - w[51] = hc_byte_perm (w[49], w[48], selector); - w[50] = hc_byte_perm (w[48], w[47], selector); - w[49] = hc_byte_perm (w[47], w[46], selector); - w[48] = hc_byte_perm (w[46], w[45], selector); - w[47] = hc_byte_perm (w[45], w[44], selector); - w[46] = hc_byte_perm (w[44], w[43], selector); - w[45] = hc_byte_perm (w[43], w[42], selector); - w[44] = hc_byte_perm (w[42], w[41], selector); - w[43] = 
hc_byte_perm (w[41], w[40], selector); - w[42] = hc_byte_perm (w[40], w[39], selector); - w[41] = hc_byte_perm (w[39], w[38], selector); - w[40] = hc_byte_perm (w[38], w[37], selector); - w[39] = hc_byte_perm (w[37], w[36], selector); - w[38] = hc_byte_perm (w[36], w[35], selector); - w[37] = hc_byte_perm (w[35], w[34], selector); - w[36] = hc_byte_perm (w[34], w[33], selector); - w[35] = hc_byte_perm (w[33], w[32], selector); - w[34] = hc_byte_perm (w[32], w[31], selector); - w[33] = hc_byte_perm (w[31], w[30], selector); - w[32] = hc_byte_perm (w[30], w[29], selector); - w[31] = hc_byte_perm (w[29], w[28], selector); - w[30] = hc_byte_perm (w[28], w[27], selector); - w[29] = hc_byte_perm (w[27], w[26], selector); - w[28] = hc_byte_perm (w[26], w[25], selector); - w[27] = hc_byte_perm (w[25], w[24], selector); - w[26] = hc_byte_perm (w[24], w[23], selector); - w[25] = hc_byte_perm (w[23], w[22], selector); - w[24] = hc_byte_perm (w[22], w[21], selector); - w[23] = hc_byte_perm (w[21], w[20], selector); - w[22] = hc_byte_perm (w[20], w[19], selector); - w[21] = hc_byte_perm (w[19], w[18], selector); - w[20] = hc_byte_perm (w[18], w[17], selector); - w[19] = hc_byte_perm (w[17], w[16], selector); - w[18] = hc_byte_perm (w[16], w[15], selector); - w[17] = hc_byte_perm (w[15], w[14], selector); - w[16] = hc_byte_perm (w[14], w[13], selector); - w[15] = hc_byte_perm (w[13], w[12], selector); - w[14] = hc_byte_perm (w[12], w[11], selector); - w[13] = hc_byte_perm (w[11], w[10], selector); - w[12] = hc_byte_perm (w[10], w[ 9], selector); - w[11] = hc_byte_perm (w[ 9], w[ 8], selector); - w[10] = hc_byte_perm (w[ 8], w[ 7], selector); - w[ 9] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 8] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 7] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 6] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 5] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 4] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 3] = hc_byte_perm (w[ 1], w[ 0], selector); 
- w[ 2] = hc_byte_perm (w[ 0], 0, selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm (w[60], w[59], selector); - w[62] = hc_byte_perm (w[59], w[58], selector); - w[61] = hc_byte_perm (w[58], w[57], selector); - w[60] = hc_byte_perm (w[57], w[56], selector); - w[59] = hc_byte_perm (w[56], w[55], selector); - w[58] = hc_byte_perm (w[55], w[54], selector); - w[57] = hc_byte_perm (w[54], w[53], selector); - w[56] = hc_byte_perm (w[53], w[52], selector); - w[55] = hc_byte_perm (w[52], w[51], selector); - w[54] = hc_byte_perm (w[51], w[50], selector); - w[53] = hc_byte_perm (w[50], w[49], selector); - w[52] = hc_byte_perm (w[49], w[48], selector); - w[51] = hc_byte_perm (w[48], w[47], selector); - w[50] = hc_byte_perm (w[47], w[46], selector); - w[49] = hc_byte_perm (w[46], w[45], selector); - w[48] = hc_byte_perm (w[45], w[44], selector); - w[47] = hc_byte_perm (w[44], w[43], selector); - w[46] = hc_byte_perm (w[43], w[42], selector); - w[45] = hc_byte_perm (w[42], w[41], selector); - w[44] = hc_byte_perm (w[41], w[40], selector); - w[43] = hc_byte_perm (w[40], w[39], selector); - w[42] = hc_byte_perm (w[39], w[38], selector); - w[41] = hc_byte_perm (w[38], w[37], selector); - w[40] = hc_byte_perm (w[37], w[36], selector); - w[39] = hc_byte_perm (w[36], w[35], selector); - w[38] = hc_byte_perm (w[35], w[34], selector); - w[37] = hc_byte_perm (w[34], w[33], selector); - w[36] = hc_byte_perm (w[33], w[32], selector); - w[35] = hc_byte_perm (w[32], w[31], selector); - w[34] = hc_byte_perm (w[31], w[30], selector); - w[33] = hc_byte_perm (w[30], w[29], selector); - w[32] = hc_byte_perm (w[29], w[28], selector); - w[31] = hc_byte_perm (w[28], w[27], selector); - w[30] = hc_byte_perm (w[27], w[26], selector); - w[29] = hc_byte_perm (w[26], w[25], selector); - w[28] = hc_byte_perm (w[25], w[24], selector); - w[27] = hc_byte_perm (w[24], w[23], selector); - w[26] = hc_byte_perm (w[23], w[22], selector); - w[25] = hc_byte_perm (w[22], w[21], 
selector); - w[24] = hc_byte_perm (w[21], w[20], selector); - w[23] = hc_byte_perm (w[20], w[19], selector); - w[22] = hc_byte_perm (w[19], w[18], selector); - w[21] = hc_byte_perm (w[18], w[17], selector); - w[20] = hc_byte_perm (w[17], w[16], selector); - w[19] = hc_byte_perm (w[16], w[15], selector); - w[18] = hc_byte_perm (w[15], w[14], selector); - w[17] = hc_byte_perm (w[14], w[13], selector); - w[16] = hc_byte_perm (w[13], w[12], selector); - w[15] = hc_byte_perm (w[12], w[11], selector); - w[14] = hc_byte_perm (w[11], w[10], selector); - w[13] = hc_byte_perm (w[10], w[ 9], selector); - w[12] = hc_byte_perm (w[ 9], w[ 8], selector); - w[11] = hc_byte_perm (w[ 8], w[ 7], selector); - w[10] = hc_byte_perm (w[ 7], w[ 6], selector); - w[ 9] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 8] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 7] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 6] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 5] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 4] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 3] = hc_byte_perm (w[ 0], 0, selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm (w[59], w[58], selector); - w[62] = hc_byte_perm (w[58], w[57], selector); - w[61] = hc_byte_perm (w[57], w[56], selector); - w[60] = hc_byte_perm (w[56], w[55], selector); - w[59] = hc_byte_perm (w[55], w[54], selector); - w[58] = hc_byte_perm (w[54], w[53], selector); - w[57] = hc_byte_perm (w[53], w[52], selector); - w[56] = hc_byte_perm (w[52], w[51], selector); - w[55] = hc_byte_perm (w[51], w[50], selector); - w[54] = hc_byte_perm (w[50], w[49], selector); - w[53] = hc_byte_perm (w[49], w[48], selector); - w[52] = hc_byte_perm (w[48], w[47], selector); - w[51] = hc_byte_perm (w[47], w[46], selector); - w[50] = hc_byte_perm (w[46], w[45], selector); - w[49] = hc_byte_perm (w[45], w[44], selector); - w[48] = hc_byte_perm (w[44], w[43], selector); - w[47] = hc_byte_perm (w[43], w[42], selector); - w[46] = 
hc_byte_perm (w[42], w[41], selector); - w[45] = hc_byte_perm (w[41], w[40], selector); - w[44] = hc_byte_perm (w[40], w[39], selector); - w[43] = hc_byte_perm (w[39], w[38], selector); - w[42] = hc_byte_perm (w[38], w[37], selector); - w[41] = hc_byte_perm (w[37], w[36], selector); - w[40] = hc_byte_perm (w[36], w[35], selector); - w[39] = hc_byte_perm (w[35], w[34], selector); - w[38] = hc_byte_perm (w[34], w[33], selector); - w[37] = hc_byte_perm (w[33], w[32], selector); - w[36] = hc_byte_perm (w[32], w[31], selector); - w[35] = hc_byte_perm (w[31], w[30], selector); - w[34] = hc_byte_perm (w[30], w[29], selector); - w[33] = hc_byte_perm (w[29], w[28], selector); - w[32] = hc_byte_perm (w[28], w[27], selector); - w[31] = hc_byte_perm (w[27], w[26], selector); - w[30] = hc_byte_perm (w[26], w[25], selector); - w[29] = hc_byte_perm (w[25], w[24], selector); - w[28] = hc_byte_perm (w[24], w[23], selector); - w[27] = hc_byte_perm (w[23], w[22], selector); - w[26] = hc_byte_perm (w[22], w[21], selector); - w[25] = hc_byte_perm (w[21], w[20], selector); - w[24] = hc_byte_perm (w[20], w[19], selector); - w[23] = hc_byte_perm (w[19], w[18], selector); - w[22] = hc_byte_perm (w[18], w[17], selector); - w[21] = hc_byte_perm (w[17], w[16], selector); - w[20] = hc_byte_perm (w[16], w[15], selector); - w[19] = hc_byte_perm (w[15], w[14], selector); - w[18] = hc_byte_perm (w[14], w[13], selector); - w[17] = hc_byte_perm (w[13], w[12], selector); - w[16] = hc_byte_perm (w[12], w[11], selector); - w[15] = hc_byte_perm (w[11], w[10], selector); - w[14] = hc_byte_perm (w[10], w[ 9], selector); - w[13] = hc_byte_perm (w[ 9], w[ 8], selector); - w[12] = hc_byte_perm (w[ 8], w[ 7], selector); - w[11] = hc_byte_perm (w[ 7], w[ 6], selector); - w[10] = hc_byte_perm (w[ 6], w[ 5], selector); - w[ 9] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 8] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 7] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 6] = hc_byte_perm (w[ 2], w[ 1], selector); 
- w[ 5] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 4] = hc_byte_perm (w[ 0], 0, selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm (w[58], w[57], selector); - w[62] = hc_byte_perm (w[57], w[56], selector); - w[61] = hc_byte_perm (w[56], w[55], selector); - w[60] = hc_byte_perm (w[55], w[54], selector); - w[59] = hc_byte_perm (w[54], w[53], selector); - w[58] = hc_byte_perm (w[53], w[52], selector); - w[57] = hc_byte_perm (w[52], w[51], selector); - w[56] = hc_byte_perm (w[51], w[50], selector); - w[55] = hc_byte_perm (w[50], w[49], selector); - w[54] = hc_byte_perm (w[49], w[48], selector); - w[53] = hc_byte_perm (w[48], w[47], selector); - w[52] = hc_byte_perm (w[47], w[46], selector); - w[51] = hc_byte_perm (w[46], w[45], selector); - w[50] = hc_byte_perm (w[45], w[44], selector); - w[49] = hc_byte_perm (w[44], w[43], selector); - w[48] = hc_byte_perm (w[43], w[42], selector); - w[47] = hc_byte_perm (w[42], w[41], selector); - w[46] = hc_byte_perm (w[41], w[40], selector); - w[45] = hc_byte_perm (w[40], w[39], selector); - w[44] = hc_byte_perm (w[39], w[38], selector); - w[43] = hc_byte_perm (w[38], w[37], selector); - w[42] = hc_byte_perm (w[37], w[36], selector); - w[41] = hc_byte_perm (w[36], w[35], selector); - w[40] = hc_byte_perm (w[35], w[34], selector); - w[39] = hc_byte_perm (w[34], w[33], selector); - w[38] = hc_byte_perm (w[33], w[32], selector); - w[37] = hc_byte_perm (w[32], w[31], selector); - w[36] = hc_byte_perm (w[31], w[30], selector); - w[35] = hc_byte_perm (w[30], w[29], selector); - w[34] = hc_byte_perm (w[29], w[28], selector); - w[33] = hc_byte_perm (w[28], w[27], selector); - w[32] = hc_byte_perm (w[27], w[26], selector); - w[31] = hc_byte_perm (w[26], w[25], selector); - w[30] = hc_byte_perm (w[25], w[24], selector); - w[29] = hc_byte_perm (w[24], w[23], selector); - w[28] = hc_byte_perm (w[23], w[22], selector); - w[27] = hc_byte_perm (w[22], w[21], selector); - w[26] = 
hc_byte_perm (w[21], w[20], selector); - w[25] = hc_byte_perm (w[20], w[19], selector); - w[24] = hc_byte_perm (w[19], w[18], selector); - w[23] = hc_byte_perm (w[18], w[17], selector); - w[22] = hc_byte_perm (w[17], w[16], selector); - w[21] = hc_byte_perm (w[16], w[15], selector); - w[20] = hc_byte_perm (w[15], w[14], selector); - w[19] = hc_byte_perm (w[14], w[13], selector); - w[18] = hc_byte_perm (w[13], w[12], selector); - w[17] = hc_byte_perm (w[12], w[11], selector); - w[16] = hc_byte_perm (w[11], w[10], selector); - w[15] = hc_byte_perm (w[10], w[ 9], selector); - w[14] = hc_byte_perm (w[ 9], w[ 8], selector); - w[13] = hc_byte_perm (w[ 8], w[ 7], selector); - w[12] = hc_byte_perm (w[ 7], w[ 6], selector); - w[11] = hc_byte_perm (w[ 6], w[ 5], selector); - w[10] = hc_byte_perm (w[ 5], w[ 4], selector); - w[ 9] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 8] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 7] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 6] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 5] = hc_byte_perm (w[ 0], 0, selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm (w[57], w[56], selector); - w[62] = hc_byte_perm (w[56], w[55], selector); - w[61] = hc_byte_perm (w[55], w[54], selector); - w[60] = hc_byte_perm (w[54], w[53], selector); - w[59] = hc_byte_perm (w[53], w[52], selector); - w[58] = hc_byte_perm (w[52], w[51], selector); - w[57] = hc_byte_perm (w[51], w[50], selector); - w[56] = hc_byte_perm (w[50], w[49], selector); - w[55] = hc_byte_perm (w[49], w[48], selector); - w[54] = hc_byte_perm (w[48], w[47], selector); - w[53] = hc_byte_perm (w[47], w[46], selector); - w[52] = hc_byte_perm (w[46], w[45], selector); - w[51] = hc_byte_perm (w[45], w[44], selector); - w[50] = hc_byte_perm (w[44], w[43], selector); - w[49] = hc_byte_perm (w[43], w[42], selector); - w[48] = hc_byte_perm (w[42], w[41], selector); - w[47] = hc_byte_perm (w[41], w[40], selector); - w[46] = 
hc_byte_perm (w[40], w[39], selector); - w[45] = hc_byte_perm (w[39], w[38], selector); - w[44] = hc_byte_perm (w[38], w[37], selector); - w[43] = hc_byte_perm (w[37], w[36], selector); - w[42] = hc_byte_perm (w[36], w[35], selector); - w[41] = hc_byte_perm (w[35], w[34], selector); - w[40] = hc_byte_perm (w[34], w[33], selector); - w[39] = hc_byte_perm (w[33], w[32], selector); - w[38] = hc_byte_perm (w[32], w[31], selector); - w[37] = hc_byte_perm (w[31], w[30], selector); - w[36] = hc_byte_perm (w[30], w[29], selector); - w[35] = hc_byte_perm (w[29], w[28], selector); - w[34] = hc_byte_perm (w[28], w[27], selector); - w[33] = hc_byte_perm (w[27], w[26], selector); - w[32] = hc_byte_perm (w[26], w[25], selector); - w[31] = hc_byte_perm (w[25], w[24], selector); - w[30] = hc_byte_perm (w[24], w[23], selector); - w[29] = hc_byte_perm (w[23], w[22], selector); - w[28] = hc_byte_perm (w[22], w[21], selector); - w[27] = hc_byte_perm (w[21], w[20], selector); - w[26] = hc_byte_perm (w[20], w[19], selector); - w[25] = hc_byte_perm (w[19], w[18], selector); - w[24] = hc_byte_perm (w[18], w[17], selector); - w[23] = hc_byte_perm (w[17], w[16], selector); - w[22] = hc_byte_perm (w[16], w[15], selector); - w[21] = hc_byte_perm (w[15], w[14], selector); - w[20] = hc_byte_perm (w[14], w[13], selector); - w[19] = hc_byte_perm (w[13], w[12], selector); - w[18] = hc_byte_perm (w[12], w[11], selector); - w[17] = hc_byte_perm (w[11], w[10], selector); - w[16] = hc_byte_perm (w[10], w[ 9], selector); - w[15] = hc_byte_perm (w[ 9], w[ 8], selector); - w[14] = hc_byte_perm (w[ 8], w[ 7], selector); - w[13] = hc_byte_perm (w[ 7], w[ 6], selector); - w[12] = hc_byte_perm (w[ 6], w[ 5], selector); - w[11] = hc_byte_perm (w[ 5], w[ 4], selector); - w[10] = hc_byte_perm (w[ 4], w[ 3], selector); - w[ 9] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 8] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 7] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 6] = hc_byte_perm (w[ 0], 0, selector); - w[ 
5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm (w[56], w[55], selector); - w[62] = hc_byte_perm (w[55], w[54], selector); - w[61] = hc_byte_perm (w[54], w[53], selector); - w[60] = hc_byte_perm (w[53], w[52], selector); - w[59] = hc_byte_perm (w[52], w[51], selector); - w[58] = hc_byte_perm (w[51], w[50], selector); - w[57] = hc_byte_perm (w[50], w[49], selector); - w[56] = hc_byte_perm (w[49], w[48], selector); - w[55] = hc_byte_perm (w[48], w[47], selector); - w[54] = hc_byte_perm (w[47], w[46], selector); - w[53] = hc_byte_perm (w[46], w[45], selector); - w[52] = hc_byte_perm (w[45], w[44], selector); - w[51] = hc_byte_perm (w[44], w[43], selector); - w[50] = hc_byte_perm (w[43], w[42], selector); - w[49] = hc_byte_perm (w[42], w[41], selector); - w[48] = hc_byte_perm (w[41], w[40], selector); - w[47] = hc_byte_perm (w[40], w[39], selector); - w[46] = hc_byte_perm (w[39], w[38], selector); - w[45] = hc_byte_perm (w[38], w[37], selector); - w[44] = hc_byte_perm (w[37], w[36], selector); - w[43] = hc_byte_perm (w[36], w[35], selector); - w[42] = hc_byte_perm (w[35], w[34], selector); - w[41] = hc_byte_perm (w[34], w[33], selector); - w[40] = hc_byte_perm (w[33], w[32], selector); - w[39] = hc_byte_perm (w[32], w[31], selector); - w[38] = hc_byte_perm (w[31], w[30], selector); - w[37] = hc_byte_perm (w[30], w[29], selector); - w[36] = hc_byte_perm (w[29], w[28], selector); - w[35] = hc_byte_perm (w[28], w[27], selector); - w[34] = hc_byte_perm (w[27], w[26], selector); - w[33] = hc_byte_perm (w[26], w[25], selector); - w[32] = hc_byte_perm (w[25], w[24], selector); - w[31] = hc_byte_perm (w[24], w[23], selector); - w[30] = hc_byte_perm (w[23], w[22], selector); - w[29] = hc_byte_perm (w[22], w[21], selector); - w[28] = hc_byte_perm (w[21], w[20], selector); - w[27] = hc_byte_perm (w[20], w[19], selector); - w[26] = hc_byte_perm (w[19], w[18], selector); - w[25] = hc_byte_perm (w[18], w[17], 
selector); - w[24] = hc_byte_perm (w[17], w[16], selector); - w[23] = hc_byte_perm (w[16], w[15], selector); - w[22] = hc_byte_perm (w[15], w[14], selector); - w[21] = hc_byte_perm (w[14], w[13], selector); - w[20] = hc_byte_perm (w[13], w[12], selector); - w[19] = hc_byte_perm (w[12], w[11], selector); - w[18] = hc_byte_perm (w[11], w[10], selector); - w[17] = hc_byte_perm (w[10], w[ 9], selector); - w[16] = hc_byte_perm (w[ 9], w[ 8], selector); - w[15] = hc_byte_perm (w[ 8], w[ 7], selector); - w[14] = hc_byte_perm (w[ 7], w[ 6], selector); - w[13] = hc_byte_perm (w[ 6], w[ 5], selector); - w[12] = hc_byte_perm (w[ 5], w[ 4], selector); - w[11] = hc_byte_perm (w[ 4], w[ 3], selector); - w[10] = hc_byte_perm (w[ 3], w[ 2], selector); - w[ 9] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 8] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 7] = hc_byte_perm (w[ 0], 0, selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm (w[55], w[54], selector); - w[62] = hc_byte_perm (w[54], w[53], selector); - w[61] = hc_byte_perm (w[53], w[52], selector); - w[60] = hc_byte_perm (w[52], w[51], selector); - w[59] = hc_byte_perm (w[51], w[50], selector); - w[58] = hc_byte_perm (w[50], w[49], selector); - w[57] = hc_byte_perm (w[49], w[48], selector); - w[56] = hc_byte_perm (w[48], w[47], selector); - w[55] = hc_byte_perm (w[47], w[46], selector); - w[54] = hc_byte_perm (w[46], w[45], selector); - w[53] = hc_byte_perm (w[45], w[44], selector); - w[52] = hc_byte_perm (w[44], w[43], selector); - w[51] = hc_byte_perm (w[43], w[42], selector); - w[50] = hc_byte_perm (w[42], w[41], selector); - w[49] = hc_byte_perm (w[41], w[40], selector); - w[48] = hc_byte_perm (w[40], w[39], selector); - w[47] = hc_byte_perm (w[39], w[38], selector); - w[46] = hc_byte_perm (w[38], w[37], selector); - w[45] = hc_byte_perm (w[37], w[36], selector); - w[44] = hc_byte_perm (w[36], w[35], selector); - w[43] = 
hc_byte_perm (w[35], w[34], selector); - w[42] = hc_byte_perm (w[34], w[33], selector); - w[41] = hc_byte_perm (w[33], w[32], selector); - w[40] = hc_byte_perm (w[32], w[31], selector); - w[39] = hc_byte_perm (w[31], w[30], selector); - w[38] = hc_byte_perm (w[30], w[29], selector); - w[37] = hc_byte_perm (w[29], w[28], selector); - w[36] = hc_byte_perm (w[28], w[27], selector); - w[35] = hc_byte_perm (w[27], w[26], selector); - w[34] = hc_byte_perm (w[26], w[25], selector); - w[33] = hc_byte_perm (w[25], w[24], selector); - w[32] = hc_byte_perm (w[24], w[23], selector); - w[31] = hc_byte_perm (w[23], w[22], selector); - w[30] = hc_byte_perm (w[22], w[21], selector); - w[29] = hc_byte_perm (w[21], w[20], selector); - w[28] = hc_byte_perm (w[20], w[19], selector); - w[27] = hc_byte_perm (w[19], w[18], selector); - w[26] = hc_byte_perm (w[18], w[17], selector); - w[25] = hc_byte_perm (w[17], w[16], selector); - w[24] = hc_byte_perm (w[16], w[15], selector); - w[23] = hc_byte_perm (w[15], w[14], selector); - w[22] = hc_byte_perm (w[14], w[13], selector); - w[21] = hc_byte_perm (w[13], w[12], selector); - w[20] = hc_byte_perm (w[12], w[11], selector); - w[19] = hc_byte_perm (w[11], w[10], selector); - w[18] = hc_byte_perm (w[10], w[ 9], selector); - w[17] = hc_byte_perm (w[ 9], w[ 8], selector); - w[16] = hc_byte_perm (w[ 8], w[ 7], selector); - w[15] = hc_byte_perm (w[ 7], w[ 6], selector); - w[14] = hc_byte_perm (w[ 6], w[ 5], selector); - w[13] = hc_byte_perm (w[ 5], w[ 4], selector); - w[12] = hc_byte_perm (w[ 4], w[ 3], selector); - w[11] = hc_byte_perm (w[ 3], w[ 2], selector); - w[10] = hc_byte_perm (w[ 2], w[ 1], selector); - w[ 9] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 8] = hc_byte_perm (w[ 0], 0, selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm (w[54], w[53], selector); - w[62] = hc_byte_perm (w[53], w[52], selector); - w[61] = hc_byte_perm 
(w[52], w[51], selector); - w[60] = hc_byte_perm (w[51], w[50], selector); - w[59] = hc_byte_perm (w[50], w[49], selector); - w[58] = hc_byte_perm (w[49], w[48], selector); - w[57] = hc_byte_perm (w[48], w[47], selector); - w[56] = hc_byte_perm (w[47], w[46], selector); - w[55] = hc_byte_perm (w[46], w[45], selector); - w[54] = hc_byte_perm (w[45], w[44], selector); - w[53] = hc_byte_perm (w[44], w[43], selector); - w[52] = hc_byte_perm (w[43], w[42], selector); - w[51] = hc_byte_perm (w[42], w[41], selector); - w[50] = hc_byte_perm (w[41], w[40], selector); - w[49] = hc_byte_perm (w[40], w[39], selector); - w[48] = hc_byte_perm (w[39], w[38], selector); - w[47] = hc_byte_perm (w[38], w[37], selector); - w[46] = hc_byte_perm (w[37], w[36], selector); - w[45] = hc_byte_perm (w[36], w[35], selector); - w[44] = hc_byte_perm (w[35], w[34], selector); - w[43] = hc_byte_perm (w[34], w[33], selector); - w[42] = hc_byte_perm (w[33], w[32], selector); - w[41] = hc_byte_perm (w[32], w[31], selector); - w[40] = hc_byte_perm (w[31], w[30], selector); - w[39] = hc_byte_perm (w[30], w[29], selector); - w[38] = hc_byte_perm (w[29], w[28], selector); - w[37] = hc_byte_perm (w[28], w[27], selector); - w[36] = hc_byte_perm (w[27], w[26], selector); - w[35] = hc_byte_perm (w[26], w[25], selector); - w[34] = hc_byte_perm (w[25], w[24], selector); - w[33] = hc_byte_perm (w[24], w[23], selector); - w[32] = hc_byte_perm (w[23], w[22], selector); - w[31] = hc_byte_perm (w[22], w[21], selector); - w[30] = hc_byte_perm (w[21], w[20], selector); - w[29] = hc_byte_perm (w[20], w[19], selector); - w[28] = hc_byte_perm (w[19], w[18], selector); - w[27] = hc_byte_perm (w[18], w[17], selector); - w[26] = hc_byte_perm (w[17], w[16], selector); - w[25] = hc_byte_perm (w[16], w[15], selector); - w[24] = hc_byte_perm (w[15], w[14], selector); - w[23] = hc_byte_perm (w[14], w[13], selector); - w[22] = hc_byte_perm (w[13], w[12], selector); - w[21] = hc_byte_perm (w[12], w[11], selector); - w[20] = 
hc_byte_perm (w[11], w[10], selector); - w[19] = hc_byte_perm (w[10], w[ 9], selector); - w[18] = hc_byte_perm (w[ 9], w[ 8], selector); - w[17] = hc_byte_perm (w[ 8], w[ 7], selector); - w[16] = hc_byte_perm (w[ 7], w[ 6], selector); - w[15] = hc_byte_perm (w[ 6], w[ 5], selector); - w[14] = hc_byte_perm (w[ 5], w[ 4], selector); - w[13] = hc_byte_perm (w[ 4], w[ 3], selector); - w[12] = hc_byte_perm (w[ 3], w[ 2], selector); - w[11] = hc_byte_perm (w[ 2], w[ 1], selector); - w[10] = hc_byte_perm (w[ 1], w[ 0], selector); - w[ 9] = hc_byte_perm (w[ 0], 0, selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm (w[53], w[52], selector); - w[62] = hc_byte_perm (w[52], w[51], selector); - w[61] = hc_byte_perm (w[51], w[50], selector); - w[60] = hc_byte_perm (w[50], w[49], selector); - w[59] = hc_byte_perm (w[49], w[48], selector); - w[58] = hc_byte_perm (w[48], w[47], selector); - w[57] = hc_byte_perm (w[47], w[46], selector); - w[56] = hc_byte_perm (w[46], w[45], selector); - w[55] = hc_byte_perm (w[45], w[44], selector); - w[54] = hc_byte_perm (w[44], w[43], selector); - w[53] = hc_byte_perm (w[43], w[42], selector); - w[52] = hc_byte_perm (w[42], w[41], selector); - w[51] = hc_byte_perm (w[41], w[40], selector); - w[50] = hc_byte_perm (w[40], w[39], selector); - w[49] = hc_byte_perm (w[39], w[38], selector); - w[48] = hc_byte_perm (w[38], w[37], selector); - w[47] = hc_byte_perm (w[37], w[36], selector); - w[46] = hc_byte_perm (w[36], w[35], selector); - w[45] = hc_byte_perm (w[35], w[34], selector); - w[44] = hc_byte_perm (w[34], w[33], selector); - w[43] = hc_byte_perm (w[33], w[32], selector); - w[42] = hc_byte_perm (w[32], w[31], selector); - w[41] = hc_byte_perm (w[31], w[30], selector); - w[40] = hc_byte_perm (w[30], w[29], selector); - w[39] = hc_byte_perm (w[29], w[28], selector); - w[38] = hc_byte_perm (w[28], w[27], selector); - w[37] = 
hc_byte_perm (w[27], w[26], selector); - w[36] = hc_byte_perm (w[26], w[25], selector); - w[35] = hc_byte_perm (w[25], w[24], selector); - w[34] = hc_byte_perm (w[24], w[23], selector); - w[33] = hc_byte_perm (w[23], w[22], selector); - w[32] = hc_byte_perm (w[22], w[21], selector); - w[31] = hc_byte_perm (w[21], w[20], selector); - w[30] = hc_byte_perm (w[20], w[19], selector); - w[29] = hc_byte_perm (w[19], w[18], selector); - w[28] = hc_byte_perm (w[18], w[17], selector); - w[27] = hc_byte_perm (w[17], w[16], selector); - w[26] = hc_byte_perm (w[16], w[15], selector); - w[25] = hc_byte_perm (w[15], w[14], selector); - w[24] = hc_byte_perm (w[14], w[13], selector); - w[23] = hc_byte_perm (w[13], w[12], selector); - w[22] = hc_byte_perm (w[12], w[11], selector); - w[21] = hc_byte_perm (w[11], w[10], selector); - w[20] = hc_byte_perm (w[10], w[ 9], selector); - w[19] = hc_byte_perm (w[ 9], w[ 8], selector); - w[18] = hc_byte_perm (w[ 8], w[ 7], selector); - w[17] = hc_byte_perm (w[ 7], w[ 6], selector); - w[16] = hc_byte_perm (w[ 6], w[ 5], selector); - w[15] = hc_byte_perm (w[ 5], w[ 4], selector); - w[14] = hc_byte_perm (w[ 4], w[ 3], selector); - w[13] = hc_byte_perm (w[ 3], w[ 2], selector); - w[12] = hc_byte_perm (w[ 2], w[ 1], selector); - w[11] = hc_byte_perm (w[ 1], w[ 0], selector); - w[10] = hc_byte_perm (w[ 0], 0, selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm (w[52], w[51], selector); - w[62] = hc_byte_perm (w[51], w[50], selector); - w[61] = hc_byte_perm (w[50], w[49], selector); - w[60] = hc_byte_perm (w[49], w[48], selector); - w[59] = hc_byte_perm (w[48], w[47], selector); - w[58] = hc_byte_perm (w[47], w[46], selector); - w[57] = hc_byte_perm (w[46], w[45], selector); - w[56] = hc_byte_perm (w[45], w[44], selector); - w[55] = hc_byte_perm (w[44], w[43], selector); - w[54] = hc_byte_perm (w[43], w[42], 
selector); - w[53] = hc_byte_perm (w[42], w[41], selector); - w[52] = hc_byte_perm (w[41], w[40], selector); - w[51] = hc_byte_perm (w[40], w[39], selector); - w[50] = hc_byte_perm (w[39], w[38], selector); - w[49] = hc_byte_perm (w[38], w[37], selector); - w[48] = hc_byte_perm (w[37], w[36], selector); - w[47] = hc_byte_perm (w[36], w[35], selector); - w[46] = hc_byte_perm (w[35], w[34], selector); - w[45] = hc_byte_perm (w[34], w[33], selector); - w[44] = hc_byte_perm (w[33], w[32], selector); - w[43] = hc_byte_perm (w[32], w[31], selector); - w[42] = hc_byte_perm (w[31], w[30], selector); - w[41] = hc_byte_perm (w[30], w[29], selector); - w[40] = hc_byte_perm (w[29], w[28], selector); - w[39] = hc_byte_perm (w[28], w[27], selector); - w[38] = hc_byte_perm (w[27], w[26], selector); - w[37] = hc_byte_perm (w[26], w[25], selector); - w[36] = hc_byte_perm (w[25], w[24], selector); - w[35] = hc_byte_perm (w[24], w[23], selector); - w[34] = hc_byte_perm (w[23], w[22], selector); - w[33] = hc_byte_perm (w[22], w[21], selector); - w[32] = hc_byte_perm (w[21], w[20], selector); - w[31] = hc_byte_perm (w[20], w[19], selector); - w[30] = hc_byte_perm (w[19], w[18], selector); - w[29] = hc_byte_perm (w[18], w[17], selector); - w[28] = hc_byte_perm (w[17], w[16], selector); - w[27] = hc_byte_perm (w[16], w[15], selector); - w[26] = hc_byte_perm (w[15], w[14], selector); - w[25] = hc_byte_perm (w[14], w[13], selector); - w[24] = hc_byte_perm (w[13], w[12], selector); - w[23] = hc_byte_perm (w[12], w[11], selector); - w[22] = hc_byte_perm (w[11], w[10], selector); - w[21] = hc_byte_perm (w[10], w[ 9], selector); - w[20] = hc_byte_perm (w[ 9], w[ 8], selector); - w[19] = hc_byte_perm (w[ 8], w[ 7], selector); - w[18] = hc_byte_perm (w[ 7], w[ 6], selector); - w[17] = hc_byte_perm (w[ 6], w[ 5], selector); - w[16] = hc_byte_perm (w[ 5], w[ 4], selector); - w[15] = hc_byte_perm (w[ 4], w[ 3], selector); - w[14] = hc_byte_perm (w[ 3], w[ 2], selector); - w[13] = hc_byte_perm (w[ 
2], w[ 1], selector); - w[12] = hc_byte_perm (w[ 1], w[ 0], selector); - w[11] = hc_byte_perm (w[ 0], 0, selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm (w[51], w[50], selector); - w[62] = hc_byte_perm (w[50], w[49], selector); - w[61] = hc_byte_perm (w[49], w[48], selector); - w[60] = hc_byte_perm (w[48], w[47], selector); - w[59] = hc_byte_perm (w[47], w[46], selector); - w[58] = hc_byte_perm (w[46], w[45], selector); - w[57] = hc_byte_perm (w[45], w[44], selector); - w[56] = hc_byte_perm (w[44], w[43], selector); - w[55] = hc_byte_perm (w[43], w[42], selector); - w[54] = hc_byte_perm (w[42], w[41], selector); - w[53] = hc_byte_perm (w[41], w[40], selector); - w[52] = hc_byte_perm (w[40], w[39], selector); - w[51] = hc_byte_perm (w[39], w[38], selector); - w[50] = hc_byte_perm (w[38], w[37], selector); - w[49] = hc_byte_perm (w[37], w[36], selector); - w[48] = hc_byte_perm (w[36], w[35], selector); - w[47] = hc_byte_perm (w[35], w[34], selector); - w[46] = hc_byte_perm (w[34], w[33], selector); - w[45] = hc_byte_perm (w[33], w[32], selector); - w[44] = hc_byte_perm (w[32], w[31], selector); - w[43] = hc_byte_perm (w[31], w[30], selector); - w[42] = hc_byte_perm (w[30], w[29], selector); - w[41] = hc_byte_perm (w[29], w[28], selector); - w[40] = hc_byte_perm (w[28], w[27], selector); - w[39] = hc_byte_perm (w[27], w[26], selector); - w[38] = hc_byte_perm (w[26], w[25], selector); - w[37] = hc_byte_perm (w[25], w[24], selector); - w[36] = hc_byte_perm (w[24], w[23], selector); - w[35] = hc_byte_perm (w[23], w[22], selector); - w[34] = hc_byte_perm (w[22], w[21], selector); - w[33] = hc_byte_perm (w[21], w[20], selector); - w[32] = hc_byte_perm (w[20], w[19], selector); - w[31] = hc_byte_perm (w[19], w[18], selector); - w[30] = hc_byte_perm (w[18], w[17], selector); - w[29] = hc_byte_perm (w[17], w[16], selector); - 
w[28] = hc_byte_perm (w[16], w[15], selector); - w[27] = hc_byte_perm (w[15], w[14], selector); - w[26] = hc_byte_perm (w[14], w[13], selector); - w[25] = hc_byte_perm (w[13], w[12], selector); - w[24] = hc_byte_perm (w[12], w[11], selector); - w[23] = hc_byte_perm (w[11], w[10], selector); - w[22] = hc_byte_perm (w[10], w[ 9], selector); - w[21] = hc_byte_perm (w[ 9], w[ 8], selector); - w[20] = hc_byte_perm (w[ 8], w[ 7], selector); - w[19] = hc_byte_perm (w[ 7], w[ 6], selector); - w[18] = hc_byte_perm (w[ 6], w[ 5], selector); - w[17] = hc_byte_perm (w[ 5], w[ 4], selector); - w[16] = hc_byte_perm (w[ 4], w[ 3], selector); - w[15] = hc_byte_perm (w[ 3], w[ 2], selector); - w[14] = hc_byte_perm (w[ 2], w[ 1], selector); - w[13] = hc_byte_perm (w[ 1], w[ 0], selector); - w[12] = hc_byte_perm (w[ 0], 0, selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm (w[50], w[49], selector); - w[62] = hc_byte_perm (w[49], w[48], selector); - w[61] = hc_byte_perm (w[48], w[47], selector); - w[60] = hc_byte_perm (w[47], w[46], selector); - w[59] = hc_byte_perm (w[46], w[45], selector); - w[58] = hc_byte_perm (w[45], w[44], selector); - w[57] = hc_byte_perm (w[44], w[43], selector); - w[56] = hc_byte_perm (w[43], w[42], selector); - w[55] = hc_byte_perm (w[42], w[41], selector); - w[54] = hc_byte_perm (w[41], w[40], selector); - w[53] = hc_byte_perm (w[40], w[39], selector); - w[52] = hc_byte_perm (w[39], w[38], selector); - w[51] = hc_byte_perm (w[38], w[37], selector); - w[50] = hc_byte_perm (w[37], w[36], selector); - w[49] = hc_byte_perm (w[36], w[35], selector); - w[48] = hc_byte_perm (w[35], w[34], selector); - w[47] = hc_byte_perm (w[34], w[33], selector); - w[46] = hc_byte_perm (w[33], w[32], selector); - w[45] = hc_byte_perm (w[32], w[31], selector); - w[44] = hc_byte_perm (w[31], w[30], selector); - w[43] = 
hc_byte_perm (w[30], w[29], selector); - w[42] = hc_byte_perm (w[29], w[28], selector); - w[41] = hc_byte_perm (w[28], w[27], selector); - w[40] = hc_byte_perm (w[27], w[26], selector); - w[39] = hc_byte_perm (w[26], w[25], selector); - w[38] = hc_byte_perm (w[25], w[24], selector); - w[37] = hc_byte_perm (w[24], w[23], selector); - w[36] = hc_byte_perm (w[23], w[22], selector); - w[35] = hc_byte_perm (w[22], w[21], selector); - w[34] = hc_byte_perm (w[21], w[20], selector); - w[33] = hc_byte_perm (w[20], w[19], selector); - w[32] = hc_byte_perm (w[19], w[18], selector); - w[31] = hc_byte_perm (w[18], w[17], selector); - w[30] = hc_byte_perm (w[17], w[16], selector); - w[29] = hc_byte_perm (w[16], w[15], selector); - w[28] = hc_byte_perm (w[15], w[14], selector); - w[27] = hc_byte_perm (w[14], w[13], selector); - w[26] = hc_byte_perm (w[13], w[12], selector); - w[25] = hc_byte_perm (w[12], w[11], selector); - w[24] = hc_byte_perm (w[11], w[10], selector); - w[23] = hc_byte_perm (w[10], w[ 9], selector); - w[22] = hc_byte_perm (w[ 9], w[ 8], selector); - w[21] = hc_byte_perm (w[ 8], w[ 7], selector); - w[20] = hc_byte_perm (w[ 7], w[ 6], selector); - w[19] = hc_byte_perm (w[ 6], w[ 5], selector); - w[18] = hc_byte_perm (w[ 5], w[ 4], selector); - w[17] = hc_byte_perm (w[ 4], w[ 3], selector); - w[16] = hc_byte_perm (w[ 3], w[ 2], selector); - w[15] = hc_byte_perm (w[ 2], w[ 1], selector); - w[14] = hc_byte_perm (w[ 1], w[ 0], selector); - w[13] = hc_byte_perm (w[ 0], 0, selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm (w[49], w[48], selector); - w[62] = hc_byte_perm (w[48], w[47], selector); - w[61] = hc_byte_perm (w[47], w[46], selector); - w[60] = hc_byte_perm (w[46], w[45], selector); - w[59] = hc_byte_perm (w[45], w[44], selector); - w[58] = hc_byte_perm (w[44], w[43], selector); - w[57] 
= hc_byte_perm (w[43], w[42], selector); - w[56] = hc_byte_perm (w[42], w[41], selector); - w[55] = hc_byte_perm (w[41], w[40], selector); - w[54] = hc_byte_perm (w[40], w[39], selector); - w[53] = hc_byte_perm (w[39], w[38], selector); - w[52] = hc_byte_perm (w[38], w[37], selector); - w[51] = hc_byte_perm (w[37], w[36], selector); - w[50] = hc_byte_perm (w[36], w[35], selector); - w[49] = hc_byte_perm (w[35], w[34], selector); - w[48] = hc_byte_perm (w[34], w[33], selector); - w[47] = hc_byte_perm (w[33], w[32], selector); - w[46] = hc_byte_perm (w[32], w[31], selector); - w[45] = hc_byte_perm (w[31], w[30], selector); - w[44] = hc_byte_perm (w[30], w[29], selector); - w[43] = hc_byte_perm (w[29], w[28], selector); - w[42] = hc_byte_perm (w[28], w[27], selector); - w[41] = hc_byte_perm (w[27], w[26], selector); - w[40] = hc_byte_perm (w[26], w[25], selector); - w[39] = hc_byte_perm (w[25], w[24], selector); - w[38] = hc_byte_perm (w[24], w[23], selector); - w[37] = hc_byte_perm (w[23], w[22], selector); - w[36] = hc_byte_perm (w[22], w[21], selector); - w[35] = hc_byte_perm (w[21], w[20], selector); - w[34] = hc_byte_perm (w[20], w[19], selector); - w[33] = hc_byte_perm (w[19], w[18], selector); - w[32] = hc_byte_perm (w[18], w[17], selector); - w[31] = hc_byte_perm (w[17], w[16], selector); - w[30] = hc_byte_perm (w[16], w[15], selector); - w[29] = hc_byte_perm (w[15], w[14], selector); - w[28] = hc_byte_perm (w[14], w[13], selector); - w[27] = hc_byte_perm (w[13], w[12], selector); - w[26] = hc_byte_perm (w[12], w[11], selector); - w[25] = hc_byte_perm (w[11], w[10], selector); - w[24] = hc_byte_perm (w[10], w[ 9], selector); - w[23] = hc_byte_perm (w[ 9], w[ 8], selector); - w[22] = hc_byte_perm (w[ 8], w[ 7], selector); - w[21] = hc_byte_perm (w[ 7], w[ 6], selector); - w[20] = hc_byte_perm (w[ 6], w[ 5], selector); - w[19] = hc_byte_perm (w[ 5], w[ 4], selector); - w[18] = hc_byte_perm (w[ 4], w[ 3], selector); - w[17] = hc_byte_perm (w[ 3], w[ 2], 
selector); - w[16] = hc_byte_perm (w[ 2], w[ 1], selector); - w[15] = hc_byte_perm (w[ 1], w[ 0], selector); - w[14] = hc_byte_perm (w[ 0], 0, selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm (w[48], w[47], selector); - w[62] = hc_byte_perm (w[47], w[46], selector); - w[61] = hc_byte_perm (w[46], w[45], selector); - w[60] = hc_byte_perm (w[45], w[44], selector); - w[59] = hc_byte_perm (w[44], w[43], selector); - w[58] = hc_byte_perm (w[43], w[42], selector); - w[57] = hc_byte_perm (w[42], w[41], selector); - w[56] = hc_byte_perm (w[41], w[40], selector); - w[55] = hc_byte_perm (w[40], w[39], selector); - w[54] = hc_byte_perm (w[39], w[38], selector); - w[53] = hc_byte_perm (w[38], w[37], selector); - w[52] = hc_byte_perm (w[37], w[36], selector); - w[51] = hc_byte_perm (w[36], w[35], selector); - w[50] = hc_byte_perm (w[35], w[34], selector); - w[49] = hc_byte_perm (w[34], w[33], selector); - w[48] = hc_byte_perm (w[33], w[32], selector); - w[47] = hc_byte_perm (w[32], w[31], selector); - w[46] = hc_byte_perm (w[31], w[30], selector); - w[45] = hc_byte_perm (w[30], w[29], selector); - w[44] = hc_byte_perm (w[29], w[28], selector); - w[43] = hc_byte_perm (w[28], w[27], selector); - w[42] = hc_byte_perm (w[27], w[26], selector); - w[41] = hc_byte_perm (w[26], w[25], selector); - w[40] = hc_byte_perm (w[25], w[24], selector); - w[39] = hc_byte_perm (w[24], w[23], selector); - w[38] = hc_byte_perm (w[23], w[22], selector); - w[37] = hc_byte_perm (w[22], w[21], selector); - w[36] = hc_byte_perm (w[21], w[20], selector); - w[35] = hc_byte_perm (w[20], w[19], selector); - w[34] = hc_byte_perm (w[19], w[18], selector); - w[33] = hc_byte_perm (w[18], w[17], selector); - w[32] = hc_byte_perm (w[17], w[16], selector); - w[31] = hc_byte_perm (w[16], w[15], selector); - w[30] = hc_byte_perm 
(w[15], w[14], selector); - w[29] = hc_byte_perm (w[14], w[13], selector); - w[28] = hc_byte_perm (w[13], w[12], selector); - w[27] = hc_byte_perm (w[12], w[11], selector); - w[26] = hc_byte_perm (w[11], w[10], selector); - w[25] = hc_byte_perm (w[10], w[ 9], selector); - w[24] = hc_byte_perm (w[ 9], w[ 8], selector); - w[23] = hc_byte_perm (w[ 8], w[ 7], selector); - w[22] = hc_byte_perm (w[ 7], w[ 6], selector); - w[21] = hc_byte_perm (w[ 6], w[ 5], selector); - w[20] = hc_byte_perm (w[ 5], w[ 4], selector); - w[19] = hc_byte_perm (w[ 4], w[ 3], selector); - w[18] = hc_byte_perm (w[ 3], w[ 2], selector); - w[17] = hc_byte_perm (w[ 2], w[ 1], selector); - w[16] = hc_byte_perm (w[ 1], w[ 0], selector); - w[15] = hc_byte_perm (w[ 0], 0, selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm (w[47], w[46], selector); - w[62] = hc_byte_perm (w[46], w[45], selector); - w[61] = hc_byte_perm (w[45], w[44], selector); - w[60] = hc_byte_perm (w[44], w[43], selector); - w[59] = hc_byte_perm (w[43], w[42], selector); - w[58] = hc_byte_perm (w[42], w[41], selector); - w[57] = hc_byte_perm (w[41], w[40], selector); - w[56] = hc_byte_perm (w[40], w[39], selector); - w[55] = hc_byte_perm (w[39], w[38], selector); - w[54] = hc_byte_perm (w[38], w[37], selector); - w[53] = hc_byte_perm (w[37], w[36], selector); - w[52] = hc_byte_perm (w[36], w[35], selector); - w[51] = hc_byte_perm (w[35], w[34], selector); - w[50] = hc_byte_perm (w[34], w[33], selector); - w[49] = hc_byte_perm (w[33], w[32], selector); - w[48] = hc_byte_perm (w[32], w[31], selector); - w[47] = hc_byte_perm (w[31], w[30], selector); - w[46] = hc_byte_perm (w[30], w[29], selector); - w[45] = hc_byte_perm (w[29], w[28], selector); - w[44] = hc_byte_perm (w[28], w[27], selector); - w[43] = hc_byte_perm (w[27], w[26], 
selector); - w[42] = hc_byte_perm (w[26], w[25], selector); - w[41] = hc_byte_perm (w[25], w[24], selector); - w[40] = hc_byte_perm (w[24], w[23], selector); - w[39] = hc_byte_perm (w[23], w[22], selector); - w[38] = hc_byte_perm (w[22], w[21], selector); - w[37] = hc_byte_perm (w[21], w[20], selector); - w[36] = hc_byte_perm (w[20], w[19], selector); - w[35] = hc_byte_perm (w[19], w[18], selector); - w[34] = hc_byte_perm (w[18], w[17], selector); - w[33] = hc_byte_perm (w[17], w[16], selector); - w[32] = hc_byte_perm (w[16], w[15], selector); - w[31] = hc_byte_perm (w[15], w[14], selector); - w[30] = hc_byte_perm (w[14], w[13], selector); - w[29] = hc_byte_perm (w[13], w[12], selector); - w[28] = hc_byte_perm (w[12], w[11], selector); - w[27] = hc_byte_perm (w[11], w[10], selector); - w[26] = hc_byte_perm (w[10], w[ 9], selector); - w[25] = hc_byte_perm (w[ 9], w[ 8], selector); - w[24] = hc_byte_perm (w[ 8], w[ 7], selector); - w[23] = hc_byte_perm (w[ 7], w[ 6], selector); - w[22] = hc_byte_perm (w[ 6], w[ 5], selector); - w[21] = hc_byte_perm (w[ 5], w[ 4], selector); - w[20] = hc_byte_perm (w[ 4], w[ 3], selector); - w[19] = hc_byte_perm (w[ 3], w[ 2], selector); - w[18] = hc_byte_perm (w[ 2], w[ 1], selector); - w[17] = hc_byte_perm (w[ 1], w[ 0], selector); - w[16] = hc_byte_perm (w[ 0], 0, selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm (w[46], w[45], selector); - w[62] = hc_byte_perm (w[45], w[44], selector); - w[61] = hc_byte_perm (w[44], w[43], selector); - w[60] = hc_byte_perm (w[43], w[42], selector); - w[59] = hc_byte_perm (w[42], w[41], selector); - w[58] = hc_byte_perm (w[41], w[40], selector); - w[57] = hc_byte_perm (w[40], w[39], selector); - w[56] = hc_byte_perm (w[39], w[38], selector); - w[55] = hc_byte_perm (w[38], w[37], 
selector); - w[54] = hc_byte_perm (w[37], w[36], selector); - w[53] = hc_byte_perm (w[36], w[35], selector); - w[52] = hc_byte_perm (w[35], w[34], selector); - w[51] = hc_byte_perm (w[34], w[33], selector); - w[50] = hc_byte_perm (w[33], w[32], selector); - w[49] = hc_byte_perm (w[32], w[31], selector); - w[48] = hc_byte_perm (w[31], w[30], selector); - w[47] = hc_byte_perm (w[30], w[29], selector); - w[46] = hc_byte_perm (w[29], w[28], selector); - w[45] = hc_byte_perm (w[28], w[27], selector); - w[44] = hc_byte_perm (w[27], w[26], selector); - w[43] = hc_byte_perm (w[26], w[25], selector); - w[42] = hc_byte_perm (w[25], w[24], selector); - w[41] = hc_byte_perm (w[24], w[23], selector); - w[40] = hc_byte_perm (w[23], w[22], selector); - w[39] = hc_byte_perm (w[22], w[21], selector); - w[38] = hc_byte_perm (w[21], w[20], selector); - w[37] = hc_byte_perm (w[20], w[19], selector); - w[36] = hc_byte_perm (w[19], w[18], selector); - w[35] = hc_byte_perm (w[18], w[17], selector); - w[34] = hc_byte_perm (w[17], w[16], selector); - w[33] = hc_byte_perm (w[16], w[15], selector); - w[32] = hc_byte_perm (w[15], w[14], selector); - w[31] = hc_byte_perm (w[14], w[13], selector); - w[30] = hc_byte_perm (w[13], w[12], selector); - w[29] = hc_byte_perm (w[12], w[11], selector); - w[28] = hc_byte_perm (w[11], w[10], selector); - w[27] = hc_byte_perm (w[10], w[ 9], selector); - w[26] = hc_byte_perm (w[ 9], w[ 8], selector); - w[25] = hc_byte_perm (w[ 8], w[ 7], selector); - w[24] = hc_byte_perm (w[ 7], w[ 6], selector); - w[23] = hc_byte_perm (w[ 6], w[ 5], selector); - w[22] = hc_byte_perm (w[ 5], w[ 4], selector); - w[21] = hc_byte_perm (w[ 4], w[ 3], selector); - w[20] = hc_byte_perm (w[ 3], w[ 2], selector); - w[19] = hc_byte_perm (w[ 2], w[ 1], selector); - w[18] = hc_byte_perm (w[ 1], w[ 0], selector); - w[17] = hc_byte_perm (w[ 0], 0, selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; 
- w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm (w[45], w[44], selector); - w[62] = hc_byte_perm (w[44], w[43], selector); - w[61] = hc_byte_perm (w[43], w[42], selector); - w[60] = hc_byte_perm (w[42], w[41], selector); - w[59] = hc_byte_perm (w[41], w[40], selector); - w[58] = hc_byte_perm (w[40], w[39], selector); - w[57] = hc_byte_perm (w[39], w[38], selector); - w[56] = hc_byte_perm (w[38], w[37], selector); - w[55] = hc_byte_perm (w[37], w[36], selector); - w[54] = hc_byte_perm (w[36], w[35], selector); - w[53] = hc_byte_perm (w[35], w[34], selector); - w[52] = hc_byte_perm (w[34], w[33], selector); - w[51] = hc_byte_perm (w[33], w[32], selector); - w[50] = hc_byte_perm (w[32], w[31], selector); - w[49] = hc_byte_perm (w[31], w[30], selector); - w[48] = hc_byte_perm (w[30], w[29], selector); - w[47] = hc_byte_perm (w[29], w[28], selector); - w[46] = hc_byte_perm (w[28], w[27], selector); - w[45] = hc_byte_perm (w[27], w[26], selector); - w[44] = hc_byte_perm (w[26], w[25], selector); - w[43] = hc_byte_perm (w[25], w[24], selector); - w[42] = hc_byte_perm (w[24], w[23], selector); - w[41] = hc_byte_perm (w[23], w[22], selector); - w[40] = hc_byte_perm (w[22], w[21], selector); - w[39] = hc_byte_perm (w[21], w[20], selector); - w[38] = hc_byte_perm (w[20], w[19], selector); - w[37] = hc_byte_perm (w[19], w[18], selector); - w[36] = hc_byte_perm (w[18], w[17], selector); - w[35] = hc_byte_perm (w[17], w[16], selector); - w[34] = hc_byte_perm (w[16], w[15], selector); - w[33] = hc_byte_perm (w[15], w[14], selector); - w[32] = hc_byte_perm (w[14], w[13], selector); - w[31] = hc_byte_perm (w[13], w[12], selector); - w[30] = hc_byte_perm (w[12], w[11], selector); - w[29] = hc_byte_perm (w[11], w[10], selector); - w[28] = hc_byte_perm (w[10], w[ 9], selector); - w[27] = hc_byte_perm (w[ 9], w[ 8], selector); - w[26] = hc_byte_perm (w[ 8], w[ 7], selector); - w[25] = hc_byte_perm 
(w[ 7], w[ 6], selector); - w[24] = hc_byte_perm (w[ 6], w[ 5], selector); - w[23] = hc_byte_perm (w[ 5], w[ 4], selector); - w[22] = hc_byte_perm (w[ 4], w[ 3], selector); - w[21] = hc_byte_perm (w[ 3], w[ 2], selector); - w[20] = hc_byte_perm (w[ 2], w[ 1], selector); - w[19] = hc_byte_perm (w[ 1], w[ 0], selector); - w[18] = hc_byte_perm (w[ 0], 0, selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm (w[44], w[43], selector); - w[62] = hc_byte_perm (w[43], w[42], selector); - w[61] = hc_byte_perm (w[42], w[41], selector); - w[60] = hc_byte_perm (w[41], w[40], selector); - w[59] = hc_byte_perm (w[40], w[39], selector); - w[58] = hc_byte_perm (w[39], w[38], selector); - w[57] = hc_byte_perm (w[38], w[37], selector); - w[56] = hc_byte_perm (w[37], w[36], selector); - w[55] = hc_byte_perm (w[36], w[35], selector); - w[54] = hc_byte_perm (w[35], w[34], selector); - w[53] = hc_byte_perm (w[34], w[33], selector); - w[52] = hc_byte_perm (w[33], w[32], selector); - w[51] = hc_byte_perm (w[32], w[31], selector); - w[50] = hc_byte_perm (w[31], w[30], selector); - w[49] = hc_byte_perm (w[30], w[29], selector); - w[48] = hc_byte_perm (w[29], w[28], selector); - w[47] = hc_byte_perm (w[28], w[27], selector); - w[46] = hc_byte_perm (w[27], w[26], selector); - w[45] = hc_byte_perm (w[26], w[25], selector); - w[44] = hc_byte_perm (w[25], w[24], selector); - w[43] = hc_byte_perm (w[24], w[23], selector); - w[42] = hc_byte_perm (w[23], w[22], selector); - w[41] = hc_byte_perm (w[22], w[21], selector); - w[40] = hc_byte_perm (w[21], w[20], selector); - w[39] = hc_byte_perm (w[20], w[19], selector); - w[38] = hc_byte_perm (w[19], w[18], selector); - w[37] = hc_byte_perm (w[18], w[17], selector); - w[36] = hc_byte_perm (w[17], w[16], selector); - 
w[35] = hc_byte_perm (w[16], w[15], selector); - w[34] = hc_byte_perm (w[15], w[14], selector); - w[33] = hc_byte_perm (w[14], w[13], selector); - w[32] = hc_byte_perm (w[13], w[12], selector); - w[31] = hc_byte_perm (w[12], w[11], selector); - w[30] = hc_byte_perm (w[11], w[10], selector); - w[29] = hc_byte_perm (w[10], w[ 9], selector); - w[28] = hc_byte_perm (w[ 9], w[ 8], selector); - w[27] = hc_byte_perm (w[ 8], w[ 7], selector); - w[26] = hc_byte_perm (w[ 7], w[ 6], selector); - w[25] = hc_byte_perm (w[ 6], w[ 5], selector); - w[24] = hc_byte_perm (w[ 5], w[ 4], selector); - w[23] = hc_byte_perm (w[ 4], w[ 3], selector); - w[22] = hc_byte_perm (w[ 3], w[ 2], selector); - w[21] = hc_byte_perm (w[ 2], w[ 1], selector); - w[20] = hc_byte_perm (w[ 1], w[ 0], selector); - w[19] = hc_byte_perm (w[ 0], 0, selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm (w[43], w[42], selector); - w[62] = hc_byte_perm (w[42], w[41], selector); - w[61] = hc_byte_perm (w[41], w[40], selector); - w[60] = hc_byte_perm (w[40], w[39], selector); - w[59] = hc_byte_perm (w[39], w[38], selector); - w[58] = hc_byte_perm (w[38], w[37], selector); - w[57] = hc_byte_perm (w[37], w[36], selector); - w[56] = hc_byte_perm (w[36], w[35], selector); - w[55] = hc_byte_perm (w[35], w[34], selector); - w[54] = hc_byte_perm (w[34], w[33], selector); - w[53] = hc_byte_perm (w[33], w[32], selector); - w[52] = hc_byte_perm (w[32], w[31], selector); - w[51] = hc_byte_perm (w[31], w[30], selector); - w[50] = hc_byte_perm (w[30], w[29], selector); - w[49] = hc_byte_perm (w[29], w[28], selector); - w[48] = hc_byte_perm (w[28], w[27], selector); - w[47] = hc_byte_perm (w[27], w[26], selector); - w[46] = hc_byte_perm (w[26], w[25], selector); - w[45] = 
hc_byte_perm (w[25], w[24], selector); - w[44] = hc_byte_perm (w[24], w[23], selector); - w[43] = hc_byte_perm (w[23], w[22], selector); - w[42] = hc_byte_perm (w[22], w[21], selector); - w[41] = hc_byte_perm (w[21], w[20], selector); - w[40] = hc_byte_perm (w[20], w[19], selector); - w[39] = hc_byte_perm (w[19], w[18], selector); - w[38] = hc_byte_perm (w[18], w[17], selector); - w[37] = hc_byte_perm (w[17], w[16], selector); - w[36] = hc_byte_perm (w[16], w[15], selector); - w[35] = hc_byte_perm (w[15], w[14], selector); - w[34] = hc_byte_perm (w[14], w[13], selector); - w[33] = hc_byte_perm (w[13], w[12], selector); - w[32] = hc_byte_perm (w[12], w[11], selector); - w[31] = hc_byte_perm (w[11], w[10], selector); - w[30] = hc_byte_perm (w[10], w[ 9], selector); - w[29] = hc_byte_perm (w[ 9], w[ 8], selector); - w[28] = hc_byte_perm (w[ 8], w[ 7], selector); - w[27] = hc_byte_perm (w[ 7], w[ 6], selector); - w[26] = hc_byte_perm (w[ 6], w[ 5], selector); - w[25] = hc_byte_perm (w[ 5], w[ 4], selector); - w[24] = hc_byte_perm (w[ 4], w[ 3], selector); - w[23] = hc_byte_perm (w[ 3], w[ 2], selector); - w[22] = hc_byte_perm (w[ 2], w[ 1], selector); - w[21] = hc_byte_perm (w[ 1], w[ 0], selector); - w[20] = hc_byte_perm (w[ 0], 0, selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm (w[42], w[41], selector); - w[62] = hc_byte_perm (w[41], w[40], selector); - w[61] = hc_byte_perm (w[40], w[39], selector); - w[60] = hc_byte_perm (w[39], w[38], selector); - w[59] = hc_byte_perm (w[38], w[37], selector); - w[58] = hc_byte_perm (w[37], w[36], selector); - w[57] = hc_byte_perm (w[36], w[35], selector); - w[56] = hc_byte_perm (w[35], w[34], selector); - w[55] = hc_byte_perm (w[34], w[33], selector); - w[54] = 
hc_byte_perm (w[33], w[32], selector); - w[53] = hc_byte_perm (w[32], w[31], selector); - w[52] = hc_byte_perm (w[31], w[30], selector); - w[51] = hc_byte_perm (w[30], w[29], selector); - w[50] = hc_byte_perm (w[29], w[28], selector); - w[49] = hc_byte_perm (w[28], w[27], selector); - w[48] = hc_byte_perm (w[27], w[26], selector); - w[47] = hc_byte_perm (w[26], w[25], selector); - w[46] = hc_byte_perm (w[25], w[24], selector); - w[45] = hc_byte_perm (w[24], w[23], selector); - w[44] = hc_byte_perm (w[23], w[22], selector); - w[43] = hc_byte_perm (w[22], w[21], selector); - w[42] = hc_byte_perm (w[21], w[20], selector); - w[41] = hc_byte_perm (w[20], w[19], selector); - w[40] = hc_byte_perm (w[19], w[18], selector); - w[39] = hc_byte_perm (w[18], w[17], selector); - w[38] = hc_byte_perm (w[17], w[16], selector); - w[37] = hc_byte_perm (w[16], w[15], selector); - w[36] = hc_byte_perm (w[15], w[14], selector); - w[35] = hc_byte_perm (w[14], w[13], selector); - w[34] = hc_byte_perm (w[13], w[12], selector); - w[33] = hc_byte_perm (w[12], w[11], selector); - w[32] = hc_byte_perm (w[11], w[10], selector); - w[31] = hc_byte_perm (w[10], w[ 9], selector); - w[30] = hc_byte_perm (w[ 9], w[ 8], selector); - w[29] = hc_byte_perm (w[ 8], w[ 7], selector); - w[28] = hc_byte_perm (w[ 7], w[ 6], selector); - w[27] = hc_byte_perm (w[ 6], w[ 5], selector); - w[26] = hc_byte_perm (w[ 5], w[ 4], selector); - w[25] = hc_byte_perm (w[ 4], w[ 3], selector); - w[24] = hc_byte_perm (w[ 3], w[ 2], selector); - w[23] = hc_byte_perm (w[ 2], w[ 1], selector); - w[22] = hc_byte_perm (w[ 1], w[ 0], selector); - w[21] = hc_byte_perm (w[ 0], 0, selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm (w[41], w[40], selector); - 
w[62] = hc_byte_perm (w[40], w[39], selector); - w[61] = hc_byte_perm (w[39], w[38], selector); - w[60] = hc_byte_perm (w[38], w[37], selector); - w[59] = hc_byte_perm (w[37], w[36], selector); - w[58] = hc_byte_perm (w[36], w[35], selector); - w[57] = hc_byte_perm (w[35], w[34], selector); - w[56] = hc_byte_perm (w[34], w[33], selector); - w[55] = hc_byte_perm (w[33], w[32], selector); - w[54] = hc_byte_perm (w[32], w[31], selector); - w[53] = hc_byte_perm (w[31], w[30], selector); - w[52] = hc_byte_perm (w[30], w[29], selector); - w[51] = hc_byte_perm (w[29], w[28], selector); - w[50] = hc_byte_perm (w[28], w[27], selector); - w[49] = hc_byte_perm (w[27], w[26], selector); - w[48] = hc_byte_perm (w[26], w[25], selector); - w[47] = hc_byte_perm (w[25], w[24], selector); - w[46] = hc_byte_perm (w[24], w[23], selector); - w[45] = hc_byte_perm (w[23], w[22], selector); - w[44] = hc_byte_perm (w[22], w[21], selector); - w[43] = hc_byte_perm (w[21], w[20], selector); - w[42] = hc_byte_perm (w[20], w[19], selector); - w[41] = hc_byte_perm (w[19], w[18], selector); - w[40] = hc_byte_perm (w[18], w[17], selector); - w[39] = hc_byte_perm (w[17], w[16], selector); - w[38] = hc_byte_perm (w[16], w[15], selector); - w[37] = hc_byte_perm (w[15], w[14], selector); - w[36] = hc_byte_perm (w[14], w[13], selector); - w[35] = hc_byte_perm (w[13], w[12], selector); - w[34] = hc_byte_perm (w[12], w[11], selector); - w[33] = hc_byte_perm (w[11], w[10], selector); - w[32] = hc_byte_perm (w[10], w[ 9], selector); - w[31] = hc_byte_perm (w[ 9], w[ 8], selector); - w[30] = hc_byte_perm (w[ 8], w[ 7], selector); - w[29] = hc_byte_perm (w[ 7], w[ 6], selector); - w[28] = hc_byte_perm (w[ 6], w[ 5], selector); - w[27] = hc_byte_perm (w[ 5], w[ 4], selector); - w[26] = hc_byte_perm (w[ 4], w[ 3], selector); - w[25] = hc_byte_perm (w[ 3], w[ 2], selector); - w[24] = hc_byte_perm (w[ 2], w[ 1], selector); - w[23] = hc_byte_perm (w[ 1], w[ 0], selector); - w[22] = hc_byte_perm (w[ 0], 0, 
selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm (w[40], w[39], selector); - w[62] = hc_byte_perm (w[39], w[38], selector); - w[61] = hc_byte_perm (w[38], w[37], selector); - w[60] = hc_byte_perm (w[37], w[36], selector); - w[59] = hc_byte_perm (w[36], w[35], selector); - w[58] = hc_byte_perm (w[35], w[34], selector); - w[57] = hc_byte_perm (w[34], w[33], selector); - w[56] = hc_byte_perm (w[33], w[32], selector); - w[55] = hc_byte_perm (w[32], w[31], selector); - w[54] = hc_byte_perm (w[31], w[30], selector); - w[53] = hc_byte_perm (w[30], w[29], selector); - w[52] = hc_byte_perm (w[29], w[28], selector); - w[51] = hc_byte_perm (w[28], w[27], selector); - w[50] = hc_byte_perm (w[27], w[26], selector); - w[49] = hc_byte_perm (w[26], w[25], selector); - w[48] = hc_byte_perm (w[25], w[24], selector); - w[47] = hc_byte_perm (w[24], w[23], selector); - w[46] = hc_byte_perm (w[23], w[22], selector); - w[45] = hc_byte_perm (w[22], w[21], selector); - w[44] = hc_byte_perm (w[21], w[20], selector); - w[43] = hc_byte_perm (w[20], w[19], selector); - w[42] = hc_byte_perm (w[19], w[18], selector); - w[41] = hc_byte_perm (w[18], w[17], selector); - w[40] = hc_byte_perm (w[17], w[16], selector); - w[39] = hc_byte_perm (w[16], w[15], selector); - w[38] = hc_byte_perm (w[15], w[14], selector); - w[37] = hc_byte_perm (w[14], w[13], selector); - w[36] = hc_byte_perm (w[13], w[12], selector); - w[35] = hc_byte_perm (w[12], w[11], selector); - w[34] = hc_byte_perm (w[11], w[10], selector); - w[33] = hc_byte_perm (w[10], w[ 9], selector); - w[32] = hc_byte_perm (w[ 9], w[ 8], selector); - w[31] = hc_byte_perm (w[ 8], w[ 7], selector); - w[30] = hc_byte_perm (w[ 7], w[ 6], selector); - w[29] = 
hc_byte_perm (w[ 6], w[ 5], selector); - w[28] = hc_byte_perm (w[ 5], w[ 4], selector); - w[27] = hc_byte_perm (w[ 4], w[ 3], selector); - w[26] = hc_byte_perm (w[ 3], w[ 2], selector); - w[25] = hc_byte_perm (w[ 2], w[ 1], selector); - w[24] = hc_byte_perm (w[ 1], w[ 0], selector); - w[23] = hc_byte_perm (w[ 0], 0, selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm (w[39], w[38], selector); - w[62] = hc_byte_perm (w[38], w[37], selector); - w[61] = hc_byte_perm (w[37], w[36], selector); - w[60] = hc_byte_perm (w[36], w[35], selector); - w[59] = hc_byte_perm (w[35], w[34], selector); - w[58] = hc_byte_perm (w[34], w[33], selector); - w[57] = hc_byte_perm (w[33], w[32], selector); - w[56] = hc_byte_perm (w[32], w[31], selector); - w[55] = hc_byte_perm (w[31], w[30], selector); - w[54] = hc_byte_perm (w[30], w[29], selector); - w[53] = hc_byte_perm (w[29], w[28], selector); - w[52] = hc_byte_perm (w[28], w[27], selector); - w[51] = hc_byte_perm (w[27], w[26], selector); - w[50] = hc_byte_perm (w[26], w[25], selector); - w[49] = hc_byte_perm (w[25], w[24], selector); - w[48] = hc_byte_perm (w[24], w[23], selector); - w[47] = hc_byte_perm (w[23], w[22], selector); - w[46] = hc_byte_perm (w[22], w[21], selector); - w[45] = hc_byte_perm (w[21], w[20], selector); - w[44] = hc_byte_perm (w[20], w[19], selector); - w[43] = hc_byte_perm (w[19], w[18], selector); - w[42] = hc_byte_perm (w[18], w[17], selector); - w[41] = hc_byte_perm (w[17], w[16], selector); - w[40] = hc_byte_perm (w[16], w[15], selector); - w[39] = hc_byte_perm (w[15], w[14], selector); - w[38] = hc_byte_perm (w[14], w[13], selector); - w[37] = hc_byte_perm (w[13], w[12], selector); - w[36] = hc_byte_perm 
(w[12], w[11], selector); - w[35] = hc_byte_perm (w[11], w[10], selector); - w[34] = hc_byte_perm (w[10], w[ 9], selector); - w[33] = hc_byte_perm (w[ 9], w[ 8], selector); - w[32] = hc_byte_perm (w[ 8], w[ 7], selector); - w[31] = hc_byte_perm (w[ 7], w[ 6], selector); - w[30] = hc_byte_perm (w[ 6], w[ 5], selector); - w[29] = hc_byte_perm (w[ 5], w[ 4], selector); - w[28] = hc_byte_perm (w[ 4], w[ 3], selector); - w[27] = hc_byte_perm (w[ 3], w[ 2], selector); - w[26] = hc_byte_perm (w[ 2], w[ 1], selector); - w[25] = hc_byte_perm (w[ 1], w[ 0], selector); - w[24] = hc_byte_perm (w[ 0], 0, selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm (w[38], w[37], selector); - w[62] = hc_byte_perm (w[37], w[36], selector); - w[61] = hc_byte_perm (w[36], w[35], selector); - w[60] = hc_byte_perm (w[35], w[34], selector); - w[59] = hc_byte_perm (w[34], w[33], selector); - w[58] = hc_byte_perm (w[33], w[32], selector); - w[57] = hc_byte_perm (w[32], w[31], selector); - w[56] = hc_byte_perm (w[31], w[30], selector); - w[55] = hc_byte_perm (w[30], w[29], selector); - w[54] = hc_byte_perm (w[29], w[28], selector); - w[53] = hc_byte_perm (w[28], w[27], selector); - w[52] = hc_byte_perm (w[27], w[26], selector); - w[51] = hc_byte_perm (w[26], w[25], selector); - w[50] = hc_byte_perm (w[25], w[24], selector); - w[49] = hc_byte_perm (w[24], w[23], selector); - w[48] = hc_byte_perm (w[23], w[22], selector); - w[47] = hc_byte_perm (w[22], w[21], selector); - w[46] = hc_byte_perm (w[21], w[20], selector); - w[45] = hc_byte_perm (w[20], w[19], selector); - w[44] = hc_byte_perm (w[19], w[18], selector); - w[43] = hc_byte_perm (w[18], w[17], selector); - w[42] = hc_byte_perm 
(w[17], w[16], selector); - w[41] = hc_byte_perm (w[16], w[15], selector); - w[40] = hc_byte_perm (w[15], w[14], selector); - w[39] = hc_byte_perm (w[14], w[13], selector); - w[38] = hc_byte_perm (w[13], w[12], selector); - w[37] = hc_byte_perm (w[12], w[11], selector); - w[36] = hc_byte_perm (w[11], w[10], selector); - w[35] = hc_byte_perm (w[10], w[ 9], selector); - w[34] = hc_byte_perm (w[ 9], w[ 8], selector); - w[33] = hc_byte_perm (w[ 8], w[ 7], selector); - w[32] = hc_byte_perm (w[ 7], w[ 6], selector); - w[31] = hc_byte_perm (w[ 6], w[ 5], selector); - w[30] = hc_byte_perm (w[ 5], w[ 4], selector); - w[29] = hc_byte_perm (w[ 4], w[ 3], selector); - w[28] = hc_byte_perm (w[ 3], w[ 2], selector); - w[27] = hc_byte_perm (w[ 2], w[ 1], selector); - w[26] = hc_byte_perm (w[ 1], w[ 0], selector); - w[25] = hc_byte_perm (w[ 0], 0, selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm (w[37], w[36], selector); - w[62] = hc_byte_perm (w[36], w[35], selector); - w[61] = hc_byte_perm (w[35], w[34], selector); - w[60] = hc_byte_perm (w[34], w[33], selector); - w[59] = hc_byte_perm (w[33], w[32], selector); - w[58] = hc_byte_perm (w[32], w[31], selector); - w[57] = hc_byte_perm (w[31], w[30], selector); - w[56] = hc_byte_perm (w[30], w[29], selector); - w[55] = hc_byte_perm (w[29], w[28], selector); - w[54] = hc_byte_perm (w[28], w[27], selector); - w[53] = hc_byte_perm (w[27], w[26], selector); - w[52] = hc_byte_perm (w[26], w[25], selector); - w[51] = hc_byte_perm (w[25], w[24], selector); - w[50] = hc_byte_perm (w[24], w[23], selector); - w[49] = hc_byte_perm (w[23], w[22], selector); - w[48] = hc_byte_perm (w[22], w[21], selector); - w[47] = 
hc_byte_perm (w[21], w[20], selector); - w[46] = hc_byte_perm (w[20], w[19], selector); - w[45] = hc_byte_perm (w[19], w[18], selector); - w[44] = hc_byte_perm (w[18], w[17], selector); - w[43] = hc_byte_perm (w[17], w[16], selector); - w[42] = hc_byte_perm (w[16], w[15], selector); - w[41] = hc_byte_perm (w[15], w[14], selector); - w[40] = hc_byte_perm (w[14], w[13], selector); - w[39] = hc_byte_perm (w[13], w[12], selector); - w[38] = hc_byte_perm (w[12], w[11], selector); - w[37] = hc_byte_perm (w[11], w[10], selector); - w[36] = hc_byte_perm (w[10], w[ 9], selector); - w[35] = hc_byte_perm (w[ 9], w[ 8], selector); - w[34] = hc_byte_perm (w[ 8], w[ 7], selector); - w[33] = hc_byte_perm (w[ 7], w[ 6], selector); - w[32] = hc_byte_perm (w[ 6], w[ 5], selector); - w[31] = hc_byte_perm (w[ 5], w[ 4], selector); - w[30] = hc_byte_perm (w[ 4], w[ 3], selector); - w[29] = hc_byte_perm (w[ 3], w[ 2], selector); - w[28] = hc_byte_perm (w[ 2], w[ 1], selector); - w[27] = hc_byte_perm (w[ 1], w[ 0], selector); - w[26] = hc_byte_perm (w[ 0], 0, selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm (w[36], w[35], selector); - w[62] = hc_byte_perm (w[35], w[34], selector); - w[61] = hc_byte_perm (w[34], w[33], selector); - w[60] = hc_byte_perm (w[33], w[32], selector); - w[59] = hc_byte_perm (w[32], w[31], selector); - w[58] = hc_byte_perm (w[31], w[30], selector); - w[57] = hc_byte_perm (w[30], w[29], selector); - w[56] = hc_byte_perm (w[29], w[28], selector); - w[55] = hc_byte_perm (w[28], w[27], selector); - w[54] = hc_byte_perm (w[27], w[26], selector); - w[53] = hc_byte_perm (w[26], w[25], selector); - w[52] = hc_byte_perm (w[25], 
w[24], selector); - w[51] = hc_byte_perm (w[24], w[23], selector); - w[50] = hc_byte_perm (w[23], w[22], selector); - w[49] = hc_byte_perm (w[22], w[21], selector); - w[48] = hc_byte_perm (w[21], w[20], selector); - w[47] = hc_byte_perm (w[20], w[19], selector); - w[46] = hc_byte_perm (w[19], w[18], selector); - w[45] = hc_byte_perm (w[18], w[17], selector); - w[44] = hc_byte_perm (w[17], w[16], selector); - w[43] = hc_byte_perm (w[16], w[15], selector); - w[42] = hc_byte_perm (w[15], w[14], selector); - w[41] = hc_byte_perm (w[14], w[13], selector); - w[40] = hc_byte_perm (w[13], w[12], selector); - w[39] = hc_byte_perm (w[12], w[11], selector); - w[38] = hc_byte_perm (w[11], w[10], selector); - w[37] = hc_byte_perm (w[10], w[ 9], selector); - w[36] = hc_byte_perm (w[ 9], w[ 8], selector); - w[35] = hc_byte_perm (w[ 8], w[ 7], selector); - w[34] = hc_byte_perm (w[ 7], w[ 6], selector); - w[33] = hc_byte_perm (w[ 6], w[ 5], selector); - w[32] = hc_byte_perm (w[ 5], w[ 4], selector); - w[31] = hc_byte_perm (w[ 4], w[ 3], selector); - w[30] = hc_byte_perm (w[ 3], w[ 2], selector); - w[29] = hc_byte_perm (w[ 2], w[ 1], selector); - w[28] = hc_byte_perm (w[ 1], w[ 0], selector); - w[27] = hc_byte_perm (w[ 0], 0, selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm (w[35], w[34], selector); - w[62] = hc_byte_perm (w[34], w[33], selector); - w[61] = hc_byte_perm (w[33], w[32], selector); - w[60] = hc_byte_perm (w[32], w[31], selector); - w[59] = hc_byte_perm (w[31], w[30], selector); - w[58] = hc_byte_perm (w[30], w[29], selector); - w[57] = hc_byte_perm (w[29], w[28], selector); - w[56] = hc_byte_perm (w[28], w[27], 
selector); - w[55] = hc_byte_perm (w[27], w[26], selector); - w[54] = hc_byte_perm (w[26], w[25], selector); - w[53] = hc_byte_perm (w[25], w[24], selector); - w[52] = hc_byte_perm (w[24], w[23], selector); - w[51] = hc_byte_perm (w[23], w[22], selector); - w[50] = hc_byte_perm (w[22], w[21], selector); - w[49] = hc_byte_perm (w[21], w[20], selector); - w[48] = hc_byte_perm (w[20], w[19], selector); - w[47] = hc_byte_perm (w[19], w[18], selector); - w[46] = hc_byte_perm (w[18], w[17], selector); - w[45] = hc_byte_perm (w[17], w[16], selector); - w[44] = hc_byte_perm (w[16], w[15], selector); - w[43] = hc_byte_perm (w[15], w[14], selector); - w[42] = hc_byte_perm (w[14], w[13], selector); - w[41] = hc_byte_perm (w[13], w[12], selector); - w[40] = hc_byte_perm (w[12], w[11], selector); - w[39] = hc_byte_perm (w[11], w[10], selector); - w[38] = hc_byte_perm (w[10], w[ 9], selector); - w[37] = hc_byte_perm (w[ 9], w[ 8], selector); - w[36] = hc_byte_perm (w[ 8], w[ 7], selector); - w[35] = hc_byte_perm (w[ 7], w[ 6], selector); - w[34] = hc_byte_perm (w[ 6], w[ 5], selector); - w[33] = hc_byte_perm (w[ 5], w[ 4], selector); - w[32] = hc_byte_perm (w[ 4], w[ 3], selector); - w[31] = hc_byte_perm (w[ 3], w[ 2], selector); - w[30] = hc_byte_perm (w[ 2], w[ 1], selector); - w[29] = hc_byte_perm (w[ 1], w[ 0], selector); - w[28] = hc_byte_perm (w[ 0], 0, selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm (w[34], w[33], selector); - w[62] = hc_byte_perm (w[33], w[32], selector); - w[61] = hc_byte_perm (w[32], w[31], selector); - w[60] = hc_byte_perm (w[31], w[30], selector); - w[59] = hc_byte_perm (w[30], 
w[29], selector); - w[58] = hc_byte_perm (w[29], w[28], selector); - w[57] = hc_byte_perm (w[28], w[27], selector); - w[56] = hc_byte_perm (w[27], w[26], selector); - w[55] = hc_byte_perm (w[26], w[25], selector); - w[54] = hc_byte_perm (w[25], w[24], selector); - w[53] = hc_byte_perm (w[24], w[23], selector); - w[52] = hc_byte_perm (w[23], w[22], selector); - w[51] = hc_byte_perm (w[22], w[21], selector); - w[50] = hc_byte_perm (w[21], w[20], selector); - w[49] = hc_byte_perm (w[20], w[19], selector); - w[48] = hc_byte_perm (w[19], w[18], selector); - w[47] = hc_byte_perm (w[18], w[17], selector); - w[46] = hc_byte_perm (w[17], w[16], selector); - w[45] = hc_byte_perm (w[16], w[15], selector); - w[44] = hc_byte_perm (w[15], w[14], selector); - w[43] = hc_byte_perm (w[14], w[13], selector); - w[42] = hc_byte_perm (w[13], w[12], selector); - w[41] = hc_byte_perm (w[12], w[11], selector); - w[40] = hc_byte_perm (w[11], w[10], selector); - w[39] = hc_byte_perm (w[10], w[ 9], selector); - w[38] = hc_byte_perm (w[ 9], w[ 8], selector); - w[37] = hc_byte_perm (w[ 8], w[ 7], selector); - w[36] = hc_byte_perm (w[ 7], w[ 6], selector); - w[35] = hc_byte_perm (w[ 6], w[ 5], selector); - w[34] = hc_byte_perm (w[ 5], w[ 4], selector); - w[33] = hc_byte_perm (w[ 4], w[ 3], selector); - w[32] = hc_byte_perm (w[ 3], w[ 2], selector); - w[31] = hc_byte_perm (w[ 2], w[ 1], selector); - w[30] = hc_byte_perm (w[ 1], w[ 0], selector); - w[29] = hc_byte_perm (w[ 0], 0, selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm (w[33], w[32], selector); - w[62] = hc_byte_perm (w[32], w[31], selector); - w[61] = 
hc_byte_perm (w[31], w[30], selector); - w[60] = hc_byte_perm (w[30], w[29], selector); - w[59] = hc_byte_perm (w[29], w[28], selector); - w[58] = hc_byte_perm (w[28], w[27], selector); - w[57] = hc_byte_perm (w[27], w[26], selector); - w[56] = hc_byte_perm (w[26], w[25], selector); - w[55] = hc_byte_perm (w[25], w[24], selector); - w[54] = hc_byte_perm (w[24], w[23], selector); - w[53] = hc_byte_perm (w[23], w[22], selector); - w[52] = hc_byte_perm (w[22], w[21], selector); - w[51] = hc_byte_perm (w[21], w[20], selector); - w[50] = hc_byte_perm (w[20], w[19], selector); - w[49] = hc_byte_perm (w[19], w[18], selector); - w[48] = hc_byte_perm (w[18], w[17], selector); - w[47] = hc_byte_perm (w[17], w[16], selector); - w[46] = hc_byte_perm (w[16], w[15], selector); - w[45] = hc_byte_perm (w[15], w[14], selector); - w[44] = hc_byte_perm (w[14], w[13], selector); - w[43] = hc_byte_perm (w[13], w[12], selector); - w[42] = hc_byte_perm (w[12], w[11], selector); - w[41] = hc_byte_perm (w[11], w[10], selector); - w[40] = hc_byte_perm (w[10], w[ 9], selector); - w[39] = hc_byte_perm (w[ 9], w[ 8], selector); - w[38] = hc_byte_perm (w[ 8], w[ 7], selector); - w[37] = hc_byte_perm (w[ 7], w[ 6], selector); - w[36] = hc_byte_perm (w[ 6], w[ 5], selector); - w[35] = hc_byte_perm (w[ 5], w[ 4], selector); - w[34] = hc_byte_perm (w[ 4], w[ 3], selector); - w[33] = hc_byte_perm (w[ 3], w[ 2], selector); - w[32] = hc_byte_perm (w[ 2], w[ 1], selector); - w[31] = hc_byte_perm (w[ 1], w[ 0], selector); - w[30] = hc_byte_perm (w[ 0], 0, selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm (w[32], 
w[31], selector); - w[62] = hc_byte_perm (w[31], w[30], selector); - w[61] = hc_byte_perm (w[30], w[29], selector); - w[60] = hc_byte_perm (w[29], w[28], selector); - w[59] = hc_byte_perm (w[28], w[27], selector); - w[58] = hc_byte_perm (w[27], w[26], selector); - w[57] = hc_byte_perm (w[26], w[25], selector); - w[56] = hc_byte_perm (w[25], w[24], selector); - w[55] = hc_byte_perm (w[24], w[23], selector); - w[54] = hc_byte_perm (w[23], w[22], selector); - w[53] = hc_byte_perm (w[22], w[21], selector); - w[52] = hc_byte_perm (w[21], w[20], selector); - w[51] = hc_byte_perm (w[20], w[19], selector); - w[50] = hc_byte_perm (w[19], w[18], selector); - w[49] = hc_byte_perm (w[18], w[17], selector); - w[48] = hc_byte_perm (w[17], w[16], selector); - w[47] = hc_byte_perm (w[16], w[15], selector); - w[46] = hc_byte_perm (w[15], w[14], selector); - w[45] = hc_byte_perm (w[14], w[13], selector); - w[44] = hc_byte_perm (w[13], w[12], selector); - w[43] = hc_byte_perm (w[12], w[11], selector); - w[42] = hc_byte_perm (w[11], w[10], selector); - w[41] = hc_byte_perm (w[10], w[ 9], selector); - w[40] = hc_byte_perm (w[ 9], w[ 8], selector); - w[39] = hc_byte_perm (w[ 8], w[ 7], selector); - w[38] = hc_byte_perm (w[ 7], w[ 6], selector); - w[37] = hc_byte_perm (w[ 6], w[ 5], selector); - w[36] = hc_byte_perm (w[ 5], w[ 4], selector); - w[35] = hc_byte_perm (w[ 4], w[ 3], selector); - w[34] = hc_byte_perm (w[ 3], w[ 2], selector); - w[33] = hc_byte_perm (w[ 2], w[ 1], selector); - w[32] = hc_byte_perm (w[ 1], w[ 0], selector); - w[31] = hc_byte_perm (w[ 0], 0, selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - 
case 32: - w[63] = hc_byte_perm (w[31], w[30], selector); - w[62] = hc_byte_perm (w[30], w[29], selector); - w[61] = hc_byte_perm (w[29], w[28], selector); - w[60] = hc_byte_perm (w[28], w[27], selector); - w[59] = hc_byte_perm (w[27], w[26], selector); - w[58] = hc_byte_perm (w[26], w[25], selector); - w[57] = hc_byte_perm (w[25], w[24], selector); - w[56] = hc_byte_perm (w[24], w[23], selector); - w[55] = hc_byte_perm (w[23], w[22], selector); - w[54] = hc_byte_perm (w[22], w[21], selector); - w[53] = hc_byte_perm (w[21], w[20], selector); - w[52] = hc_byte_perm (w[20], w[19], selector); - w[51] = hc_byte_perm (w[19], w[18], selector); - w[50] = hc_byte_perm (w[18], w[17], selector); - w[49] = hc_byte_perm (w[17], w[16], selector); - w[48] = hc_byte_perm (w[16], w[15], selector); - w[47] = hc_byte_perm (w[15], w[14], selector); - w[46] = hc_byte_perm (w[14], w[13], selector); - w[45] = hc_byte_perm (w[13], w[12], selector); - w[44] = hc_byte_perm (w[12], w[11], selector); - w[43] = hc_byte_perm (w[11], w[10], selector); - w[42] = hc_byte_perm (w[10], w[ 9], selector); - w[41] = hc_byte_perm (w[ 9], w[ 8], selector); - w[40] = hc_byte_perm (w[ 8], w[ 7], selector); - w[39] = hc_byte_perm (w[ 7], w[ 6], selector); - w[38] = hc_byte_perm (w[ 6], w[ 5], selector); - w[37] = hc_byte_perm (w[ 5], w[ 4], selector); - w[36] = hc_byte_perm (w[ 4], w[ 3], selector); - w[35] = hc_byte_perm (w[ 3], w[ 2], selector); - w[34] = hc_byte_perm (w[ 2], w[ 1], selector); - w[33] = hc_byte_perm (w[ 1], w[ 0], selector); - w[32] = hc_byte_perm (w[ 0], 0, selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; 
- - case 33: - w[63] = hc_byte_perm (w[30], w[29], selector); - w[62] = hc_byte_perm (w[29], w[28], selector); - w[61] = hc_byte_perm (w[28], w[27], selector); - w[60] = hc_byte_perm (w[27], w[26], selector); - w[59] = hc_byte_perm (w[26], w[25], selector); - w[58] = hc_byte_perm (w[25], w[24], selector); - w[57] = hc_byte_perm (w[24], w[23], selector); - w[56] = hc_byte_perm (w[23], w[22], selector); - w[55] = hc_byte_perm (w[22], w[21], selector); - w[54] = hc_byte_perm (w[21], w[20], selector); - w[53] = hc_byte_perm (w[20], w[19], selector); - w[52] = hc_byte_perm (w[19], w[18], selector); - w[51] = hc_byte_perm (w[18], w[17], selector); - w[50] = hc_byte_perm (w[17], w[16], selector); - w[49] = hc_byte_perm (w[16], w[15], selector); - w[48] = hc_byte_perm (w[15], w[14], selector); - w[47] = hc_byte_perm (w[14], w[13], selector); - w[46] = hc_byte_perm (w[13], w[12], selector); - w[45] = hc_byte_perm (w[12], w[11], selector); - w[44] = hc_byte_perm (w[11], w[10], selector); - w[43] = hc_byte_perm (w[10], w[ 9], selector); - w[42] = hc_byte_perm (w[ 9], w[ 8], selector); - w[41] = hc_byte_perm (w[ 8], w[ 7], selector); - w[40] = hc_byte_perm (w[ 7], w[ 6], selector); - w[39] = hc_byte_perm (w[ 6], w[ 5], selector); - w[38] = hc_byte_perm (w[ 5], w[ 4], selector); - w[37] = hc_byte_perm (w[ 4], w[ 3], selector); - w[36] = hc_byte_perm (w[ 3], w[ 2], selector); - w[35] = hc_byte_perm (w[ 2], w[ 1], selector); - w[34] = hc_byte_perm (w[ 1], w[ 0], selector); - w[33] = hc_byte_perm (w[ 0], 0, selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = 
hc_byte_perm (w[29], w[28], selector); - w[62] = hc_byte_perm (w[28], w[27], selector); - w[61] = hc_byte_perm (w[27], w[26], selector); - w[60] = hc_byte_perm (w[26], w[25], selector); - w[59] = hc_byte_perm (w[25], w[24], selector); - w[58] = hc_byte_perm (w[24], w[23], selector); - w[57] = hc_byte_perm (w[23], w[22], selector); - w[56] = hc_byte_perm (w[22], w[21], selector); - w[55] = hc_byte_perm (w[21], w[20], selector); - w[54] = hc_byte_perm (w[20], w[19], selector); - w[53] = hc_byte_perm (w[19], w[18], selector); - w[52] = hc_byte_perm (w[18], w[17], selector); - w[51] = hc_byte_perm (w[17], w[16], selector); - w[50] = hc_byte_perm (w[16], w[15], selector); - w[49] = hc_byte_perm (w[15], w[14], selector); - w[48] = hc_byte_perm (w[14], w[13], selector); - w[47] = hc_byte_perm (w[13], w[12], selector); - w[46] = hc_byte_perm (w[12], w[11], selector); - w[45] = hc_byte_perm (w[11], w[10], selector); - w[44] = hc_byte_perm (w[10], w[ 9], selector); - w[43] = hc_byte_perm (w[ 9], w[ 8], selector); - w[42] = hc_byte_perm (w[ 8], w[ 7], selector); - w[41] = hc_byte_perm (w[ 7], w[ 6], selector); - w[40] = hc_byte_perm (w[ 6], w[ 5], selector); - w[39] = hc_byte_perm (w[ 5], w[ 4], selector); - w[38] = hc_byte_perm (w[ 4], w[ 3], selector); - w[37] = hc_byte_perm (w[ 3], w[ 2], selector); - w[36] = hc_byte_perm (w[ 2], w[ 1], selector); - w[35] = hc_byte_perm (w[ 1], w[ 0], selector); - w[34] = hc_byte_perm (w[ 0], 0, selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm (w[28], w[27], selector); - w[62] = hc_byte_perm 
(w[27], w[26], selector); - w[61] = hc_byte_perm (w[26], w[25], selector); - w[60] = hc_byte_perm (w[25], w[24], selector); - w[59] = hc_byte_perm (w[24], w[23], selector); - w[58] = hc_byte_perm (w[23], w[22], selector); - w[57] = hc_byte_perm (w[22], w[21], selector); - w[56] = hc_byte_perm (w[21], w[20], selector); - w[55] = hc_byte_perm (w[20], w[19], selector); - w[54] = hc_byte_perm (w[19], w[18], selector); - w[53] = hc_byte_perm (w[18], w[17], selector); - w[52] = hc_byte_perm (w[17], w[16], selector); - w[51] = hc_byte_perm (w[16], w[15], selector); - w[50] = hc_byte_perm (w[15], w[14], selector); - w[49] = hc_byte_perm (w[14], w[13], selector); - w[48] = hc_byte_perm (w[13], w[12], selector); - w[47] = hc_byte_perm (w[12], w[11], selector); - w[46] = hc_byte_perm (w[11], w[10], selector); - w[45] = hc_byte_perm (w[10], w[ 9], selector); - w[44] = hc_byte_perm (w[ 9], w[ 8], selector); - w[43] = hc_byte_perm (w[ 8], w[ 7], selector); - w[42] = hc_byte_perm (w[ 7], w[ 6], selector); - w[41] = hc_byte_perm (w[ 6], w[ 5], selector); - w[40] = hc_byte_perm (w[ 5], w[ 4], selector); - w[39] = hc_byte_perm (w[ 4], w[ 3], selector); - w[38] = hc_byte_perm (w[ 3], w[ 2], selector); - w[37] = hc_byte_perm (w[ 2], w[ 1], selector); - w[36] = hc_byte_perm (w[ 1], w[ 0], selector); - w[35] = hc_byte_perm (w[ 0], 0, selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm (w[27], w[26], selector); - w[62] = hc_byte_perm (w[26], w[25], selector); - w[61] = hc_byte_perm (w[25], w[24], selector); - w[60] = hc_byte_perm 
(w[24], w[23], selector); - w[59] = hc_byte_perm (w[23], w[22], selector); - w[58] = hc_byte_perm (w[22], w[21], selector); - w[57] = hc_byte_perm (w[21], w[20], selector); - w[56] = hc_byte_perm (w[20], w[19], selector); - w[55] = hc_byte_perm (w[19], w[18], selector); - w[54] = hc_byte_perm (w[18], w[17], selector); - w[53] = hc_byte_perm (w[17], w[16], selector); - w[52] = hc_byte_perm (w[16], w[15], selector); - w[51] = hc_byte_perm (w[15], w[14], selector); - w[50] = hc_byte_perm (w[14], w[13], selector); - w[49] = hc_byte_perm (w[13], w[12], selector); - w[48] = hc_byte_perm (w[12], w[11], selector); - w[47] = hc_byte_perm (w[11], w[10], selector); - w[46] = hc_byte_perm (w[10], w[ 9], selector); - w[45] = hc_byte_perm (w[ 9], w[ 8], selector); - w[44] = hc_byte_perm (w[ 8], w[ 7], selector); - w[43] = hc_byte_perm (w[ 7], w[ 6], selector); - w[42] = hc_byte_perm (w[ 6], w[ 5], selector); - w[41] = hc_byte_perm (w[ 5], w[ 4], selector); - w[40] = hc_byte_perm (w[ 4], w[ 3], selector); - w[39] = hc_byte_perm (w[ 3], w[ 2], selector); - w[38] = hc_byte_perm (w[ 2], w[ 1], selector); - w[37] = hc_byte_perm (w[ 1], w[ 0], selector); - w[36] = hc_byte_perm (w[ 0], 0, selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm (w[26], w[25], selector); - w[62] = hc_byte_perm (w[25], w[24], selector); - w[61] = hc_byte_perm (w[24], w[23], selector); - w[60] = hc_byte_perm (w[23], w[22], selector); - w[59] = hc_byte_perm (w[22], w[21], selector); - w[58] = hc_byte_perm (w[21], w[20], selector); - w[57] = 
hc_byte_perm (w[20], w[19], selector); - w[56] = hc_byte_perm (w[19], w[18], selector); - w[55] = hc_byte_perm (w[18], w[17], selector); - w[54] = hc_byte_perm (w[17], w[16], selector); - w[53] = hc_byte_perm (w[16], w[15], selector); - w[52] = hc_byte_perm (w[15], w[14], selector); - w[51] = hc_byte_perm (w[14], w[13], selector); - w[50] = hc_byte_perm (w[13], w[12], selector); - w[49] = hc_byte_perm (w[12], w[11], selector); - w[48] = hc_byte_perm (w[11], w[10], selector); - w[47] = hc_byte_perm (w[10], w[ 9], selector); - w[46] = hc_byte_perm (w[ 9], w[ 8], selector); - w[45] = hc_byte_perm (w[ 8], w[ 7], selector); - w[44] = hc_byte_perm (w[ 7], w[ 6], selector); - w[43] = hc_byte_perm (w[ 6], w[ 5], selector); - w[42] = hc_byte_perm (w[ 5], w[ 4], selector); - w[41] = hc_byte_perm (w[ 4], w[ 3], selector); - w[40] = hc_byte_perm (w[ 3], w[ 2], selector); - w[39] = hc_byte_perm (w[ 2], w[ 1], selector); - w[38] = hc_byte_perm (w[ 1], w[ 0], selector); - w[37] = hc_byte_perm (w[ 0], 0, selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm (w[25], w[24], selector); - w[62] = hc_byte_perm (w[24], w[23], selector); - w[61] = hc_byte_perm (w[23], w[22], selector); - w[60] = hc_byte_perm (w[22], w[21], selector); - w[59] = hc_byte_perm (w[21], w[20], selector); - w[58] = hc_byte_perm (w[20], w[19], selector); - w[57] = hc_byte_perm (w[19], w[18], selector); - w[56] = hc_byte_perm (w[18], w[17], selector); - w[55] = hc_byte_perm (w[17], w[16], selector); - w[54] = hc_byte_perm (w[16], w[15], 
selector); - w[53] = hc_byte_perm (w[15], w[14], selector); - w[52] = hc_byte_perm (w[14], w[13], selector); - w[51] = hc_byte_perm (w[13], w[12], selector); - w[50] = hc_byte_perm (w[12], w[11], selector); - w[49] = hc_byte_perm (w[11], w[10], selector); - w[48] = hc_byte_perm (w[10], w[ 9], selector); - w[47] = hc_byte_perm (w[ 9], w[ 8], selector); - w[46] = hc_byte_perm (w[ 8], w[ 7], selector); - w[45] = hc_byte_perm (w[ 7], w[ 6], selector); - w[44] = hc_byte_perm (w[ 6], w[ 5], selector); - w[43] = hc_byte_perm (w[ 5], w[ 4], selector); - w[42] = hc_byte_perm (w[ 4], w[ 3], selector); - w[41] = hc_byte_perm (w[ 3], w[ 2], selector); - w[40] = hc_byte_perm (w[ 2], w[ 1], selector); - w[39] = hc_byte_perm (w[ 1], w[ 0], selector); - w[38] = hc_byte_perm (w[ 0], 0, selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm (w[24], w[23], selector); - w[62] = hc_byte_perm (w[23], w[22], selector); - w[61] = hc_byte_perm (w[22], w[21], selector); - w[60] = hc_byte_perm (w[21], w[20], selector); - w[59] = hc_byte_perm (w[20], w[19], selector); - w[58] = hc_byte_perm (w[19], w[18], selector); - w[57] = hc_byte_perm (w[18], w[17], selector); - w[56] = hc_byte_perm (w[17], w[16], selector); - w[55] = hc_byte_perm (w[16], w[15], selector); - w[54] = hc_byte_perm (w[15], w[14], selector); - w[53] = hc_byte_perm (w[14], w[13], selector); - w[52] = hc_byte_perm (w[13], w[12], selector); - w[51] = hc_byte_perm (w[12], w[11], selector); - w[50] = hc_byte_perm (w[11], w[10], selector); - 
w[49] = hc_byte_perm (w[10], w[ 9], selector); - w[48] = hc_byte_perm (w[ 9], w[ 8], selector); - w[47] = hc_byte_perm (w[ 8], w[ 7], selector); - w[46] = hc_byte_perm (w[ 7], w[ 6], selector); - w[45] = hc_byte_perm (w[ 6], w[ 5], selector); - w[44] = hc_byte_perm (w[ 5], w[ 4], selector); - w[43] = hc_byte_perm (w[ 4], w[ 3], selector); - w[42] = hc_byte_perm (w[ 3], w[ 2], selector); - w[41] = hc_byte_perm (w[ 2], w[ 1], selector); - w[40] = hc_byte_perm (w[ 1], w[ 0], selector); - w[39] = hc_byte_perm (w[ 0], 0, selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm (w[23], w[22], selector); - w[62] = hc_byte_perm (w[22], w[21], selector); - w[61] = hc_byte_perm (w[21], w[20], selector); - w[60] = hc_byte_perm (w[20], w[19], selector); - w[59] = hc_byte_perm (w[19], w[18], selector); - w[58] = hc_byte_perm (w[18], w[17], selector); - w[57] = hc_byte_perm (w[17], w[16], selector); - w[56] = hc_byte_perm (w[16], w[15], selector); - w[55] = hc_byte_perm (w[15], w[14], selector); - w[54] = hc_byte_perm (w[14], w[13], selector); - w[53] = hc_byte_perm (w[13], w[12], selector); - w[52] = hc_byte_perm (w[12], w[11], selector); - w[51] = hc_byte_perm (w[11], w[10], selector); - w[50] = hc_byte_perm (w[10], w[ 9], selector); - w[49] = hc_byte_perm (w[ 9], w[ 8], selector); - w[48] = hc_byte_perm (w[ 8], w[ 7], selector); - w[47] = hc_byte_perm (w[ 7], w[ 6], selector); - w[46] = hc_byte_perm (w[ 6], w[ 5], selector); - w[45] = hc_byte_perm (w[ 5], w[ 4], selector); - 
w[44] = hc_byte_perm (w[ 4], w[ 3], selector); - w[43] = hc_byte_perm (w[ 3], w[ 2], selector); - w[42] = hc_byte_perm (w[ 2], w[ 1], selector); - w[41] = hc_byte_perm (w[ 1], w[ 0], selector); - w[40] = hc_byte_perm (w[ 0], 0, selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm (w[22], w[21], selector); - w[62] = hc_byte_perm (w[21], w[20], selector); - w[61] = hc_byte_perm (w[20], w[19], selector); - w[60] = hc_byte_perm (w[19], w[18], selector); - w[59] = hc_byte_perm (w[18], w[17], selector); - w[58] = hc_byte_perm (w[17], w[16], selector); - w[57] = hc_byte_perm (w[16], w[15], selector); - w[56] = hc_byte_perm (w[15], w[14], selector); - w[55] = hc_byte_perm (w[14], w[13], selector); - w[54] = hc_byte_perm (w[13], w[12], selector); - w[53] = hc_byte_perm (w[12], w[11], selector); - w[52] = hc_byte_perm (w[11], w[10], selector); - w[51] = hc_byte_perm (w[10], w[ 9], selector); - w[50] = hc_byte_perm (w[ 9], w[ 8], selector); - w[49] = hc_byte_perm (w[ 8], w[ 7], selector); - w[48] = hc_byte_perm (w[ 7], w[ 6], selector); - w[47] = hc_byte_perm (w[ 6], w[ 5], selector); - w[46] = hc_byte_perm (w[ 5], w[ 4], selector); - w[45] = hc_byte_perm (w[ 4], w[ 3], selector); - w[44] = hc_byte_perm (w[ 3], w[ 2], selector); - w[43] = hc_byte_perm (w[ 2], w[ 1], selector); - w[42] = hc_byte_perm (w[ 1], w[ 0], selector); - w[41] = hc_byte_perm (w[ 0], 0, selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - 
w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm (w[21], w[20], selector); - w[62] = hc_byte_perm (w[20], w[19], selector); - w[61] = hc_byte_perm (w[19], w[18], selector); - w[60] = hc_byte_perm (w[18], w[17], selector); - w[59] = hc_byte_perm (w[17], w[16], selector); - w[58] = hc_byte_perm (w[16], w[15], selector); - w[57] = hc_byte_perm (w[15], w[14], selector); - w[56] = hc_byte_perm (w[14], w[13], selector); - w[55] = hc_byte_perm (w[13], w[12], selector); - w[54] = hc_byte_perm (w[12], w[11], selector); - w[53] = hc_byte_perm (w[11], w[10], selector); - w[52] = hc_byte_perm (w[10], w[ 9], selector); - w[51] = hc_byte_perm (w[ 9], w[ 8], selector); - w[50] = hc_byte_perm (w[ 8], w[ 7], selector); - w[49] = hc_byte_perm (w[ 7], w[ 6], selector); - w[48] = hc_byte_perm (w[ 6], w[ 5], selector); - w[47] = hc_byte_perm (w[ 5], w[ 4], selector); - w[46] = hc_byte_perm (w[ 4], w[ 3], selector); - w[45] = hc_byte_perm (w[ 3], w[ 2], selector); - w[44] = hc_byte_perm (w[ 2], w[ 1], selector); - w[43] = hc_byte_perm (w[ 1], w[ 0], selector); - w[42] = hc_byte_perm (w[ 0], 0, selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 
6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm (w[20], w[19], selector); - w[62] = hc_byte_perm (w[19], w[18], selector); - w[61] = hc_byte_perm (w[18], w[17], selector); - w[60] = hc_byte_perm (w[17], w[16], selector); - w[59] = hc_byte_perm (w[16], w[15], selector); - w[58] = hc_byte_perm (w[15], w[14], selector); - w[57] = hc_byte_perm (w[14], w[13], selector); - w[56] = hc_byte_perm (w[13], w[12], selector); - w[55] = hc_byte_perm (w[12], w[11], selector); - w[54] = hc_byte_perm (w[11], w[10], selector); - w[53] = hc_byte_perm (w[10], w[ 9], selector); - w[52] = hc_byte_perm (w[ 9], w[ 8], selector); - w[51] = hc_byte_perm (w[ 8], w[ 7], selector); - w[50] = hc_byte_perm (w[ 7], w[ 6], selector); - w[49] = hc_byte_perm (w[ 6], w[ 5], selector); - w[48] = hc_byte_perm (w[ 5], w[ 4], selector); - w[47] = hc_byte_perm (w[ 4], w[ 3], selector); - w[46] = hc_byte_perm (w[ 3], w[ 2], selector); - w[45] = hc_byte_perm (w[ 2], w[ 1], selector); - w[44] = hc_byte_perm (w[ 1], w[ 0], selector); - w[43] = hc_byte_perm (w[ 0], 0, selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm (w[19], w[18], selector); - w[62] = hc_byte_perm (w[18], w[17], selector); - w[61] = hc_byte_perm (w[17], w[16], selector); - w[60] = hc_byte_perm (w[16], w[15], selector); - w[59] = hc_byte_perm (w[15], w[14], selector); - w[58] = hc_byte_perm (w[14], 
w[13], selector); - w[57] = hc_byte_perm (w[13], w[12], selector); - w[56] = hc_byte_perm (w[12], w[11], selector); - w[55] = hc_byte_perm (w[11], w[10], selector); - w[54] = hc_byte_perm (w[10], w[ 9], selector); - w[53] = hc_byte_perm (w[ 9], w[ 8], selector); - w[52] = hc_byte_perm (w[ 8], w[ 7], selector); - w[51] = hc_byte_perm (w[ 7], w[ 6], selector); - w[50] = hc_byte_perm (w[ 6], w[ 5], selector); - w[49] = hc_byte_perm (w[ 5], w[ 4], selector); - w[48] = hc_byte_perm (w[ 4], w[ 3], selector); - w[47] = hc_byte_perm (w[ 3], w[ 2], selector); - w[46] = hc_byte_perm (w[ 2], w[ 1], selector); - w[45] = hc_byte_perm (w[ 1], w[ 0], selector); - w[44] = hc_byte_perm (w[ 0], 0, selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm (w[18], w[17], selector); - w[62] = hc_byte_perm (w[17], w[16], selector); - w[61] = hc_byte_perm (w[16], w[15], selector); - w[60] = hc_byte_perm (w[15], w[14], selector); - w[59] = hc_byte_perm (w[14], w[13], selector); - w[58] = hc_byte_perm (w[13], w[12], selector); - w[57] = hc_byte_perm (w[12], w[11], selector); - w[56] = hc_byte_perm (w[11], w[10], selector); - w[55] = hc_byte_perm (w[10], w[ 9], selector); - w[54] = hc_byte_perm (w[ 9], w[ 8], selector); - w[53] = hc_byte_perm (w[ 8], w[ 7], selector); - w[52] = hc_byte_perm (w[ 7], w[ 6], selector); - w[51] = hc_byte_perm (w[ 6], w[ 5], selector); - w[50] = hc_byte_perm (w[ 5], w[ 4], selector); - w[49] = 
hc_byte_perm (w[ 4], w[ 3], selector); - w[48] = hc_byte_perm (w[ 3], w[ 2], selector); - w[47] = hc_byte_perm (w[ 2], w[ 1], selector); - w[46] = hc_byte_perm (w[ 1], w[ 0], selector); - w[45] = hc_byte_perm (w[ 0], 0, selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm (w[17], w[16], selector); - w[62] = hc_byte_perm (w[16], w[15], selector); - w[61] = hc_byte_perm (w[15], w[14], selector); - w[60] = hc_byte_perm (w[14], w[13], selector); - w[59] = hc_byte_perm (w[13], w[12], selector); - w[58] = hc_byte_perm (w[12], w[11], selector); - w[57] = hc_byte_perm (w[11], w[10], selector); - w[56] = hc_byte_perm (w[10], w[ 9], selector); - w[55] = hc_byte_perm (w[ 9], w[ 8], selector); - w[54] = hc_byte_perm (w[ 8], w[ 7], selector); - w[53] = hc_byte_perm (w[ 7], w[ 6], selector); - w[52] = hc_byte_perm (w[ 6], w[ 5], selector); - w[51] = hc_byte_perm (w[ 5], w[ 4], selector); - w[50] = hc_byte_perm (w[ 4], w[ 3], selector); - w[49] = hc_byte_perm (w[ 3], w[ 2], selector); - w[48] = hc_byte_perm (w[ 2], w[ 1], selector); - w[47] = hc_byte_perm (w[ 1], w[ 0], selector); - w[46] = hc_byte_perm (w[ 0], 0, selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] 
= 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm (w[16], w[15], selector); - w[62] = hc_byte_perm (w[15], w[14], selector); - w[61] = hc_byte_perm (w[14], w[13], selector); - w[60] = hc_byte_perm (w[13], w[12], selector); - w[59] = hc_byte_perm (w[12], w[11], selector); - w[58] = hc_byte_perm (w[11], w[10], selector); - w[57] = hc_byte_perm (w[10], w[ 9], selector); - w[56] = hc_byte_perm (w[ 9], w[ 8], selector); - w[55] = hc_byte_perm (w[ 8], w[ 7], selector); - w[54] = hc_byte_perm (w[ 7], w[ 6], selector); - w[53] = hc_byte_perm (w[ 6], w[ 5], selector); - w[52] = hc_byte_perm (w[ 5], w[ 4], selector); - w[51] = hc_byte_perm (w[ 4], w[ 3], selector); - w[50] = hc_byte_perm (w[ 3], w[ 2], selector); - w[49] = hc_byte_perm (w[ 2], w[ 1], selector); - w[48] = hc_byte_perm (w[ 1], w[ 0], selector); - w[47] = hc_byte_perm (w[ 0], 0, selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm (w[15], w[14], selector); - w[62] = hc_byte_perm (w[14], w[13], selector); - w[61] = hc_byte_perm (w[13], w[12], selector); - w[60] = hc_byte_perm (w[12], w[11], 
selector); - w[59] = hc_byte_perm (w[11], w[10], selector); - w[58] = hc_byte_perm (w[10], w[ 9], selector); - w[57] = hc_byte_perm (w[ 9], w[ 8], selector); - w[56] = hc_byte_perm (w[ 8], w[ 7], selector); - w[55] = hc_byte_perm (w[ 7], w[ 6], selector); - w[54] = hc_byte_perm (w[ 6], w[ 5], selector); - w[53] = hc_byte_perm (w[ 5], w[ 4], selector); - w[52] = hc_byte_perm (w[ 4], w[ 3], selector); - w[51] = hc_byte_perm (w[ 3], w[ 2], selector); - w[50] = hc_byte_perm (w[ 2], w[ 1], selector); - w[49] = hc_byte_perm (w[ 1], w[ 0], selector); - w[48] = hc_byte_perm (w[ 0], 0, selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm (w[14], w[13], selector); - w[62] = hc_byte_perm (w[13], w[12], selector); - w[61] = hc_byte_perm (w[12], w[11], selector); - w[60] = hc_byte_perm (w[11], w[10], selector); - w[59] = hc_byte_perm (w[10], w[ 9], selector); - w[58] = hc_byte_perm (w[ 9], w[ 8], selector); - w[57] = hc_byte_perm (w[ 8], w[ 7], selector); - w[56] = hc_byte_perm (w[ 7], w[ 6], selector); - w[55] = hc_byte_perm (w[ 6], w[ 5], selector); - w[54] = hc_byte_perm (w[ 5], w[ 4], selector); - w[53] = hc_byte_perm (w[ 4], w[ 3], selector); - w[52] = hc_byte_perm (w[ 3], w[ 2], selector); - w[51] = hc_byte_perm (w[ 2], w[ 1], selector); - w[50] = hc_byte_perm (w[ 1], w[ 0], selector); - w[49] = hc_byte_perm (w[ 0], 0, selector); - w[48] = 0; - w[47] = 0; 
- w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm (w[13], w[12], selector); - w[62] = hc_byte_perm (w[12], w[11], selector); - w[61] = hc_byte_perm (w[11], w[10], selector); - w[60] = hc_byte_perm (w[10], w[ 9], selector); - w[59] = hc_byte_perm (w[ 9], w[ 8], selector); - w[58] = hc_byte_perm (w[ 8], w[ 7], selector); - w[57] = hc_byte_perm (w[ 7], w[ 6], selector); - w[56] = hc_byte_perm (w[ 6], w[ 5], selector); - w[55] = hc_byte_perm (w[ 5], w[ 4], selector); - w[54] = hc_byte_perm (w[ 4], w[ 3], selector); - w[53] = hc_byte_perm (w[ 3], w[ 2], selector); - w[52] = hc_byte_perm (w[ 2], w[ 1], selector); - w[51] = hc_byte_perm (w[ 1], w[ 0], selector); - w[50] = hc_byte_perm (w[ 0], 0, selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] 
= hc_byte_perm (w[12], w[11], selector); - w[62] = hc_byte_perm (w[11], w[10], selector); - w[61] = hc_byte_perm (w[10], w[ 9], selector); - w[60] = hc_byte_perm (w[ 9], w[ 8], selector); - w[59] = hc_byte_perm (w[ 8], w[ 7], selector); - w[58] = hc_byte_perm (w[ 7], w[ 6], selector); - w[57] = hc_byte_perm (w[ 6], w[ 5], selector); - w[56] = hc_byte_perm (w[ 5], w[ 4], selector); - w[55] = hc_byte_perm (w[ 4], w[ 3], selector); - w[54] = hc_byte_perm (w[ 3], w[ 2], selector); - w[53] = hc_byte_perm (w[ 2], w[ 1], selector); - w[52] = hc_byte_perm (w[ 1], w[ 0], selector); - w[51] = hc_byte_perm (w[ 0], 0, selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm (w[11], w[10], selector); - w[62] = hc_byte_perm (w[10], w[ 9], selector); - w[61] = hc_byte_perm (w[ 9], w[ 8], selector); - w[60] = hc_byte_perm (w[ 8], w[ 7], selector); - w[59] = hc_byte_perm (w[ 7], w[ 6], selector); - w[58] = hc_byte_perm (w[ 6], w[ 5], selector); - w[57] = hc_byte_perm (w[ 5], w[ 4], selector); - w[56] = hc_byte_perm (w[ 4], w[ 3], selector); - w[55] = hc_byte_perm (w[ 3], w[ 2], selector); - w[54] = hc_byte_perm (w[ 2], w[ 1], selector); - w[53] = hc_byte_perm (w[ 1], w[ 0], selector); - w[52] = hc_byte_perm (w[ 0], 0, selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; 
- w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm (w[10], w[ 9], selector); - w[62] = hc_byte_perm (w[ 9], w[ 8], selector); - w[61] = hc_byte_perm (w[ 8], w[ 7], selector); - w[60] = hc_byte_perm (w[ 7], w[ 6], selector); - w[59] = hc_byte_perm (w[ 6], w[ 5], selector); - w[58] = hc_byte_perm (w[ 5], w[ 4], selector); - w[57] = hc_byte_perm (w[ 4], w[ 3], selector); - w[56] = hc_byte_perm (w[ 3], w[ 2], selector); - w[55] = hc_byte_perm (w[ 2], w[ 1], selector); - w[54] = hc_byte_perm (w[ 1], w[ 0], selector); - w[53] = hc_byte_perm (w[ 0], 0, selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm (w[ 9], w[ 8], selector); - w[62] = hc_byte_perm (w[ 8], w[ 7], selector); - w[61] = hc_byte_perm (w[ 7], w[ 6], selector); - w[60] 
= hc_byte_perm (w[ 6], w[ 5], selector); - w[59] = hc_byte_perm (w[ 5], w[ 4], selector); - w[58] = hc_byte_perm (w[ 4], w[ 3], selector); - w[57] = hc_byte_perm (w[ 3], w[ 2], selector); - w[56] = hc_byte_perm (w[ 2], w[ 1], selector); - w[55] = hc_byte_perm (w[ 1], w[ 0], selector); - w[54] = hc_byte_perm (w[ 0], 0, selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm (w[ 8], w[ 7], selector); - w[62] = hc_byte_perm (w[ 7], w[ 6], selector); - w[61] = hc_byte_perm (w[ 6], w[ 5], selector); - w[60] = hc_byte_perm (w[ 5], w[ 4], selector); - w[59] = hc_byte_perm (w[ 4], w[ 3], selector); - w[58] = hc_byte_perm (w[ 3], w[ 2], selector); - w[57] = hc_byte_perm (w[ 2], w[ 1], selector); - w[56] = hc_byte_perm (w[ 1], w[ 0], selector); - w[55] = hc_byte_perm (w[ 0], 0, selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 
0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm (w[ 7], w[ 6], selector); - w[62] = hc_byte_perm (w[ 6], w[ 5], selector); - w[61] = hc_byte_perm (w[ 5], w[ 4], selector); - w[60] = hc_byte_perm (w[ 4], w[ 3], selector); - w[59] = hc_byte_perm (w[ 3], w[ 2], selector); - w[58] = hc_byte_perm (w[ 2], w[ 1], selector); - w[57] = hc_byte_perm (w[ 1], w[ 0], selector); - w[56] = hc_byte_perm (w[ 0], 0, selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm (w[ 6], w[ 5], selector); - w[62] = hc_byte_perm (w[ 5], w[ 4], selector); - w[61] = hc_byte_perm (w[ 4], w[ 3], selector); - w[60] = hc_byte_perm (w[ 3], w[ 2], selector); - w[59] = hc_byte_perm (w[ 2], w[ 1], selector); - w[58] = hc_byte_perm (w[ 1], w[ 0], selector); - w[57] = hc_byte_perm (w[ 0], 0, selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; 
- w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm (w[ 5], w[ 4], selector); - w[62] = hc_byte_perm (w[ 4], w[ 3], selector); - w[61] = hc_byte_perm (w[ 3], w[ 2], selector); - w[60] = hc_byte_perm (w[ 2], w[ 1], selector); - w[59] = hc_byte_perm (w[ 1], w[ 0], selector); - w[58] = hc_byte_perm (w[ 0], 0, selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm (w[ 4], w[ 3], selector); - w[62] = hc_byte_perm (w[ 3], w[ 2], selector); - w[61] = hc_byte_perm (w[ 2], w[ 1], selector); - w[60] = hc_byte_perm (w[ 1], w[ 0], selector); - w[59] = hc_byte_perm (w[ 0], 0, selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - 
w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm (w[ 3], w[ 2], selector); - w[62] = hc_byte_perm (w[ 2], w[ 1], selector); - w[61] = hc_byte_perm (w[ 1], w[ 0], selector); - w[60] = hc_byte_perm (w[ 0], 0, selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm (w[ 2], w[ 1], selector); - w[62] = hc_byte_perm (w[ 1], w[ 0], selector); - w[61] = hc_byte_perm (w[ 0], 0, selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - 
w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm (w[ 1], w[ 0], selector); - w[62] = hc_byte_perm (w[ 0], 0, selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm (w[ 0], 0, selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] 
= 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } /** @@ -36899,7 +21520,7 @@ DECLSPEC void make_utf16be_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PR out1[1] = hc_byte_perm_S (in[0], 0, 0x3727); out1[0] = hc_byte_perm_S (in[0], 0, 0x1707); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x03070207); out2[2] = hc_byte_perm_S (in[3], 0, 0x01070007); @@ -36937,7 +21558,7 @@ DECLSPEC void make_utf16beN_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, P out1[1] = hc_byte_perm_S (in[0], 0, 0x1707); out1[0] = hc_byte_perm_S (in[0], 0, 0x3727); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x01070007); out2[2] = hc_byte_perm_S (in[3], 0, 0x03070207); @@ -36975,7 +21596,7 @@ DECLSPEC void make_utf16le_S (PRIVATE_AS const u32 *in, PRIVATE_AS u32 *out1, PR out1[1] = hc_byte_perm_S (in[0], 0, 0x7372); out1[0] = hc_byte_perm_S (in[0], 0, 0x7170); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out2[3] = hc_byte_perm_S (in[3], 0, 0x07030702); out2[2] = hc_byte_perm_S (in[3], 0, 0x07010700); @@ -37009,7 +21630,7 @@ DECLSPEC void undo_utf16be_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *i out[2] = hc_byte_perm_S (in2[0], in2[1], 0x4602); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x4602); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm_S (in1[0], in1[1], 0x04060002); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x04060002); @@ -37039,7 +21660,7 @@ DECLSPEC void 
undo_utf16le_S (PRIVATE_AS const u32 *in1, PRIVATE_AS const u32 *i out[2] = hc_byte_perm_S (in2[0], in2[1], 0x6420); out[3] = hc_byte_perm_S (in2[2], in2[3], 0x6420); - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif defined IS_AMD || defined IS_HIP out[0] = hc_byte_perm_S (in1[0], in1[1], 0x06040200); out[1] = hc_byte_perm_S (in1[2], in1[3], 0x06040200); @@ -37064,7 +21685,6 @@ DECLSPEC void switch_buffer_by_offset_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -37387,352 +22007,12 @@ DECLSPEC void switch_buffer_by_offset_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = 
hc_byte_perm_S ( 0, w0[0], selector); - - break; - - case 1: - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = 
hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] 
= hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] = hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - 
w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] 
= 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, const u32 offset) { const int offset_switch = offset / 4; - #if (defined IS_AMD || defined IS_HIP) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38191,476 +22471,12 @@ DECLSPEC void switch_buffer_by_offset_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS break; } - #endif - - #ifdef IS_NV - // could be improved, too - switch (offset_switch) - { - case 0: - c0[0] = hc_bytealign_S (w3[3], 0, offset); - w3[3] = hc_bytealign_S (w3[2], w3[3], offset); - w3[2] = hc_bytealign_S (w3[1], w3[2], offset); - w3[1] = hc_bytealign_S (w3[0], w3[1], offset); - w3[0] = hc_bytealign_S (w2[3], w3[0], offset); - w2[3] = hc_bytealign_S (w2[2], w2[3], offset); - w2[2] = hc_bytealign_S (w2[1], w2[2], offset); - w2[1] = hc_bytealign_S (w2[0], w2[1], offset); - w2[0] = hc_bytealign_S (w1[3], w2[0], offset); - w1[3] = hc_bytealign_S (w1[2], w1[3], offset); - w1[2] = hc_bytealign_S (w1[1], w1[2], offset); - w1[1] = hc_bytealign_S (w1[0], w1[1], offset); - w1[0] = hc_bytealign_S (w0[3], w1[0], offset); - w0[3] = hc_bytealign_S (w0[2], w0[3], offset); - w0[2] = hc_bytealign_S (w0[1], w0[2], offset); - w0[1] = hc_bytealign_S (w0[0], w0[1], offset); - w0[0] = hc_bytealign_S ( 0, w0[0], offset); - - break; - - case 1: - c0[1] = hc_bytealign_S (w3[3], 0, offset); - c0[0] = hc_bytealign_S (w3[2], w3[3], offset); - w3[3] = hc_bytealign_S (w3[1], w3[2], offset); - w3[2] = hc_bytealign_S (w3[0], w3[1], offset); - w3[1] = hc_bytealign_S (w2[3], w3[0], offset); - w3[0] = hc_bytealign_S (w2[2], w2[3], offset); - w2[3] = hc_bytealign_S (w2[1], w2[2], offset); - w2[2] = hc_bytealign_S (w2[0], w2[1], offset); - w2[1] = hc_bytealign_S (w1[3], w2[0], offset); - w2[0] = hc_bytealign_S (w1[2], w1[3], offset); - w1[3] = 
hc_bytealign_S (w1[1], w1[2], offset); - w1[2] = hc_bytealign_S (w1[0], w1[1], offset); - w1[1] = hc_bytealign_S (w0[3], w1[0], offset); - w1[0] = hc_bytealign_S (w0[2], w0[3], offset); - w0[3] = hc_bytealign_S (w0[1], w0[2], offset); - w0[2] = hc_bytealign_S (w0[0], w0[1], offset); - w0[1] = hc_bytealign_S ( 0, w0[0], offset); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_bytealign_S (w3[3], 0, offset); - c0[1] = hc_bytealign_S (w3[2], w3[3], offset); - c0[0] = hc_bytealign_S (w3[1], w3[2], offset); - w3[3] = hc_bytealign_S (w3[0], w3[1], offset); - w3[2] = hc_bytealign_S (w2[3], w3[0], offset); - w3[1] = hc_bytealign_S (w2[2], w2[3], offset); - w3[0] = hc_bytealign_S (w2[1], w2[2], offset); - w2[3] = hc_bytealign_S (w2[0], w2[1], offset); - w2[2] = hc_bytealign_S (w1[3], w2[0], offset); - w2[1] = hc_bytealign_S (w1[2], w1[3], offset); - w2[0] = hc_bytealign_S (w1[1], w1[2], offset); - w1[3] = hc_bytealign_S (w1[0], w1[1], offset); - w1[2] = hc_bytealign_S (w0[3], w1[0], offset); - w1[1] = hc_bytealign_S (w0[2], w0[3], offset); - w1[0] = hc_bytealign_S (w0[1], w0[2], offset); - w0[3] = hc_bytealign_S (w0[0], w0[1], offset); - w0[2] = hc_bytealign_S ( 0, w0[0], offset); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_bytealign_S (w3[3], 0, offset); - c0[2] = hc_bytealign_S (w3[2], w3[3], offset); - c0[1] = hc_bytealign_S (w3[1], w3[2], offset); - c0[0] = hc_bytealign_S (w3[0], w3[1], offset); - w3[3] = hc_bytealign_S (w2[3], w3[0], offset); - w3[2] = hc_bytealign_S (w2[2], w2[3], offset); - w3[1] = hc_bytealign_S (w2[1], w2[2], offset); - w3[0] = hc_bytealign_S (w2[0], w2[1], offset); - w2[3] = hc_bytealign_S (w1[3], w2[0], offset); - w2[2] = hc_bytealign_S (w1[2], w1[3], offset); - w2[1] = hc_bytealign_S (w1[1], w1[2], offset); - w2[0] = hc_bytealign_S (w1[0], w1[1], offset); - w1[3] = hc_bytealign_S (w0[3], w1[0], offset); - w1[2] = hc_bytealign_S (w0[2], w0[3], offset); - w1[1] = hc_bytealign_S (w0[1], w0[2], offset); - w1[0] = hc_bytealign_S 
(w0[0], w0[1], offset); - w0[3] = hc_bytealign_S ( 0, w0[0], offset); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_bytealign_S (w3[3], 0, offset); - c0[3] = hc_bytealign_S (w3[2], w3[3], offset); - c0[2] = hc_bytealign_S (w3[1], w3[2], offset); - c0[1] = hc_bytealign_S (w3[0], w3[1], offset); - c0[0] = hc_bytealign_S (w2[3], w3[0], offset); - w3[3] = hc_bytealign_S (w2[2], w2[3], offset); - w3[2] = hc_bytealign_S (w2[1], w2[2], offset); - w3[1] = hc_bytealign_S (w2[0], w2[1], offset); - w3[0] = hc_bytealign_S (w1[3], w2[0], offset); - w2[3] = hc_bytealign_S (w1[2], w1[3], offset); - w2[2] = hc_bytealign_S (w1[1], w1[2], offset); - w2[1] = hc_bytealign_S (w1[0], w1[1], offset); - w2[0] = hc_bytealign_S (w0[3], w1[0], offset); - w1[3] = hc_bytealign_S (w0[2], w0[3], offset); - w1[2] = hc_bytealign_S (w0[1], w0[2], offset); - w1[1] = hc_bytealign_S (w0[0], w0[1], offset); - w1[0] = hc_bytealign_S ( 0, w0[0], offset); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_bytealign_S (w3[3], 0, offset); - c1[0] = hc_bytealign_S (w3[2], w3[3], offset); - c0[3] = hc_bytealign_S (w3[1], w3[2], offset); - c0[2] = hc_bytealign_S (w3[0], w3[1], offset); - c0[1] = hc_bytealign_S (w2[3], w3[0], offset); - c0[0] = hc_bytealign_S (w2[2], w2[3], offset); - w3[3] = hc_bytealign_S (w2[1], w2[2], offset); - w3[2] = hc_bytealign_S (w2[0], w2[1], offset); - w3[1] = hc_bytealign_S (w1[3], w2[0], offset); - w3[0] = hc_bytealign_S (w1[2], w1[3], offset); - w2[3] = hc_bytealign_S (w1[1], w1[2], offset); - w2[2] = hc_bytealign_S (w1[0], w1[1], offset); - w2[1] = hc_bytealign_S (w0[3], w1[0], offset); - w2[0] = hc_bytealign_S (w0[2], w0[3], offset); - w1[3] = hc_bytealign_S (w0[1], w0[2], offset); - w1[2] = hc_bytealign_S (w0[0], w0[1], offset); - w1[1] = hc_bytealign_S ( 0, w0[0], offset); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_bytealign_S (w3[3], 0, offset); - c1[1] 
= hc_bytealign_S (w3[2], w3[3], offset); - c1[0] = hc_bytealign_S (w3[1], w3[2], offset); - c0[3] = hc_bytealign_S (w3[0], w3[1], offset); - c0[2] = hc_bytealign_S (w2[3], w3[0], offset); - c0[1] = hc_bytealign_S (w2[2], w2[3], offset); - c0[0] = hc_bytealign_S (w2[1], w2[2], offset); - w3[3] = hc_bytealign_S (w2[0], w2[1], offset); - w3[2] = hc_bytealign_S (w1[3], w2[0], offset); - w3[1] = hc_bytealign_S (w1[2], w1[3], offset); - w3[0] = hc_bytealign_S (w1[1], w1[2], offset); - w2[3] = hc_bytealign_S (w1[0], w1[1], offset); - w2[2] = hc_bytealign_S (w0[3], w1[0], offset); - w2[1] = hc_bytealign_S (w0[2], w0[3], offset); - w2[0] = hc_bytealign_S (w0[1], w0[2], offset); - w1[3] = hc_bytealign_S (w0[0], w0[1], offset); - w1[2] = hc_bytealign_S ( 0, w0[0], offset); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_bytealign_S (w3[3], 0, offset); - c1[2] = hc_bytealign_S (w3[2], w3[3], offset); - c1[1] = hc_bytealign_S (w3[1], w3[2], offset); - c1[0] = hc_bytealign_S (w3[0], w3[1], offset); - c0[3] = hc_bytealign_S (w2[3], w3[0], offset); - c0[2] = hc_bytealign_S (w2[2], w2[3], offset); - c0[1] = hc_bytealign_S (w2[1], w2[2], offset); - c0[0] = hc_bytealign_S (w2[0], w2[1], offset); - w3[3] = hc_bytealign_S (w1[3], w2[0], offset); - w3[2] = hc_bytealign_S (w1[2], w1[3], offset); - w3[1] = hc_bytealign_S (w1[1], w1[2], offset); - w3[0] = hc_bytealign_S (w1[0], w1[1], offset); - w2[3] = hc_bytealign_S (w0[3], w1[0], offset); - w2[2] = hc_bytealign_S (w0[2], w0[3], offset); - w2[1] = hc_bytealign_S (w0[1], w0[2], offset); - w2[0] = hc_bytealign_S (w0[0], w0[1], offset); - w1[3] = hc_bytealign_S ( 0, w0[0], offset); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_bytealign_S (w3[3], 0, offset); - c1[3] = hc_bytealign_S (w3[2], w3[3], offset); - c1[2] = hc_bytealign_S (w3[1], w3[2], offset); - c1[1] = hc_bytealign_S (w3[0], w3[1], 
offset); - c1[0] = hc_bytealign_S (w2[3], w3[0], offset); - c0[3] = hc_bytealign_S (w2[2], w2[3], offset); - c0[2] = hc_bytealign_S (w2[1], w2[2], offset); - c0[1] = hc_bytealign_S (w2[0], w2[1], offset); - c0[0] = hc_bytealign_S (w1[3], w2[0], offset); - w3[3] = hc_bytealign_S (w1[2], w1[3], offset); - w3[2] = hc_bytealign_S (w1[1], w1[2], offset); - w3[1] = hc_bytealign_S (w1[0], w1[1], offset); - w3[0] = hc_bytealign_S (w0[3], w1[0], offset); - w2[3] = hc_bytealign_S (w0[2], w0[3], offset); - w2[2] = hc_bytealign_S (w0[1], w0[2], offset); - w2[1] = hc_bytealign_S (w0[0], w0[1], offset); - w2[0] = hc_bytealign_S ( 0, w0[0], offset); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_bytealign_S (w3[3], 0, offset); - c2[0] = hc_bytealign_S (w3[2], w3[3], offset); - c1[3] = hc_bytealign_S (w3[1], w3[2], offset); - c1[2] = hc_bytealign_S (w3[0], w3[1], offset); - c1[1] = hc_bytealign_S (w2[3], w3[0], offset); - c1[0] = hc_bytealign_S (w2[2], w2[3], offset); - c0[3] = hc_bytealign_S (w2[1], w2[2], offset); - c0[2] = hc_bytealign_S (w2[0], w2[1], offset); - c0[1] = hc_bytealign_S (w1[3], w2[0], offset); - c0[0] = hc_bytealign_S (w1[2], w1[3], offset); - w3[3] = hc_bytealign_S (w1[1], w1[2], offset); - w3[2] = hc_bytealign_S (w1[0], w1[1], offset); - w3[1] = hc_bytealign_S (w0[3], w1[0], offset); - w3[0] = hc_bytealign_S (w0[2], w0[3], offset); - w2[3] = hc_bytealign_S (w0[1], w0[2], offset); - w2[2] = hc_bytealign_S (w0[0], w0[1], offset); - w2[1] = hc_bytealign_S ( 0, w0[0], offset); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_bytealign_S (w3[3], 0, offset); - c2[1] = hc_bytealign_S (w3[2], w3[3], offset); - c2[0] = hc_bytealign_S (w3[1], w3[2], offset); - c1[3] = hc_bytealign_S (w3[0], w3[1], offset); - c1[2] = hc_bytealign_S (w2[3], w3[0], offset); - c1[1] = 
hc_bytealign_S (w2[2], w2[3], offset); - c1[0] = hc_bytealign_S (w2[1], w2[2], offset); - c0[3] = hc_bytealign_S (w2[0], w2[1], offset); - c0[2] = hc_bytealign_S (w1[3], w2[0], offset); - c0[1] = hc_bytealign_S (w1[2], w1[3], offset); - c0[0] = hc_bytealign_S (w1[1], w1[2], offset); - w3[3] = hc_bytealign_S (w1[0], w1[1], offset); - w3[2] = hc_bytealign_S (w0[3], w1[0], offset); - w3[1] = hc_bytealign_S (w0[2], w0[3], offset); - w3[0] = hc_bytealign_S (w0[1], w0[2], offset); - w2[3] = hc_bytealign_S (w0[0], w0[1], offset); - w2[2] = hc_bytealign_S ( 0, w0[0], offset); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_bytealign_S (w3[3], 0, offset); - c2[2] = hc_bytealign_S (w3[2], w3[3], offset); - c2[1] = hc_bytealign_S (w3[1], w3[2], offset); - c2[0] = hc_bytealign_S (w3[0], w3[1], offset); - c1[3] = hc_bytealign_S (w2[3], w3[0], offset); - c1[2] = hc_bytealign_S (w2[2], w2[3], offset); - c1[1] = hc_bytealign_S (w2[1], w2[2], offset); - c1[0] = hc_bytealign_S (w2[0], w2[1], offset); - c0[3] = hc_bytealign_S (w1[3], w2[0], offset); - c0[2] = hc_bytealign_S (w1[2], w1[3], offset); - c0[1] = hc_bytealign_S (w1[1], w1[2], offset); - c0[0] = hc_bytealign_S (w1[0], w1[1], offset); - w3[3] = hc_bytealign_S (w0[3], w1[0], offset); - w3[2] = hc_bytealign_S (w0[2], w0[3], offset); - w3[1] = hc_bytealign_S (w0[1], w0[2], offset); - w3[0] = hc_bytealign_S (w0[0], w0[1], offset); - w2[3] = hc_bytealign_S ( 0, w0[0], offset); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_bytealign_S (w3[3], 0, offset); - c2[3] = hc_bytealign_S (w3[2], w3[3], offset); - c2[2] = hc_bytealign_S (w3[1], w3[2], offset); - c2[1] = hc_bytealign_S (w3[0], w3[1], offset); - c2[0] = hc_bytealign_S (w2[3], w3[0], offset); - c1[3] = hc_bytealign_S (w2[2], 
w2[3], offset); - c1[2] = hc_bytealign_S (w2[1], w2[2], offset); - c1[1] = hc_bytealign_S (w2[0], w2[1], offset); - c1[0] = hc_bytealign_S (w1[3], w2[0], offset); - c0[3] = hc_bytealign_S (w1[2], w1[3], offset); - c0[2] = hc_bytealign_S (w1[1], w1[2], offset); - c0[1] = hc_bytealign_S (w1[0], w1[1], offset); - c0[0] = hc_bytealign_S (w0[3], w1[0], offset); - w3[3] = hc_bytealign_S (w0[2], w0[3], offset); - w3[2] = hc_bytealign_S (w0[1], w0[2], offset); - w3[1] = hc_bytealign_S (w0[0], w0[1], offset); - w3[0] = hc_bytealign_S ( 0, w0[0], offset); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_bytealign_S (w3[3], 0, offset); - c3[0] = hc_bytealign_S (w3[2], w3[3], offset); - c2[3] = hc_bytealign_S (w3[1], w3[2], offset); - c2[2] = hc_bytealign_S (w3[0], w3[1], offset); - c2[1] = hc_bytealign_S (w2[3], w3[0], offset); - c2[0] = hc_bytealign_S (w2[2], w2[3], offset); - c1[3] = hc_bytealign_S (w2[1], w2[2], offset); - c1[2] = hc_bytealign_S (w2[0], w2[1], offset); - c1[1] = hc_bytealign_S (w1[3], w2[0], offset); - c1[0] = hc_bytealign_S (w1[2], w1[3], offset); - c0[3] = hc_bytealign_S (w1[1], w1[2], offset); - c0[2] = hc_bytealign_S (w1[0], w1[1], offset); - c0[1] = hc_bytealign_S (w0[3], w1[0], offset); - c0[0] = hc_bytealign_S (w0[2], w0[3], offset); - w3[3] = hc_bytealign_S (w0[1], w0[2], offset); - w3[2] = hc_bytealign_S (w0[0], w0[1], offset); - w3[1] = hc_bytealign_S ( 0, w0[0], offset); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_bytealign_S (w3[3], 0, offset); - c3[1] = hc_bytealign_S (w3[2], w3[3], offset); - c3[0] = hc_bytealign_S (w3[1], w3[2], offset); - c2[3] = hc_bytealign_S (w3[0], w3[1], offset); - c2[2] = hc_bytealign_S (w2[3], w3[0], offset); - c2[1] 
= hc_bytealign_S (w2[2], w2[3], offset); - c2[0] = hc_bytealign_S (w2[1], w2[2], offset); - c1[3] = hc_bytealign_S (w2[0], w2[1], offset); - c1[2] = hc_bytealign_S (w1[3], w2[0], offset); - c1[1] = hc_bytealign_S (w1[2], w1[3], offset); - c1[0] = hc_bytealign_S (w1[1], w1[2], offset); - c0[3] = hc_bytealign_S (w1[0], w1[1], offset); - c0[2] = hc_bytealign_S (w0[3], w1[0], offset); - c0[1] = hc_bytealign_S (w0[2], w0[3], offset); - c0[0] = hc_bytealign_S (w0[1], w0[2], offset); - w3[3] = hc_bytealign_S (w0[0], w0[1], offset); - w3[2] = hc_bytealign_S ( 0, w0[0], offset); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_bytealign_S (w3[3], 0, offset); - c3[2] = hc_bytealign_S (w3[2], w3[3], offset); - c3[1] = hc_bytealign_S (w3[1], w3[2], offset); - c3[0] = hc_bytealign_S (w3[0], w3[1], offset); - c2[3] = hc_bytealign_S (w2[3], w3[0], offset); - c2[2] = hc_bytealign_S (w2[2], w2[3], offset); - c2[1] = hc_bytealign_S (w2[1], w2[2], offset); - c2[0] = hc_bytealign_S (w2[0], w2[1], offset); - c1[3] = hc_bytealign_S (w1[3], w2[0], offset); - c1[2] = hc_bytealign_S (w1[2], w1[3], offset); - c1[1] = hc_bytealign_S (w1[1], w1[2], offset); - c1[0] = hc_bytealign_S (w1[0], w1[1], offset); - c0[3] = hc_bytealign_S (w0[3], w1[0], offset); - c0[2] = hc_bytealign_S (w0[2], w0[3], offset); - c0[1] = hc_bytealign_S (w0[1], w0[2], offset); - c0[0] = hc_bytealign_S (w0[0], w0[1], offset); - w3[3] = hc_bytealign_S ( 0, w0[0], offset); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset) { const int offset_switch = 
offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -38983,348 +22799,12 @@ DECLSPEC void switch_buffer_by_offset_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 * break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], 
selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = 
hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = 
hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - 
w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -39783,484 +23263,12 @@ DECLSPEC void switch_buffer_by_offset_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) 
&& HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S ( 0, w3[3], selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S ( 0, w3[3], selector); - c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], 
selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S ( 0, w3[3], selector); - c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S ( 0, w3[3], selector); - c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S ( 0, w3[3], selector); - c0[3] = hc_byte_perm_S 
(w3[3], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S ( 0, w3[3], selector); - c1[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S ( 0, w3[3], selector); - c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[0], 
selector); - c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S ( 0, w3[3], selector); - c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S ( 0, w3[3], selector); - c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); - 
c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S ( 0, w3[3], selector); - c2[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S ( 0, w3[3], selector); - c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[1] = 
hc_byte_perm_S (w2[3], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S ( 0, w3[3], selector); - c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S ( 0, w3[3], selector); - c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[0] = 
hc_byte_perm_S (w3[0], w2[3], selector); - c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S ( 0, w3[3], selector); - c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S ( 0, w3[3], selector); - c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[0] = 
hc_byte_perm_S (w3[2], w3[1], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S ( 0, w3[3], selector); - c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[3] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; 
- w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -41415,1167 +24423,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], 
w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = hc_byte_perm_S ( 0, w0[0], selector); - break; - - case 1: - w7[3] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S 
(w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - break; - - case 2: - w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[0] = 
hc_byte_perm_S (w6[0], w6[1], selector); - w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 4: - w7[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[3] = 
hc_byte_perm_S (w3[2], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 5: - w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - 
w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 6: - w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 7: - w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[3] = hc_byte_perm_S (w4[3], 
w5[0], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] = hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 8: - w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[3] = hc_byte_perm_S (w1[2], 
w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 9: - w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 10: - w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[2], 
selector); - w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 11: - w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - 
w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 12: - w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 13: - w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[0], 
w2[1], selector); - w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 14: - w7[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 15: - w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[3] = 
hc_byte_perm_S (w2[3], w3[0], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - case 16: - w7[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[0] = hc_byte_perm_S ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[0], 
w3[1], selector); - w7[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[1] = hc_byte_perm_S ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - w7[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[2] = hc_byte_perm_S ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[3], selector); - 
w7[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[3] = hc_byte_perm_S ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[0] = hc_byte_perm_S ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[2] = hc_byte_perm_S 
(w1[0], w1[1], selector); - w6[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[1] = hc_byte_perm_S ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[2] = hc_byte_perm_S ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[3] = hc_byte_perm_S ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] 
= 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[0] = hc_byte_perm_S ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - w7[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[1] = hc_byte_perm_S ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[2] = hc_byte_perm_S ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - 
w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[3] = hc_byte_perm_S ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[0] = hc_byte_perm_S ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[1] = hc_byte_perm_S ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; 
- w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[2] = hc_byte_perm_S ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm_S ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -44258,1712 +26111,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_le_S (PRIVATE_AS u32 *w0, PRIVAT break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if 
(defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S (w7[3], 0, selector); - w7[3] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[1] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[0] = hc_byte_perm_S ( 0, w0[0], selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S (w7[3], 0, selector); - c0[0] = hc_byte_perm_S (w7[2], w7[3], selector); - w7[3] = hc_byte_perm_S (w7[1], 
w7[2], selector); - w7[2] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[1] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[1] = hc_byte_perm_S ( 0, w0[0], selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S (w7[3], 0, selector); - c0[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[0] = hc_byte_perm_S (w7[1], w7[2], selector); - w7[3] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[2] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[2] = 
hc_byte_perm_S (w5[3], w6[0], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[2] = hc_byte_perm_S ( 0, w0[0], selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S (w7[3], 0, selector); - c0[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[0] = hc_byte_perm_S (w7[0], w7[1], selector); - w7[3] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[0], w6[1], selector); - w6[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w5[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[2] = hc_byte_perm_S 
(w4[2], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w4[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w3[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w2[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w1[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w0[3] = hc_byte_perm_S ( 0, w0[0], selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S (w7[3], 0, selector); - c0[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[0] = hc_byte_perm_S (w6[3], w7[0], selector); - w7[3] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[2] = hc_byte_perm_S 
(w3[1], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[0] = hc_byte_perm_S ( 0, w0[0], selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S (w7[3], 0, selector); - c1[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c0[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[0] = hc_byte_perm_S (w6[2], w6[3], selector); - w7[3] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[1] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[2] = 
hc_byte_perm_S (w2[0], w2[1], selector); - w3[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[1] = hc_byte_perm_S ( 0, w0[0], selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S (w7[3], 0, selector); - c1[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c0[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[0] = hc_byte_perm_S (w6[1], w6[2], selector); - w7[3] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[2] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[0], w1[1], 
selector); - w2[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[2] = hc_byte_perm_S ( 0, w0[0], selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S (w7[3], 0, selector); - c1[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c0[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[0] = hc_byte_perm_S (w6[0], w6[1], selector); - w7[3] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w6[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w5[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w4[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w3[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w2[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w1[3] 
= hc_byte_perm_S ( 0, w0[0], selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S (w7[3], 0, selector); - c1[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c0[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[0] = hc_byte_perm_S (w5[3], w6[0], selector); - w7[3] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[0] = hc_byte_perm_S ( 0, w0[0], selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S (w7[3], 0, 
selector); - c2[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c1[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c0[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[0] = hc_byte_perm_S (w5[2], w5[3], selector); - w7[3] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[1] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[1] = hc_byte_perm_S ( 0, w0[0], selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S (w7[3], 0, selector); - c2[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c1[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[2] = 
hc_byte_perm_S (w6[3], w7[0], selector); - c1[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c0[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c0[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[0] = hc_byte_perm_S (w5[1], w5[2], selector); - w7[3] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[2] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[2] = hc_byte_perm_S ( 0, w0[0], selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S (w7[3], 0, selector); - c2[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c1[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[0] = hc_byte_perm_S 
(w6[0], w6[1], selector); - c0[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[0] = hc_byte_perm_S (w5[0], w5[1], selector); - w7[3] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w6[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w5[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w4[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w3[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w2[3] = hc_byte_perm_S ( 0, w0[0], selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S (w7[3], 0, selector); - c2[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c1[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c0[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[2] = hc_byte_perm_S 
(w5[1], w5[2], selector); - c0[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[0] = hc_byte_perm_S (w4[3], w5[0], selector); - w7[3] = hc_byte_perm_S (w4[2], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[0] = hc_byte_perm_S ( 0, w0[0], selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S (w7[3], 0, selector); - c3[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c2[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c1[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c0[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[0] = 
hc_byte_perm_S (w4[2], w4[3], selector); - w7[3] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[1] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[1] = hc_byte_perm_S ( 0, w0[0], selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S (w7[3], 0, selector); - c3[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c2[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c1[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c0[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[0] = hc_byte_perm_S (w4[1], w4[2], selector); - w7[3] = hc_byte_perm_S (w4[0], 
w4[1], selector); - w7[2] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[2] = hc_byte_perm_S ( 0, w0[0], selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S (w7[3], 0, selector); - c3[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c2[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c1[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c0[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[0] = hc_byte_perm_S (w4[0], w4[1], selector); - w7[3] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[3], 
selector); - w7[1] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w6[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w5[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w4[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w3[3] = hc_byte_perm_S ( 0, w0[0], selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm_S (w7[3], 0, selector); - c3[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c2[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c1[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c0[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[0] = hc_byte_perm_S (w3[3], w4[0], selector); - w7[3] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[0], w3[1], 
selector); - w7[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[0] = hc_byte_perm_S ( 0, w0[0], selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm_S (w7[3], 0, selector); - c4[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c3[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c2[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c1[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c0[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[3], selector); - w7[3] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[1] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[0] = hc_byte_perm_S 
(w2[2], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[1] = hc_byte_perm_S ( 0, w0[0], selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm_S (w7[3], 0, selector); - c4[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c3[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c2[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c1[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c0[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[2], selector); - w7[3] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[2] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[2], selector); - 
w6[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[2] = hc_byte_perm_S ( 0, w0[0], selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm_S (w7[3], 0, selector); - c4[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c3[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c2[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c1[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c0[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[0], w3[1], selector); - w7[3] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w6[3] = 
hc_byte_perm_S (w1[3], w2[0], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w5[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w4[3] = hc_byte_perm_S ( 0, w0[0], selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm_S (w7[3], 0, selector); - c4[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c3[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c2[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c1[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[0] = hc_byte_perm_S (w2[3], w3[0], selector); - w7[3] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w6[3] = 
hc_byte_perm_S (w1[2], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[0] = hc_byte_perm_S ( 0, w0[0], selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm_S (w7[3], 0, selector); - c5[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c4[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c3[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c2[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c1[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[3], selector); - w7[3] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[1] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[3], 
selector); - w6[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[1] = hc_byte_perm_S ( 0, w0[0], selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm_S (w7[3], 0, selector); - c5[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c4[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c3[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c2[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c1[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[2] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[2], selector); - w7[3] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[2] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[0] = 
hc_byte_perm_S (w1[1], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[2] = hc_byte_perm_S ( 0, w0[0], selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm_S (w7[3], 0, selector); - c5[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c4[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c3[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c2[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c1[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c0[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[0], w2[1], selector); - w7[3] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[1] = 
hc_byte_perm_S (w1[1], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w6[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w5[3] = hc_byte_perm_S ( 0, w0[0], selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm_S (w7[3], 0, selector); - c5[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c4[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c3[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c2[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[0] = hc_byte_perm_S (w2[3], w3[0], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[0] = hc_byte_perm_S (w1[3], w2[0], selector); - w7[3] = hc_byte_perm_S (w1[2], w1[3], selector); - w7[2] = 
hc_byte_perm_S (w1[1], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[0] = hc_byte_perm_S ( 0, w0[0], selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm_S (w7[3], 0, selector); - c6[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c5[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c4[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c3[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c2[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[3], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[1] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[3], 
selector); - w7[3] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[1] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[1] = hc_byte_perm_S ( 0, w0[0], selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm_S (w7[3], 0, selector); - c6[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c5[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c4[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c3[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c2[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[2] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[2] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[1] = 
hc_byte_perm_S (w1[2], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[2], selector); - w7[3] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[2] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[2] = hc_byte_perm_S ( 0, w0[0], selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm_S (w7[3], 0, selector); - c6[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c5[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c4[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c3[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c2[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c1[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[0], w2[1], selector); - c0[3] = 
hc_byte_perm_S (w1[3], w2[0], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[0], w1[1], selector); - w7[3] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w6[3] = hc_byte_perm_S ( 0, w0[0], selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm_S (w7[3], 0, selector); - c6[3] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[2] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[1] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[0] = hc_byte_perm_S (w6[3], w7[0], selector); - c5[3] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[2] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[1] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[0] = hc_byte_perm_S (w5[3], w6[0], selector); - c4[3] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[2] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[1] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[0] = hc_byte_perm_S (w4[3], w5[0], selector); - c3[3] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[2] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[1] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[0] = hc_byte_perm_S (w3[3], w4[0], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[0] = hc_byte_perm_S (w2[3], w3[0], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[1] = 
hc_byte_perm_S (w2[0], w2[1], selector); - c1[0] = hc_byte_perm_S (w1[3], w2[0], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[0] = hc_byte_perm_S (w0[3], w1[0], selector); - w7[3] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[0] = hc_byte_perm_S ( 0, w0[0], selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm_S (w7[3], 0, selector); - c7[0] = hc_byte_perm_S (w7[2], w7[3], selector); - c6[3] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[2] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[1] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[0] = hc_byte_perm_S (w6[2], w6[3], selector); - c5[3] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[2] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[1] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[0] = hc_byte_perm_S (w5[2], w5[3], selector); - c4[3] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[2] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[1] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[0] = hc_byte_perm_S (w4[2], w4[3], selector); - c3[3] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[2] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[1] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[0] = hc_byte_perm_S (w3[2], w3[3], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[1] = hc_byte_perm_S (w2[3], w3[0], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[3], 
selector); - c1[3] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[1] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[1] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[3], selector); - w7[3] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[1] = hc_byte_perm_S ( 0, w0[0], selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm_S (w7[3], 0, selector); - c7[1] = hc_byte_perm_S (w7[2], w7[3], selector); - c7[0] = hc_byte_perm_S (w7[1], w7[2], selector); - c6[3] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[2] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[1] = hc_byte_perm_S (w6[2], w6[3], selector); - c6[0] = hc_byte_perm_S (w6[1], w6[2], selector); - c5[3] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[2] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[1] = hc_byte_perm_S (w5[2], w5[3], selector); - c5[0] = hc_byte_perm_S (w5[1], w5[2], selector); - c4[3] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[2] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[1] = hc_byte_perm_S (w4[2], w4[3], selector); - c4[0] = hc_byte_perm_S (w4[1], w4[2], selector); - c3[3] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[2] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[3], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[2] = 
hc_byte_perm_S (w2[3], w3[0], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[2] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[2] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[2], selector); - w7[3] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[2] = hc_byte_perm_S ( 0, w0[0], selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = hc_byte_perm_S (w7[3], 0, selector); - c7[2] = hc_byte_perm_S (w7[2], w7[3], selector); - c7[1] = hc_byte_perm_S (w7[1], w7[2], selector); - c7[0] = hc_byte_perm_S (w7[0], w7[1], selector); - c6[3] = hc_byte_perm_S (w6[3], w7[0], selector); - c6[2] = hc_byte_perm_S (w6[2], w6[3], selector); - c6[1] = hc_byte_perm_S (w6[1], w6[2], selector); - c6[0] = hc_byte_perm_S (w6[0], w6[1], selector); - c5[3] = hc_byte_perm_S (w5[3], w6[0], selector); - c5[2] = hc_byte_perm_S (w5[2], w5[3], selector); - c5[1] = hc_byte_perm_S (w5[1], w5[2], selector); - c5[0] = hc_byte_perm_S (w5[0], w5[1], selector); - c4[3] = hc_byte_perm_S (w4[3], w5[0], selector); - c4[2] = hc_byte_perm_S (w4[2], w4[3], selector); - c4[1] = hc_byte_perm_S (w4[1], w4[2], selector); - c4[0] = hc_byte_perm_S (w4[0], w4[1], selector); - c3[3] = hc_byte_perm_S (w3[3], w4[0], selector); - c3[2] = hc_byte_perm_S (w3[2], w3[3], selector); - c3[1] = 
hc_byte_perm_S (w3[1], w3[2], selector); - c3[0] = hc_byte_perm_S (w3[0], w3[1], selector); - c2[3] = hc_byte_perm_S (w2[3], w3[0], selector); - c2[2] = hc_byte_perm_S (w2[2], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[1], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[0], w2[1], selector); - c1[3] = hc_byte_perm_S (w1[3], w2[0], selector); - c1[2] = hc_byte_perm_S (w1[2], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[1], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[0], w1[1], selector); - c0[3] = hc_byte_perm_S (w0[3], w1[0], selector); - c0[2] = hc_byte_perm_S (w0[2], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[1], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[0], w0[1], selector); - w7[3] = hc_byte_perm_S ( 0, w0[0], selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -47118,1180 +27271,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - 
#endif - - switch (offset_switch) - { - case 0: - w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[0], 
selector); - w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[2] = 
hc_byte_perm_S (w4[0], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S 
(w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 4: - w7[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[2] 
= hc_byte_perm_S (w6[1], w6[0], selector); - w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[2], 
w3[1], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], 
selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[3] = 
hc_byte_perm_S (w3[2], w3[1], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, 
selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[0] = hc_byte_perm_S (w2[0], w1[3], 
selector); - w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[0], 
selector); - w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 15: - w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - w7[3] = hc_byte_perm_S (w3[3], w3[2], 
selector); - w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[0] = hc_byte_perm_S (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[1] = hc_byte_perm_S (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - 
- case 18: - w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[2] = hc_byte_perm_S (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[3] = hc_byte_perm_S (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[1], 
selector); - w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[0] = hc_byte_perm_S (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[1] = hc_byte_perm_S (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[3], 
w0[2], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[2] = hc_byte_perm_S (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[3] = hc_byte_perm_S (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[0] = hc_byte_perm_S (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - 
w7[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[1] = hc_byte_perm_S (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[2] = hc_byte_perm_S (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[3] = hc_byte_perm_S (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - w7[3] = 
hc_byte_perm_S (w0[3], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[0] = hc_byte_perm_S (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[1] = hc_byte_perm_S (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[2] = hc_byte_perm_S (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - w7[3] = hc_byte_perm_S (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; 
- w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, PRIVATE_AS u32 *w4, PRIVATE_AS u32 *w5, PRIVATE_AS u32 *w6, PRIVATE_AS u32 *w7, PRIVATE_AS u32 *c0, PRIVATE_AS u32 *c1, PRIVATE_AS u32 *c2, PRIVATE_AS u32 *c3, PRIVATE_AS u32 *c4, PRIVATE_AS u32 *c5, PRIVATE_AS u32 *c6, PRIVATE_AS u32 *c7, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -49974,1708 +28959,12 @@ DECLSPEC void switch_buffer_by_offset_8x4_carry_be_S (PRIVATE_AS u32 *w0, PRIVAT break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - c0[0] = hc_byte_perm_S ( 0, w7[3], selector); - w7[3] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[2] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[1] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w6[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w5[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w4[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[0] = hc_byte_perm_S (w4[0], w3[3], 
selector); - w3[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w2[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w1[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - - break; - - case 1: - c0[1] = hc_byte_perm_S ( 0, w7[3], selector); - c0[0] = hc_byte_perm_S (w7[3], w7[2], selector); - w7[3] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[2] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[1] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w6[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w5[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w4[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w3[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w2[3] = hc_byte_perm_S (w2[2], 
w2[1], selector); - w2[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w1[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - - break; - - case 2: - c0[2] = hc_byte_perm_S ( 0, w7[3], selector); - c0[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[0] = hc_byte_perm_S (w7[2], w7[1], selector); - w7[3] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[2] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[1] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w6[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[2] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w5[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w4[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w3[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w2[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w1[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[2] = 
hc_byte_perm_S (w1[0], w0[3], selector); - w1[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - - break; - - case 3: - c0[3] = hc_byte_perm_S ( 0, w7[3], selector); - c0[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[0] = hc_byte_perm_S (w7[1], w7[0], selector); - w7[3] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[2] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[1] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w6[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w5[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w4[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w3[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w2[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w1[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] 
= 0; - - break; - - case 4: - c1[0] = hc_byte_perm_S ( 0, w7[3], selector); - c0[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[0] = hc_byte_perm_S (w7[0], w6[3], selector); - w7[3] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[2] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[1] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w6[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w5[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w4[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w3[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w2[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w1[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[0] = hc_byte_perm_S (w0[0], 0, selector); - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 5: - c1[1] = hc_byte_perm_S ( 0, w7[3], selector); - c1[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c0[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[1] = 
hc_byte_perm_S (w7[0], w6[3], selector); - c0[0] = hc_byte_perm_S (w6[3], w6[2], selector); - w7[3] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[2] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[1] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w6[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w5[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w4[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w3[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w2[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w1[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[1] = hc_byte_perm_S (w0[0], 0, selector); - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 6: - c1[2] = hc_byte_perm_S ( 0, w7[3], selector); - c1[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c0[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[0] = hc_byte_perm_S (w6[2], w6[1], selector); - w7[3] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[2] = hc_byte_perm_S (w6[0], w5[3], 
selector); - w7[1] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w6[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w5[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w4[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w3[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w2[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w1[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[2] = hc_byte_perm_S (w0[0], 0, selector); - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 7: - c1[3] = hc_byte_perm_S ( 0, w7[3], selector); - c1[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c0[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[0] = hc_byte_perm_S (w6[1], w6[0], selector); - w7[3] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[2] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[1] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w6[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[2] 
= hc_byte_perm_S (w4[3], w4[2], selector); - w6[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w5[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w4[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w3[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w2[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w1[3] = hc_byte_perm_S (w0[0], 0, selector); - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 8: - c2[0] = hc_byte_perm_S ( 0, w7[3], selector); - c1[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c0[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[0] = hc_byte_perm_S (w6[0], w5[3], selector); - w7[3] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[2] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[1] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w6[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w5[3] = 
hc_byte_perm_S (w3[3], w3[2], selector); - w5[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w4[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w3[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w2[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[0] = hc_byte_perm_S (w0[0], 0, selector); - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 9: - c2[1] = hc_byte_perm_S ( 0, w7[3], selector); - c2[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c1[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c0[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[0] = hc_byte_perm_S (w5[3], w5[2], selector); - w7[3] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[2] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[1] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w6[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w5[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[1] = hc_byte_perm_S (w3[0], w2[3], selector); - 
w5[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w4[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w3[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w2[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[1] = hc_byte_perm_S (w0[0], 0, selector); - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 10: - c2[2] = hc_byte_perm_S ( 0, w7[3], selector); - c2[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c1[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c0[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[0] = hc_byte_perm_S (w5[2], w5[1], selector); - w7[3] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[2] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[1] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w6[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w5[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w4[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[2] = hc_byte_perm_S 
(w2[0], w1[3], selector); - w4[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w3[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w3[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w2[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[2] = hc_byte_perm_S (w0[0], 0, selector); - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 11: - c2[3] = hc_byte_perm_S ( 0, w7[3], selector); - c2[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c1[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c0[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[0] = hc_byte_perm_S (w5[1], w5[0], selector); - w7[3] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[2] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[1] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w6[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w5[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w4[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[0] = hc_byte_perm_S (w1[1], w1[0], 
selector); - w3[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w2[3] = hc_byte_perm_S (w0[0], 0, selector); - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 12: - c3[0] = hc_byte_perm_S ( 0, w7[3], selector); - c2[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c1[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c0[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[0] = hc_byte_perm_S (w5[0], w4[3], selector); - w7[3] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[2] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[1] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w6[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w5[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w4[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w3[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[2] = hc_byte_perm_S (w0[2], w0[1], 
selector); - w3[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[0] = hc_byte_perm_S (w0[0], 0, selector); - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 13: - c3[1] = hc_byte_perm_S ( 0, w7[3], selector); - c3[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c2[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c1[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c0[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[0] = hc_byte_perm_S (w4[3], w4[2], selector); - w7[3] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[2] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[1] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w6[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w5[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w4[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w3[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[1] = hc_byte_perm_S (w0[0], 0, selector); - w3[0] = 0; - w2[3] = 0; - w2[2] 
= 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 14: - c3[2] = hc_byte_perm_S ( 0, w7[3], selector); - c3[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c2[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c1[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c0[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[0] = hc_byte_perm_S (w4[2], w4[1], selector); - w7[3] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[2] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[1] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w6[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w5[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w4[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w3[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[2] = hc_byte_perm_S (w0[0], 0, selector); - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] 
= 0; - w0[0] = 0; - - break; - - case 15: - c3[3] = hc_byte_perm_S ( 0, w7[3], selector); - c3[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c2[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c1[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c0[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[0] = hc_byte_perm_S (w4[1], w4[0], selector); - w7[3] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[2] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[1] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w6[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w5[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w4[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w3[3] = hc_byte_perm_S (w0[0], 0, selector); - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 16: - c4[0] = hc_byte_perm_S ( 0, w7[3], selector); - c3[3] = 
hc_byte_perm_S (w7[3], w7[2], selector); - c3[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c2[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c1[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c0[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[0] = hc_byte_perm_S (w4[0], w3[3], selector); - w7[3] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[2] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[1] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w6[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w5[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w4[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[0] = hc_byte_perm_S (w0[0], 0, selector); - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 17: - c4[1] = hc_byte_perm_S ( 0, w7[3], selector); - c4[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c3[3] = hc_byte_perm_S (w7[2], w7[1], selector); 
- c3[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c2[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c1[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c0[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[0] = hc_byte_perm_S (w3[3], w3[2], selector); - w7[3] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[2] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[1] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w6[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w5[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w4[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[1] = hc_byte_perm_S (w0[0], 0, selector); - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 18: - c4[2] = hc_byte_perm_S ( 0, w7[3], selector); - c4[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c3[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[2] = hc_byte_perm_S 
(w7[0], w6[3], selector); - c3[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c2[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c1[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c0[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[0] = hc_byte_perm_S (w3[2], w3[1], selector); - w7[3] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[2] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[1] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w6[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w5[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w4[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[2] = hc_byte_perm_S (w0[0], 0, selector); - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 19: - c4[3] = hc_byte_perm_S ( 0, w7[3], selector); - c4[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c3[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[2] = hc_byte_perm_S (w6[3], w6[2], 
selector); - c3[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c2[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c1[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c0[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[0] = hc_byte_perm_S (w3[1], w3[0], selector); - w7[3] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[2] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[1] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w6[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w5[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w4[3] = hc_byte_perm_S (w0[0], 0, selector); - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 20: - c5[0] = hc_byte_perm_S ( 0, w7[3], selector); - c4[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c3[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[2] = hc_byte_perm_S (w6[2], w6[1], 
selector); - c3[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c2[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c1[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c0[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[0] = hc_byte_perm_S (w3[0], w2[3], selector); - w7[3] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[2] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[1] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w6[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w5[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[0] = hc_byte_perm_S (w0[0], 0, selector); - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 21: - c5[1] = hc_byte_perm_S ( 0, w7[3], selector); - c5[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c4[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c3[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[2] = hc_byte_perm_S 
(w6[1], w6[0], selector); - c3[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c2[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c1[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c0[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[0] = hc_byte_perm_S (w2[3], w2[2], selector); - w7[3] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[2] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[1] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w6[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w5[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[1] = hc_byte_perm_S (w0[0], 0, selector); - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 22: - c5[2] = hc_byte_perm_S ( 0, w7[3], selector); - c5[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c4[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c3[3] = hc_byte_perm_S (w6[1], w6[0], selector); 
- c3[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c3[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c2[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c1[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c0[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[0] = hc_byte_perm_S (w2[2], w2[1], selector); - w7[3] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[2] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[1] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w6[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w5[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[2] = hc_byte_perm_S (w0[0], 0, selector); - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 23: - c5[3] = hc_byte_perm_S ( 0, w7[3], selector); - c5[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c4[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c3[3] = 
hc_byte_perm_S (w6[0], w5[3], selector); - c3[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c2[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c1[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c0[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[0] = hc_byte_perm_S (w2[1], w2[0], selector); - w7[3] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[2] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[1] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w6[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w5[3] = hc_byte_perm_S (w0[0], 0, selector); - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 24: - c6[0] = hc_byte_perm_S ( 0, w7[3], selector); - c5[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c4[3] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[0] = 
hc_byte_perm_S (w6[0], w5[3], selector); - c3[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c2[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c1[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c0[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[0] = hc_byte_perm_S (w2[0], w1[3], selector); - w7[3] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[2] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[1] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w6[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[0] = hc_byte_perm_S (w0[0], 0, selector); - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 25: - c6[1] = hc_byte_perm_S ( 0, w7[3], selector); - c6[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c5[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[2] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c4[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[2] = hc_byte_perm_S (w6[1], w6[0], 
selector); - c4[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c3[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c2[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c1[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c0[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[0] = hc_byte_perm_S (w1[3], w1[2], selector); - w7[3] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[2] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[1] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w6[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[1] = hc_byte_perm_S (w0[0], 0, selector); - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 26: - c6[2] = hc_byte_perm_S ( 0, w7[3], selector); - c6[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[0] = hc_byte_perm_S (w7[2], w7[1], selector); - c5[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c4[3] = 
hc_byte_perm_S (w6[1], w6[0], selector); - c4[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c3[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c2[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c1[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c0[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[0] = hc_byte_perm_S (w1[2], w1[1], selector); - w7[3] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[2] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[1] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w6[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[2] = hc_byte_perm_S (w0[0], 0, selector); - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 27: - c6[3] = hc_byte_perm_S ( 0, w7[3], selector); - c6[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c5[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[1] = 
hc_byte_perm_S (w6[2], w6[1], selector); - c5[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c4[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c3[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c2[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c1[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c0[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[0] = hc_byte_perm_S (w1[1], w1[0], selector); - w7[3] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[2] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[1] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w6[3] = hc_byte_perm_S (w0[0], 0, selector); - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 28: - c7[0] = hc_byte_perm_S ( 0, w7[3], selector); - c6[3] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[2] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[1] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[0] = hc_byte_perm_S (w7[0], w6[3], selector); - c5[3] = 
hc_byte_perm_S (w6[3], w6[2], selector); - c5[2] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[1] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[0] = hc_byte_perm_S (w6[0], w5[3], selector); - c4[3] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[2] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[1] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[0] = hc_byte_perm_S (w5[0], w4[3], selector); - c3[3] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[2] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[1] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[0] = hc_byte_perm_S (w4[0], w3[3], selector); - c2[3] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[2] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[1] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[0] = hc_byte_perm_S (w3[0], w2[3], selector); - c1[3] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[2] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[1] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[0] = hc_byte_perm_S (w2[0], w1[3], selector); - c0[3] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[2] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[1] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[0] = hc_byte_perm_S (w1[0], w0[3], selector); - w7[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[0] = hc_byte_perm_S (w0[0], 0, selector); - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 29: - c7[1] = hc_byte_perm_S ( 0, w7[3], selector); - c7[0] = hc_byte_perm_S (w7[3], w7[2], selector); - c6[3] = hc_byte_perm_S (w7[2], w7[1], selector); - c6[2] = hc_byte_perm_S (w7[1], w7[0], 
selector); - c6[1] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[0] = hc_byte_perm_S (w6[3], w6[2], selector); - c5[3] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[2] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[1] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[0] = hc_byte_perm_S (w5[3], w5[2], selector); - c4[3] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[2] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[1] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[0] = hc_byte_perm_S (w4[3], w4[2], selector); - c3[3] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[2] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[1] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[0] = hc_byte_perm_S (w3[3], w3[2], selector); - c2[3] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[2] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[1] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[0] = hc_byte_perm_S (w2[3], w2[2], selector); - c1[3] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[2] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[1] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[0] = hc_byte_perm_S (w1[3], w1[2], selector); - c0[3] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[2] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[1] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[0] = hc_byte_perm_S (w0[3], w0[2], selector); - w7[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[1] = hc_byte_perm_S (w0[0], 0, selector); - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 30: - c7[2] = hc_byte_perm_S ( 0, w7[3], selector); - c7[1] = hc_byte_perm_S (w7[3], w7[2], selector); - c7[0] = 
hc_byte_perm_S (w7[2], w7[1], selector); - c6[3] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[2] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[1] = hc_byte_perm_S (w6[3], w6[2], selector); - c6[0] = hc_byte_perm_S (w6[2], w6[1], selector); - c5[3] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[2] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[1] = hc_byte_perm_S (w5[3], w5[2], selector); - c5[0] = hc_byte_perm_S (w5[2], w5[1], selector); - c4[3] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[2] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[1] = hc_byte_perm_S (w4[3], w4[2], selector); - c4[0] = hc_byte_perm_S (w4[2], w4[1], selector); - c3[3] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[2] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[1] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[0] = hc_byte_perm_S (w3[2], w3[1], selector); - c2[3] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[2] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[1] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[0] = hc_byte_perm_S (w2[2], w2[1], selector); - c1[3] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[2] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[1] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[0] = hc_byte_perm_S (w1[2], w1[1], selector); - c0[3] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[2] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[1] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[0] = hc_byte_perm_S (w0[2], w0[1], selector); - w7[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[2] = hc_byte_perm_S (w0[0], 0, selector); - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] = 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - - case 31: - c7[3] = 
hc_byte_perm_S ( 0, w7[3], selector); - c7[2] = hc_byte_perm_S (w7[3], w7[2], selector); - c7[1] = hc_byte_perm_S (w7[2], w7[1], selector); - c7[0] = hc_byte_perm_S (w7[1], w7[0], selector); - c6[3] = hc_byte_perm_S (w7[0], w6[3], selector); - c6[2] = hc_byte_perm_S (w6[3], w6[2], selector); - c6[1] = hc_byte_perm_S (w6[2], w6[1], selector); - c6[0] = hc_byte_perm_S (w6[1], w6[0], selector); - c5[3] = hc_byte_perm_S (w6[0], w5[3], selector); - c5[2] = hc_byte_perm_S (w5[3], w5[2], selector); - c5[1] = hc_byte_perm_S (w5[2], w5[1], selector); - c5[0] = hc_byte_perm_S (w5[1], w5[0], selector); - c4[3] = hc_byte_perm_S (w5[0], w4[3], selector); - c4[2] = hc_byte_perm_S (w4[3], w4[2], selector); - c4[1] = hc_byte_perm_S (w4[2], w4[1], selector); - c4[0] = hc_byte_perm_S (w4[1], w4[0], selector); - c3[3] = hc_byte_perm_S (w4[0], w3[3], selector); - c3[2] = hc_byte_perm_S (w3[3], w3[2], selector); - c3[1] = hc_byte_perm_S (w3[2], w3[1], selector); - c3[0] = hc_byte_perm_S (w3[1], w3[0], selector); - c2[3] = hc_byte_perm_S (w3[0], w2[3], selector); - c2[2] = hc_byte_perm_S (w2[3], w2[2], selector); - c2[1] = hc_byte_perm_S (w2[2], w2[1], selector); - c2[0] = hc_byte_perm_S (w2[1], w2[0], selector); - c1[3] = hc_byte_perm_S (w2[0], w1[3], selector); - c1[2] = hc_byte_perm_S (w1[3], w1[2], selector); - c1[1] = hc_byte_perm_S (w1[2], w1[1], selector); - c1[0] = hc_byte_perm_S (w1[1], w1[0], selector); - c0[3] = hc_byte_perm_S (w1[0], w0[3], selector); - c0[2] = hc_byte_perm_S (w0[3], w0[2], selector); - c0[1] = hc_byte_perm_S (w0[2], w0[1], selector); - c0[0] = hc_byte_perm_S (w0[1], w0[0], selector); - w7[3] = hc_byte_perm_S (w0[0], 0, selector); - w7[2] = 0; - w7[1] = 0; - w7[0] = 0; - w6[3] = 0; - w6[2] = 0; - w6[1] = 0; - w6[0] = 0; - w5[3] = 0; - w5[2] = 0; - w5[1] = 0; - w5[0] = 0; - w4[3] = 0; - w4[2] = 0; - w4[1] = 0; - w4[0] = 0; - w3[3] = 0; - w3[2] = 0; - w3[1] = 0; - w3[0] = 0; - w2[3] = 0; - w2[2] = 0; - w2[1] = 0; - w2[0] = 0; - w1[3] = 0; - w1[2] = 0; - w1[1] 
= 0; - w1[0] = 0; - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_le_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -56030,4384 +33319,12 @@ DECLSPEC void switch_buffer_by_offset_1x64_le_S (PRIVATE_AS u32 *w, const u32 of break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm_S (w[62], w[63], selector); - w[62] = hc_byte_perm_S (w[61], w[62], selector); - w[61] = hc_byte_perm_S (w[60], w[61], selector); - w[60] = hc_byte_perm_S (w[59], w[60], selector); - w[59] = hc_byte_perm_S (w[58], w[59], selector); - w[58] = hc_byte_perm_S (w[57], w[58], selector); - w[57] = hc_byte_perm_S (w[56], w[57], selector); - w[56] = hc_byte_perm_S (w[55], w[56], selector); - w[55] = hc_byte_perm_S (w[54], w[55], selector); - w[54] = hc_byte_perm_S (w[53], w[54], selector); - w[53] = hc_byte_perm_S (w[52], w[53], selector); - w[52] = hc_byte_perm_S (w[51], w[52], selector); - w[51] = hc_byte_perm_S (w[50], w[51], selector); - w[50] = hc_byte_perm_S (w[49], w[50], selector); - w[49] = hc_byte_perm_S (w[48], w[49], selector); - w[48] = hc_byte_perm_S (w[47], w[48], selector); - w[47] = hc_byte_perm_S (w[46], w[47], selector); - w[46] = hc_byte_perm_S (w[45], w[46], selector); - w[45] = hc_byte_perm_S (w[44], w[45], selector); - w[44] = hc_byte_perm_S (w[43], w[44], selector); - w[43] = hc_byte_perm_S (w[42], w[43], selector); - w[42] = 
hc_byte_perm_S (w[41], w[42], selector); - w[41] = hc_byte_perm_S (w[40], w[41], selector); - w[40] = hc_byte_perm_S (w[39], w[40], selector); - w[39] = hc_byte_perm_S (w[38], w[39], selector); - w[38] = hc_byte_perm_S (w[37], w[38], selector); - w[37] = hc_byte_perm_S (w[36], w[37], selector); - w[36] = hc_byte_perm_S (w[35], w[36], selector); - w[35] = hc_byte_perm_S (w[34], w[35], selector); - w[34] = hc_byte_perm_S (w[33], w[34], selector); - w[33] = hc_byte_perm_S (w[32], w[33], selector); - w[32] = hc_byte_perm_S (w[31], w[32], selector); - w[31] = hc_byte_perm_S (w[30], w[31], selector); - w[30] = hc_byte_perm_S (w[29], w[30], selector); - w[29] = hc_byte_perm_S (w[28], w[29], selector); - w[28] = hc_byte_perm_S (w[27], w[28], selector); - w[27] = hc_byte_perm_S (w[26], w[27], selector); - w[26] = hc_byte_perm_S (w[25], w[26], selector); - w[25] = hc_byte_perm_S (w[24], w[25], selector); - w[24] = hc_byte_perm_S (w[23], w[24], selector); - w[23] = hc_byte_perm_S (w[22], w[23], selector); - w[22] = hc_byte_perm_S (w[21], w[22], selector); - w[21] = hc_byte_perm_S (w[20], w[21], selector); - w[20] = hc_byte_perm_S (w[19], w[20], selector); - w[19] = hc_byte_perm_S (w[18], w[19], selector); - w[18] = hc_byte_perm_S (w[17], w[18], selector); - w[17] = hc_byte_perm_S (w[16], w[17], selector); - w[16] = hc_byte_perm_S (w[15], w[16], selector); - w[15] = hc_byte_perm_S (w[14], w[15], selector); - w[14] = hc_byte_perm_S (w[13], w[14], selector); - w[13] = hc_byte_perm_S (w[12], w[13], selector); - w[12] = hc_byte_perm_S (w[11], w[12], selector); - w[11] = hc_byte_perm_S (w[10], w[11], selector); - w[10] = hc_byte_perm_S (w[ 9], w[10], selector); - w[ 9] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[ 8] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 7] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 6] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 5] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 4] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 3] = 
hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 2] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 1] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 0] = hc_byte_perm_S ( 0, w[ 0], selector); - - break; - - case 1: - w[63] = hc_byte_perm_S (w[61], w[62], selector); - w[62] = hc_byte_perm_S (w[60], w[61], selector); - w[61] = hc_byte_perm_S (w[59], w[60], selector); - w[60] = hc_byte_perm_S (w[58], w[59], selector); - w[59] = hc_byte_perm_S (w[57], w[58], selector); - w[58] = hc_byte_perm_S (w[56], w[57], selector); - w[57] = hc_byte_perm_S (w[55], w[56], selector); - w[56] = hc_byte_perm_S (w[54], w[55], selector); - w[55] = hc_byte_perm_S (w[53], w[54], selector); - w[54] = hc_byte_perm_S (w[52], w[53], selector); - w[53] = hc_byte_perm_S (w[51], w[52], selector); - w[52] = hc_byte_perm_S (w[50], w[51], selector); - w[51] = hc_byte_perm_S (w[49], w[50], selector); - w[50] = hc_byte_perm_S (w[48], w[49], selector); - w[49] = hc_byte_perm_S (w[47], w[48], selector); - w[48] = hc_byte_perm_S (w[46], w[47], selector); - w[47] = hc_byte_perm_S (w[45], w[46], selector); - w[46] = hc_byte_perm_S (w[44], w[45], selector); - w[45] = hc_byte_perm_S (w[43], w[44], selector); - w[44] = hc_byte_perm_S (w[42], w[43], selector); - w[43] = hc_byte_perm_S (w[41], w[42], selector); - w[42] = hc_byte_perm_S (w[40], w[41], selector); - w[41] = hc_byte_perm_S (w[39], w[40], selector); - w[40] = hc_byte_perm_S (w[38], w[39], selector); - w[39] = hc_byte_perm_S (w[37], w[38], selector); - w[38] = hc_byte_perm_S (w[36], w[37], selector); - w[37] = hc_byte_perm_S (w[35], w[36], selector); - w[36] = hc_byte_perm_S (w[34], w[35], selector); - w[35] = hc_byte_perm_S (w[33], w[34], selector); - w[34] = hc_byte_perm_S (w[32], w[33], selector); - w[33] = hc_byte_perm_S (w[31], w[32], selector); - w[32] = hc_byte_perm_S (w[30], w[31], selector); - w[31] = hc_byte_perm_S (w[29], w[30], selector); - w[30] = hc_byte_perm_S (w[28], w[29], selector); - w[29] = hc_byte_perm_S (w[27], w[28], selector); 
- w[28] = hc_byte_perm_S (w[26], w[27], selector); - w[27] = hc_byte_perm_S (w[25], w[26], selector); - w[26] = hc_byte_perm_S (w[24], w[25], selector); - w[25] = hc_byte_perm_S (w[23], w[24], selector); - w[24] = hc_byte_perm_S (w[22], w[23], selector); - w[23] = hc_byte_perm_S (w[21], w[22], selector); - w[22] = hc_byte_perm_S (w[20], w[21], selector); - w[21] = hc_byte_perm_S (w[19], w[20], selector); - w[20] = hc_byte_perm_S (w[18], w[19], selector); - w[19] = hc_byte_perm_S (w[17], w[18], selector); - w[18] = hc_byte_perm_S (w[16], w[17], selector); - w[17] = hc_byte_perm_S (w[15], w[16], selector); - w[16] = hc_byte_perm_S (w[14], w[15], selector); - w[15] = hc_byte_perm_S (w[13], w[14], selector); - w[14] = hc_byte_perm_S (w[12], w[13], selector); - w[13] = hc_byte_perm_S (w[11], w[12], selector); - w[12] = hc_byte_perm_S (w[10], w[11], selector); - w[11] = hc_byte_perm_S (w[ 9], w[10], selector); - w[10] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[ 9] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 8] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 7] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 6] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 5] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 4] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 3] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 2] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 1] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 0] = 0; - - break; - - case 2: - w[63] = hc_byte_perm_S (w[60], w[61], selector); - w[62] = hc_byte_perm_S (w[59], w[60], selector); - w[61] = hc_byte_perm_S (w[58], w[59], selector); - w[60] = hc_byte_perm_S (w[57], w[58], selector); - w[59] = hc_byte_perm_S (w[56], w[57], selector); - w[58] = hc_byte_perm_S (w[55], w[56], selector); - w[57] = hc_byte_perm_S (w[54], w[55], selector); - w[56] = hc_byte_perm_S (w[53], w[54], selector); - w[55] = hc_byte_perm_S (w[52], w[53], selector); - w[54] = hc_byte_perm_S (w[51], w[52], selector); - w[53] = hc_byte_perm_S 
(w[50], w[51], selector); - w[52] = hc_byte_perm_S (w[49], w[50], selector); - w[51] = hc_byte_perm_S (w[48], w[49], selector); - w[50] = hc_byte_perm_S (w[47], w[48], selector); - w[49] = hc_byte_perm_S (w[46], w[47], selector); - w[48] = hc_byte_perm_S (w[45], w[46], selector); - w[47] = hc_byte_perm_S (w[44], w[45], selector); - w[46] = hc_byte_perm_S (w[43], w[44], selector); - w[45] = hc_byte_perm_S (w[42], w[43], selector); - w[44] = hc_byte_perm_S (w[41], w[42], selector); - w[43] = hc_byte_perm_S (w[40], w[41], selector); - w[42] = hc_byte_perm_S (w[39], w[40], selector); - w[41] = hc_byte_perm_S (w[38], w[39], selector); - w[40] = hc_byte_perm_S (w[37], w[38], selector); - w[39] = hc_byte_perm_S (w[36], w[37], selector); - w[38] = hc_byte_perm_S (w[35], w[36], selector); - w[37] = hc_byte_perm_S (w[34], w[35], selector); - w[36] = hc_byte_perm_S (w[33], w[34], selector); - w[35] = hc_byte_perm_S (w[32], w[33], selector); - w[34] = hc_byte_perm_S (w[31], w[32], selector); - w[33] = hc_byte_perm_S (w[30], w[31], selector); - w[32] = hc_byte_perm_S (w[29], w[30], selector); - w[31] = hc_byte_perm_S (w[28], w[29], selector); - w[30] = hc_byte_perm_S (w[27], w[28], selector); - w[29] = hc_byte_perm_S (w[26], w[27], selector); - w[28] = hc_byte_perm_S (w[25], w[26], selector); - w[27] = hc_byte_perm_S (w[24], w[25], selector); - w[26] = hc_byte_perm_S (w[23], w[24], selector); - w[25] = hc_byte_perm_S (w[22], w[23], selector); - w[24] = hc_byte_perm_S (w[21], w[22], selector); - w[23] = hc_byte_perm_S (w[20], w[21], selector); - w[22] = hc_byte_perm_S (w[19], w[20], selector); - w[21] = hc_byte_perm_S (w[18], w[19], selector); - w[20] = hc_byte_perm_S (w[17], w[18], selector); - w[19] = hc_byte_perm_S (w[16], w[17], selector); - w[18] = hc_byte_perm_S (w[15], w[16], selector); - w[17] = hc_byte_perm_S (w[14], w[15], selector); - w[16] = hc_byte_perm_S (w[13], w[14], selector); - w[15] = hc_byte_perm_S (w[12], w[13], selector); - w[14] = hc_byte_perm_S (w[11], 
w[12], selector); - w[13] = hc_byte_perm_S (w[10], w[11], selector); - w[12] = hc_byte_perm_S (w[ 9], w[10], selector); - w[11] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[10] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[ 9] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 8] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 7] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 6] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 5] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 4] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 3] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 2] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm_S (w[59], w[60], selector); - w[62] = hc_byte_perm_S (w[58], w[59], selector); - w[61] = hc_byte_perm_S (w[57], w[58], selector); - w[60] = hc_byte_perm_S (w[56], w[57], selector); - w[59] = hc_byte_perm_S (w[55], w[56], selector); - w[58] = hc_byte_perm_S (w[54], w[55], selector); - w[57] = hc_byte_perm_S (w[53], w[54], selector); - w[56] = hc_byte_perm_S (w[52], w[53], selector); - w[55] = hc_byte_perm_S (w[51], w[52], selector); - w[54] = hc_byte_perm_S (w[50], w[51], selector); - w[53] = hc_byte_perm_S (w[49], w[50], selector); - w[52] = hc_byte_perm_S (w[48], w[49], selector); - w[51] = hc_byte_perm_S (w[47], w[48], selector); - w[50] = hc_byte_perm_S (w[46], w[47], selector); - w[49] = hc_byte_perm_S (w[45], w[46], selector); - w[48] = hc_byte_perm_S (w[44], w[45], selector); - w[47] = hc_byte_perm_S (w[43], w[44], selector); - w[46] = hc_byte_perm_S (w[42], w[43], selector); - w[45] = hc_byte_perm_S (w[41], w[42], selector); - w[44] = hc_byte_perm_S (w[40], w[41], selector); - w[43] = hc_byte_perm_S (w[39], w[40], selector); - w[42] = hc_byte_perm_S (w[38], w[39], selector); - w[41] = hc_byte_perm_S (w[37], w[38], selector); - w[40] = hc_byte_perm_S (w[36], w[37], selector); - w[39] = hc_byte_perm_S (w[35], w[36], selector); - w[38] = hc_byte_perm_S (w[34], w[35], 
selector); - w[37] = hc_byte_perm_S (w[33], w[34], selector); - w[36] = hc_byte_perm_S (w[32], w[33], selector); - w[35] = hc_byte_perm_S (w[31], w[32], selector); - w[34] = hc_byte_perm_S (w[30], w[31], selector); - w[33] = hc_byte_perm_S (w[29], w[30], selector); - w[32] = hc_byte_perm_S (w[28], w[29], selector); - w[31] = hc_byte_perm_S (w[27], w[28], selector); - w[30] = hc_byte_perm_S (w[26], w[27], selector); - w[29] = hc_byte_perm_S (w[25], w[26], selector); - w[28] = hc_byte_perm_S (w[24], w[25], selector); - w[27] = hc_byte_perm_S (w[23], w[24], selector); - w[26] = hc_byte_perm_S (w[22], w[23], selector); - w[25] = hc_byte_perm_S (w[21], w[22], selector); - w[24] = hc_byte_perm_S (w[20], w[21], selector); - w[23] = hc_byte_perm_S (w[19], w[20], selector); - w[22] = hc_byte_perm_S (w[18], w[19], selector); - w[21] = hc_byte_perm_S (w[17], w[18], selector); - w[20] = hc_byte_perm_S (w[16], w[17], selector); - w[19] = hc_byte_perm_S (w[15], w[16], selector); - w[18] = hc_byte_perm_S (w[14], w[15], selector); - w[17] = hc_byte_perm_S (w[13], w[14], selector); - w[16] = hc_byte_perm_S (w[12], w[13], selector); - w[15] = hc_byte_perm_S (w[11], w[12], selector); - w[14] = hc_byte_perm_S (w[10], w[11], selector); - w[13] = hc_byte_perm_S (w[ 9], w[10], selector); - w[12] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[11] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[10] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[ 9] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 8] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 7] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 6] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 5] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 4] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 3] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm_S (w[58], w[59], selector); - w[62] = hc_byte_perm_S (w[57], w[58], selector); - w[61] = hc_byte_perm_S (w[56], w[57], 
selector); - w[60] = hc_byte_perm_S (w[55], w[56], selector); - w[59] = hc_byte_perm_S (w[54], w[55], selector); - w[58] = hc_byte_perm_S (w[53], w[54], selector); - w[57] = hc_byte_perm_S (w[52], w[53], selector); - w[56] = hc_byte_perm_S (w[51], w[52], selector); - w[55] = hc_byte_perm_S (w[50], w[51], selector); - w[54] = hc_byte_perm_S (w[49], w[50], selector); - w[53] = hc_byte_perm_S (w[48], w[49], selector); - w[52] = hc_byte_perm_S (w[47], w[48], selector); - w[51] = hc_byte_perm_S (w[46], w[47], selector); - w[50] = hc_byte_perm_S (w[45], w[46], selector); - w[49] = hc_byte_perm_S (w[44], w[45], selector); - w[48] = hc_byte_perm_S (w[43], w[44], selector); - w[47] = hc_byte_perm_S (w[42], w[43], selector); - w[46] = hc_byte_perm_S (w[41], w[42], selector); - w[45] = hc_byte_perm_S (w[40], w[41], selector); - w[44] = hc_byte_perm_S (w[39], w[40], selector); - w[43] = hc_byte_perm_S (w[38], w[39], selector); - w[42] = hc_byte_perm_S (w[37], w[38], selector); - w[41] = hc_byte_perm_S (w[36], w[37], selector); - w[40] = hc_byte_perm_S (w[35], w[36], selector); - w[39] = hc_byte_perm_S (w[34], w[35], selector); - w[38] = hc_byte_perm_S (w[33], w[34], selector); - w[37] = hc_byte_perm_S (w[32], w[33], selector); - w[36] = hc_byte_perm_S (w[31], w[32], selector); - w[35] = hc_byte_perm_S (w[30], w[31], selector); - w[34] = hc_byte_perm_S (w[29], w[30], selector); - w[33] = hc_byte_perm_S (w[28], w[29], selector); - w[32] = hc_byte_perm_S (w[27], w[28], selector); - w[31] = hc_byte_perm_S (w[26], w[27], selector); - w[30] = hc_byte_perm_S (w[25], w[26], selector); - w[29] = hc_byte_perm_S (w[24], w[25], selector); - w[28] = hc_byte_perm_S (w[23], w[24], selector); - w[27] = hc_byte_perm_S (w[22], w[23], selector); - w[26] = hc_byte_perm_S (w[21], w[22], selector); - w[25] = hc_byte_perm_S (w[20], w[21], selector); - w[24] = hc_byte_perm_S (w[19], w[20], selector); - w[23] = hc_byte_perm_S (w[18], w[19], selector); - w[22] = hc_byte_perm_S (w[17], w[18], selector); 
- w[21] = hc_byte_perm_S (w[16], w[17], selector); - w[20] = hc_byte_perm_S (w[15], w[16], selector); - w[19] = hc_byte_perm_S (w[14], w[15], selector); - w[18] = hc_byte_perm_S (w[13], w[14], selector); - w[17] = hc_byte_perm_S (w[12], w[13], selector); - w[16] = hc_byte_perm_S (w[11], w[12], selector); - w[15] = hc_byte_perm_S (w[10], w[11], selector); - w[14] = hc_byte_perm_S (w[ 9], w[10], selector); - w[13] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[12] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[11] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[10] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[ 9] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 8] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 7] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 6] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 5] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 4] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm_S (w[57], w[58], selector); - w[62] = hc_byte_perm_S (w[56], w[57], selector); - w[61] = hc_byte_perm_S (w[55], w[56], selector); - w[60] = hc_byte_perm_S (w[54], w[55], selector); - w[59] = hc_byte_perm_S (w[53], w[54], selector); - w[58] = hc_byte_perm_S (w[52], w[53], selector); - w[57] = hc_byte_perm_S (w[51], w[52], selector); - w[56] = hc_byte_perm_S (w[50], w[51], selector); - w[55] = hc_byte_perm_S (w[49], w[50], selector); - w[54] = hc_byte_perm_S (w[48], w[49], selector); - w[53] = hc_byte_perm_S (w[47], w[48], selector); - w[52] = hc_byte_perm_S (w[46], w[47], selector); - w[51] = hc_byte_perm_S (w[45], w[46], selector); - w[50] = hc_byte_perm_S (w[44], w[45], selector); - w[49] = hc_byte_perm_S (w[43], w[44], selector); - w[48] = hc_byte_perm_S (w[42], w[43], selector); - w[47] = hc_byte_perm_S (w[41], w[42], selector); - w[46] = hc_byte_perm_S (w[40], w[41], selector); - w[45] = hc_byte_perm_S (w[39], w[40], selector); - w[44] = hc_byte_perm_S (w[38], w[39], 
selector); - w[43] = hc_byte_perm_S (w[37], w[38], selector); - w[42] = hc_byte_perm_S (w[36], w[37], selector); - w[41] = hc_byte_perm_S (w[35], w[36], selector); - w[40] = hc_byte_perm_S (w[34], w[35], selector); - w[39] = hc_byte_perm_S (w[33], w[34], selector); - w[38] = hc_byte_perm_S (w[32], w[33], selector); - w[37] = hc_byte_perm_S (w[31], w[32], selector); - w[36] = hc_byte_perm_S (w[30], w[31], selector); - w[35] = hc_byte_perm_S (w[29], w[30], selector); - w[34] = hc_byte_perm_S (w[28], w[29], selector); - w[33] = hc_byte_perm_S (w[27], w[28], selector); - w[32] = hc_byte_perm_S (w[26], w[27], selector); - w[31] = hc_byte_perm_S (w[25], w[26], selector); - w[30] = hc_byte_perm_S (w[24], w[25], selector); - w[29] = hc_byte_perm_S (w[23], w[24], selector); - w[28] = hc_byte_perm_S (w[22], w[23], selector); - w[27] = hc_byte_perm_S (w[21], w[22], selector); - w[26] = hc_byte_perm_S (w[20], w[21], selector); - w[25] = hc_byte_perm_S (w[19], w[20], selector); - w[24] = hc_byte_perm_S (w[18], w[19], selector); - w[23] = hc_byte_perm_S (w[17], w[18], selector); - w[22] = hc_byte_perm_S (w[16], w[17], selector); - w[21] = hc_byte_perm_S (w[15], w[16], selector); - w[20] = hc_byte_perm_S (w[14], w[15], selector); - w[19] = hc_byte_perm_S (w[13], w[14], selector); - w[18] = hc_byte_perm_S (w[12], w[13], selector); - w[17] = hc_byte_perm_S (w[11], w[12], selector); - w[16] = hc_byte_perm_S (w[10], w[11], selector); - w[15] = hc_byte_perm_S (w[ 9], w[10], selector); - w[14] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[13] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[12] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[11] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[10] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[ 9] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 8] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 7] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 6] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 5] = hc_byte_perm_S ( 0, w[ 0], selector); - 
w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm_S (w[56], w[57], selector); - w[62] = hc_byte_perm_S (w[55], w[56], selector); - w[61] = hc_byte_perm_S (w[54], w[55], selector); - w[60] = hc_byte_perm_S (w[53], w[54], selector); - w[59] = hc_byte_perm_S (w[52], w[53], selector); - w[58] = hc_byte_perm_S (w[51], w[52], selector); - w[57] = hc_byte_perm_S (w[50], w[51], selector); - w[56] = hc_byte_perm_S (w[49], w[50], selector); - w[55] = hc_byte_perm_S (w[48], w[49], selector); - w[54] = hc_byte_perm_S (w[47], w[48], selector); - w[53] = hc_byte_perm_S (w[46], w[47], selector); - w[52] = hc_byte_perm_S (w[45], w[46], selector); - w[51] = hc_byte_perm_S (w[44], w[45], selector); - w[50] = hc_byte_perm_S (w[43], w[44], selector); - w[49] = hc_byte_perm_S (w[42], w[43], selector); - w[48] = hc_byte_perm_S (w[41], w[42], selector); - w[47] = hc_byte_perm_S (w[40], w[41], selector); - w[46] = hc_byte_perm_S (w[39], w[40], selector); - w[45] = hc_byte_perm_S (w[38], w[39], selector); - w[44] = hc_byte_perm_S (w[37], w[38], selector); - w[43] = hc_byte_perm_S (w[36], w[37], selector); - w[42] = hc_byte_perm_S (w[35], w[36], selector); - w[41] = hc_byte_perm_S (w[34], w[35], selector); - w[40] = hc_byte_perm_S (w[33], w[34], selector); - w[39] = hc_byte_perm_S (w[32], w[33], selector); - w[38] = hc_byte_perm_S (w[31], w[32], selector); - w[37] = hc_byte_perm_S (w[30], w[31], selector); - w[36] = hc_byte_perm_S (w[29], w[30], selector); - w[35] = hc_byte_perm_S (w[28], w[29], selector); - w[34] = hc_byte_perm_S (w[27], w[28], selector); - w[33] = hc_byte_perm_S (w[26], w[27], selector); - w[32] = hc_byte_perm_S (w[25], w[26], selector); - w[31] = hc_byte_perm_S (w[24], w[25], selector); - w[30] = hc_byte_perm_S (w[23], w[24], selector); - w[29] = hc_byte_perm_S (w[22], w[23], selector); - w[28] = hc_byte_perm_S (w[21], w[22], selector); - w[27] = hc_byte_perm_S (w[20], w[21], selector); - w[26] = hc_byte_perm_S 
(w[19], w[20], selector); - w[25] = hc_byte_perm_S (w[18], w[19], selector); - w[24] = hc_byte_perm_S (w[17], w[18], selector); - w[23] = hc_byte_perm_S (w[16], w[17], selector); - w[22] = hc_byte_perm_S (w[15], w[16], selector); - w[21] = hc_byte_perm_S (w[14], w[15], selector); - w[20] = hc_byte_perm_S (w[13], w[14], selector); - w[19] = hc_byte_perm_S (w[12], w[13], selector); - w[18] = hc_byte_perm_S (w[11], w[12], selector); - w[17] = hc_byte_perm_S (w[10], w[11], selector); - w[16] = hc_byte_perm_S (w[ 9], w[10], selector); - w[15] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[14] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[13] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[12] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[11] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[10] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[ 9] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 8] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 7] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 6] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm_S (w[55], w[56], selector); - w[62] = hc_byte_perm_S (w[54], w[55], selector); - w[61] = hc_byte_perm_S (w[53], w[54], selector); - w[60] = hc_byte_perm_S (w[52], w[53], selector); - w[59] = hc_byte_perm_S (w[51], w[52], selector); - w[58] = hc_byte_perm_S (w[50], w[51], selector); - w[57] = hc_byte_perm_S (w[49], w[50], selector); - w[56] = hc_byte_perm_S (w[48], w[49], selector); - w[55] = hc_byte_perm_S (w[47], w[48], selector); - w[54] = hc_byte_perm_S (w[46], w[47], selector); - w[53] = hc_byte_perm_S (w[45], w[46], selector); - w[52] = hc_byte_perm_S (w[44], w[45], selector); - w[51] = hc_byte_perm_S (w[43], w[44], selector); - w[50] = hc_byte_perm_S (w[42], w[43], selector); - w[49] = hc_byte_perm_S (w[41], w[42], selector); - w[48] = hc_byte_perm_S (w[40], w[41], selector); - w[47] = hc_byte_perm_S (w[39], w[40], 
selector); - w[46] = hc_byte_perm_S (w[38], w[39], selector); - w[45] = hc_byte_perm_S (w[37], w[38], selector); - w[44] = hc_byte_perm_S (w[36], w[37], selector); - w[43] = hc_byte_perm_S (w[35], w[36], selector); - w[42] = hc_byte_perm_S (w[34], w[35], selector); - w[41] = hc_byte_perm_S (w[33], w[34], selector); - w[40] = hc_byte_perm_S (w[32], w[33], selector); - w[39] = hc_byte_perm_S (w[31], w[32], selector); - w[38] = hc_byte_perm_S (w[30], w[31], selector); - w[37] = hc_byte_perm_S (w[29], w[30], selector); - w[36] = hc_byte_perm_S (w[28], w[29], selector); - w[35] = hc_byte_perm_S (w[27], w[28], selector); - w[34] = hc_byte_perm_S (w[26], w[27], selector); - w[33] = hc_byte_perm_S (w[25], w[26], selector); - w[32] = hc_byte_perm_S (w[24], w[25], selector); - w[31] = hc_byte_perm_S (w[23], w[24], selector); - w[30] = hc_byte_perm_S (w[22], w[23], selector); - w[29] = hc_byte_perm_S (w[21], w[22], selector); - w[28] = hc_byte_perm_S (w[20], w[21], selector); - w[27] = hc_byte_perm_S (w[19], w[20], selector); - w[26] = hc_byte_perm_S (w[18], w[19], selector); - w[25] = hc_byte_perm_S (w[17], w[18], selector); - w[24] = hc_byte_perm_S (w[16], w[17], selector); - w[23] = hc_byte_perm_S (w[15], w[16], selector); - w[22] = hc_byte_perm_S (w[14], w[15], selector); - w[21] = hc_byte_perm_S (w[13], w[14], selector); - w[20] = hc_byte_perm_S (w[12], w[13], selector); - w[19] = hc_byte_perm_S (w[11], w[12], selector); - w[18] = hc_byte_perm_S (w[10], w[11], selector); - w[17] = hc_byte_perm_S (w[ 9], w[10], selector); - w[16] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[15] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[14] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[13] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[12] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[11] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[10] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[ 9] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 8] = hc_byte_perm_S (w[ 0], w[ 1], selector); 
- w[ 7] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm_S (w[54], w[55], selector); - w[62] = hc_byte_perm_S (w[53], w[54], selector); - w[61] = hc_byte_perm_S (w[52], w[53], selector); - w[60] = hc_byte_perm_S (w[51], w[52], selector); - w[59] = hc_byte_perm_S (w[50], w[51], selector); - w[58] = hc_byte_perm_S (w[49], w[50], selector); - w[57] = hc_byte_perm_S (w[48], w[49], selector); - w[56] = hc_byte_perm_S (w[47], w[48], selector); - w[55] = hc_byte_perm_S (w[46], w[47], selector); - w[54] = hc_byte_perm_S (w[45], w[46], selector); - w[53] = hc_byte_perm_S (w[44], w[45], selector); - w[52] = hc_byte_perm_S (w[43], w[44], selector); - w[51] = hc_byte_perm_S (w[42], w[43], selector); - w[50] = hc_byte_perm_S (w[41], w[42], selector); - w[49] = hc_byte_perm_S (w[40], w[41], selector); - w[48] = hc_byte_perm_S (w[39], w[40], selector); - w[47] = hc_byte_perm_S (w[38], w[39], selector); - w[46] = hc_byte_perm_S (w[37], w[38], selector); - w[45] = hc_byte_perm_S (w[36], w[37], selector); - w[44] = hc_byte_perm_S (w[35], w[36], selector); - w[43] = hc_byte_perm_S (w[34], w[35], selector); - w[42] = hc_byte_perm_S (w[33], w[34], selector); - w[41] = hc_byte_perm_S (w[32], w[33], selector); - w[40] = hc_byte_perm_S (w[31], w[32], selector); - w[39] = hc_byte_perm_S (w[30], w[31], selector); - w[38] = hc_byte_perm_S (w[29], w[30], selector); - w[37] = hc_byte_perm_S (w[28], w[29], selector); - w[36] = hc_byte_perm_S (w[27], w[28], selector); - w[35] = hc_byte_perm_S (w[26], w[27], selector); - w[34] = hc_byte_perm_S (w[25], w[26], selector); - w[33] = hc_byte_perm_S (w[24], w[25], selector); - w[32] = hc_byte_perm_S (w[23], w[24], selector); - w[31] = hc_byte_perm_S (w[22], w[23], selector); - w[30] = hc_byte_perm_S (w[21], w[22], selector); - w[29] = hc_byte_perm_S (w[20], w[21], selector); - w[28] = hc_byte_perm_S (w[19], w[20], selector); - 
w[27] = hc_byte_perm_S (w[18], w[19], selector); - w[26] = hc_byte_perm_S (w[17], w[18], selector); - w[25] = hc_byte_perm_S (w[16], w[17], selector); - w[24] = hc_byte_perm_S (w[15], w[16], selector); - w[23] = hc_byte_perm_S (w[14], w[15], selector); - w[22] = hc_byte_perm_S (w[13], w[14], selector); - w[21] = hc_byte_perm_S (w[12], w[13], selector); - w[20] = hc_byte_perm_S (w[11], w[12], selector); - w[19] = hc_byte_perm_S (w[10], w[11], selector); - w[18] = hc_byte_perm_S (w[ 9], w[10], selector); - w[17] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[16] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[15] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[14] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[13] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[12] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[11] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[10] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[ 9] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 8] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm_S (w[53], w[54], selector); - w[62] = hc_byte_perm_S (w[52], w[53], selector); - w[61] = hc_byte_perm_S (w[51], w[52], selector); - w[60] = hc_byte_perm_S (w[50], w[51], selector); - w[59] = hc_byte_perm_S (w[49], w[50], selector); - w[58] = hc_byte_perm_S (w[48], w[49], selector); - w[57] = hc_byte_perm_S (w[47], w[48], selector); - w[56] = hc_byte_perm_S (w[46], w[47], selector); - w[55] = hc_byte_perm_S (w[45], w[46], selector); - w[54] = hc_byte_perm_S (w[44], w[45], selector); - w[53] = hc_byte_perm_S (w[43], w[44], selector); - w[52] = hc_byte_perm_S (w[42], w[43], selector); - w[51] = hc_byte_perm_S (w[41], w[42], selector); - w[50] = hc_byte_perm_S (w[40], w[41], selector); - w[49] = hc_byte_perm_S (w[39], w[40], selector); - w[48] = hc_byte_perm_S (w[38], w[39], selector); - w[47] = hc_byte_perm_S (w[37], w[38], 
selector); - w[46] = hc_byte_perm_S (w[36], w[37], selector); - w[45] = hc_byte_perm_S (w[35], w[36], selector); - w[44] = hc_byte_perm_S (w[34], w[35], selector); - w[43] = hc_byte_perm_S (w[33], w[34], selector); - w[42] = hc_byte_perm_S (w[32], w[33], selector); - w[41] = hc_byte_perm_S (w[31], w[32], selector); - w[40] = hc_byte_perm_S (w[30], w[31], selector); - w[39] = hc_byte_perm_S (w[29], w[30], selector); - w[38] = hc_byte_perm_S (w[28], w[29], selector); - w[37] = hc_byte_perm_S (w[27], w[28], selector); - w[36] = hc_byte_perm_S (w[26], w[27], selector); - w[35] = hc_byte_perm_S (w[25], w[26], selector); - w[34] = hc_byte_perm_S (w[24], w[25], selector); - w[33] = hc_byte_perm_S (w[23], w[24], selector); - w[32] = hc_byte_perm_S (w[22], w[23], selector); - w[31] = hc_byte_perm_S (w[21], w[22], selector); - w[30] = hc_byte_perm_S (w[20], w[21], selector); - w[29] = hc_byte_perm_S (w[19], w[20], selector); - w[28] = hc_byte_perm_S (w[18], w[19], selector); - w[27] = hc_byte_perm_S (w[17], w[18], selector); - w[26] = hc_byte_perm_S (w[16], w[17], selector); - w[25] = hc_byte_perm_S (w[15], w[16], selector); - w[24] = hc_byte_perm_S (w[14], w[15], selector); - w[23] = hc_byte_perm_S (w[13], w[14], selector); - w[22] = hc_byte_perm_S (w[12], w[13], selector); - w[21] = hc_byte_perm_S (w[11], w[12], selector); - w[20] = hc_byte_perm_S (w[10], w[11], selector); - w[19] = hc_byte_perm_S (w[ 9], w[10], selector); - w[18] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[17] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[16] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[15] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[14] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[13] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[12] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[11] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[10] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[ 9] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - 
w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm_S (w[52], w[53], selector); - w[62] = hc_byte_perm_S (w[51], w[52], selector); - w[61] = hc_byte_perm_S (w[50], w[51], selector); - w[60] = hc_byte_perm_S (w[49], w[50], selector); - w[59] = hc_byte_perm_S (w[48], w[49], selector); - w[58] = hc_byte_perm_S (w[47], w[48], selector); - w[57] = hc_byte_perm_S (w[46], w[47], selector); - w[56] = hc_byte_perm_S (w[45], w[46], selector); - w[55] = hc_byte_perm_S (w[44], w[45], selector); - w[54] = hc_byte_perm_S (w[43], w[44], selector); - w[53] = hc_byte_perm_S (w[42], w[43], selector); - w[52] = hc_byte_perm_S (w[41], w[42], selector); - w[51] = hc_byte_perm_S (w[40], w[41], selector); - w[50] = hc_byte_perm_S (w[39], w[40], selector); - w[49] = hc_byte_perm_S (w[38], w[39], selector); - w[48] = hc_byte_perm_S (w[37], w[38], selector); - w[47] = hc_byte_perm_S (w[36], w[37], selector); - w[46] = hc_byte_perm_S (w[35], w[36], selector); - w[45] = hc_byte_perm_S (w[34], w[35], selector); - w[44] = hc_byte_perm_S (w[33], w[34], selector); - w[43] = hc_byte_perm_S (w[32], w[33], selector); - w[42] = hc_byte_perm_S (w[31], w[32], selector); - w[41] = hc_byte_perm_S (w[30], w[31], selector); - w[40] = hc_byte_perm_S (w[29], w[30], selector); - w[39] = hc_byte_perm_S (w[28], w[29], selector); - w[38] = hc_byte_perm_S (w[27], w[28], selector); - w[37] = hc_byte_perm_S (w[26], w[27], selector); - w[36] = hc_byte_perm_S (w[25], w[26], selector); - w[35] = hc_byte_perm_S (w[24], w[25], selector); - w[34] = hc_byte_perm_S (w[23], w[24], selector); - w[33] = hc_byte_perm_S (w[22], w[23], selector); - w[32] = hc_byte_perm_S (w[21], w[22], selector); - w[31] = hc_byte_perm_S (w[20], w[21], selector); - w[30] = hc_byte_perm_S (w[19], w[20], selector); - w[29] = hc_byte_perm_S (w[18], w[19], selector); - w[28] = hc_byte_perm_S (w[17], w[18], selector); - w[27] = hc_byte_perm_S (w[16], w[17], selector); - w[26] = hc_byte_perm_S 
(w[15], w[16], selector); - w[25] = hc_byte_perm_S (w[14], w[15], selector); - w[24] = hc_byte_perm_S (w[13], w[14], selector); - w[23] = hc_byte_perm_S (w[12], w[13], selector); - w[22] = hc_byte_perm_S (w[11], w[12], selector); - w[21] = hc_byte_perm_S (w[10], w[11], selector); - w[20] = hc_byte_perm_S (w[ 9], w[10], selector); - w[19] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[18] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[17] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[16] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[15] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[14] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[13] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[12] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[11] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[10] = hc_byte_perm_S ( 0, w[ 0], selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm_S (w[51], w[52], selector); - w[62] = hc_byte_perm_S (w[50], w[51], selector); - w[61] = hc_byte_perm_S (w[49], w[50], selector); - w[60] = hc_byte_perm_S (w[48], w[49], selector); - w[59] = hc_byte_perm_S (w[47], w[48], selector); - w[58] = hc_byte_perm_S (w[46], w[47], selector); - w[57] = hc_byte_perm_S (w[45], w[46], selector); - w[56] = hc_byte_perm_S (w[44], w[45], selector); - w[55] = hc_byte_perm_S (w[43], w[44], selector); - w[54] = hc_byte_perm_S (w[42], w[43], selector); - w[53] = hc_byte_perm_S (w[41], w[42], selector); - w[52] = hc_byte_perm_S (w[40], w[41], selector); - w[51] = hc_byte_perm_S (w[39], w[40], selector); - w[50] = hc_byte_perm_S (w[38], w[39], selector); - w[49] = hc_byte_perm_S (w[37], w[38], selector); - w[48] = hc_byte_perm_S (w[36], w[37], selector); - w[47] = hc_byte_perm_S (w[35], w[36], selector); - w[46] = hc_byte_perm_S (w[34], w[35], selector); - w[45] = hc_byte_perm_S (w[33], w[34], selector); - w[44] = hc_byte_perm_S (w[32], 
w[33], selector); - w[43] = hc_byte_perm_S (w[31], w[32], selector); - w[42] = hc_byte_perm_S (w[30], w[31], selector); - w[41] = hc_byte_perm_S (w[29], w[30], selector); - w[40] = hc_byte_perm_S (w[28], w[29], selector); - w[39] = hc_byte_perm_S (w[27], w[28], selector); - w[38] = hc_byte_perm_S (w[26], w[27], selector); - w[37] = hc_byte_perm_S (w[25], w[26], selector); - w[36] = hc_byte_perm_S (w[24], w[25], selector); - w[35] = hc_byte_perm_S (w[23], w[24], selector); - w[34] = hc_byte_perm_S (w[22], w[23], selector); - w[33] = hc_byte_perm_S (w[21], w[22], selector); - w[32] = hc_byte_perm_S (w[20], w[21], selector); - w[31] = hc_byte_perm_S (w[19], w[20], selector); - w[30] = hc_byte_perm_S (w[18], w[19], selector); - w[29] = hc_byte_perm_S (w[17], w[18], selector); - w[28] = hc_byte_perm_S (w[16], w[17], selector); - w[27] = hc_byte_perm_S (w[15], w[16], selector); - w[26] = hc_byte_perm_S (w[14], w[15], selector); - w[25] = hc_byte_perm_S (w[13], w[14], selector); - w[24] = hc_byte_perm_S (w[12], w[13], selector); - w[23] = hc_byte_perm_S (w[11], w[12], selector); - w[22] = hc_byte_perm_S (w[10], w[11], selector); - w[21] = hc_byte_perm_S (w[ 9], w[10], selector); - w[20] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[19] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[18] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[17] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[16] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[15] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[14] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[13] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[12] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[11] = hc_byte_perm_S ( 0, w[ 0], selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm_S (w[50], w[51], selector); - w[62] = hc_byte_perm_S (w[49], w[50], selector); - w[61] = hc_byte_perm_S (w[48], 
w[49], selector); - w[60] = hc_byte_perm_S (w[47], w[48], selector); - w[59] = hc_byte_perm_S (w[46], w[47], selector); - w[58] = hc_byte_perm_S (w[45], w[46], selector); - w[57] = hc_byte_perm_S (w[44], w[45], selector); - w[56] = hc_byte_perm_S (w[43], w[44], selector); - w[55] = hc_byte_perm_S (w[42], w[43], selector); - w[54] = hc_byte_perm_S (w[41], w[42], selector); - w[53] = hc_byte_perm_S (w[40], w[41], selector); - w[52] = hc_byte_perm_S (w[39], w[40], selector); - w[51] = hc_byte_perm_S (w[38], w[39], selector); - w[50] = hc_byte_perm_S (w[37], w[38], selector); - w[49] = hc_byte_perm_S (w[36], w[37], selector); - w[48] = hc_byte_perm_S (w[35], w[36], selector); - w[47] = hc_byte_perm_S (w[34], w[35], selector); - w[46] = hc_byte_perm_S (w[33], w[34], selector); - w[45] = hc_byte_perm_S (w[32], w[33], selector); - w[44] = hc_byte_perm_S (w[31], w[32], selector); - w[43] = hc_byte_perm_S (w[30], w[31], selector); - w[42] = hc_byte_perm_S (w[29], w[30], selector); - w[41] = hc_byte_perm_S (w[28], w[29], selector); - w[40] = hc_byte_perm_S (w[27], w[28], selector); - w[39] = hc_byte_perm_S (w[26], w[27], selector); - w[38] = hc_byte_perm_S (w[25], w[26], selector); - w[37] = hc_byte_perm_S (w[24], w[25], selector); - w[36] = hc_byte_perm_S (w[23], w[24], selector); - w[35] = hc_byte_perm_S (w[22], w[23], selector); - w[34] = hc_byte_perm_S (w[21], w[22], selector); - w[33] = hc_byte_perm_S (w[20], w[21], selector); - w[32] = hc_byte_perm_S (w[19], w[20], selector); - w[31] = hc_byte_perm_S (w[18], w[19], selector); - w[30] = hc_byte_perm_S (w[17], w[18], selector); - w[29] = hc_byte_perm_S (w[16], w[17], selector); - w[28] = hc_byte_perm_S (w[15], w[16], selector); - w[27] = hc_byte_perm_S (w[14], w[15], selector); - w[26] = hc_byte_perm_S (w[13], w[14], selector); - w[25] = hc_byte_perm_S (w[12], w[13], selector); - w[24] = hc_byte_perm_S (w[11], w[12], selector); - w[23] = hc_byte_perm_S (w[10], w[11], selector); - w[22] = hc_byte_perm_S (w[ 9], w[10], 
selector); - w[21] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[20] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[19] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[18] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[17] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[16] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[15] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[14] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[13] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[12] = hc_byte_perm_S ( 0, w[ 0], selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm_S (w[49], w[50], selector); - w[62] = hc_byte_perm_S (w[48], w[49], selector); - w[61] = hc_byte_perm_S (w[47], w[48], selector); - w[60] = hc_byte_perm_S (w[46], w[47], selector); - w[59] = hc_byte_perm_S (w[45], w[46], selector); - w[58] = hc_byte_perm_S (w[44], w[45], selector); - w[57] = hc_byte_perm_S (w[43], w[44], selector); - w[56] = hc_byte_perm_S (w[42], w[43], selector); - w[55] = hc_byte_perm_S (w[41], w[42], selector); - w[54] = hc_byte_perm_S (w[40], w[41], selector); - w[53] = hc_byte_perm_S (w[39], w[40], selector); - w[52] = hc_byte_perm_S (w[38], w[39], selector); - w[51] = hc_byte_perm_S (w[37], w[38], selector); - w[50] = hc_byte_perm_S (w[36], w[37], selector); - w[49] = hc_byte_perm_S (w[35], w[36], selector); - w[48] = hc_byte_perm_S (w[34], w[35], selector); - w[47] = hc_byte_perm_S (w[33], w[34], selector); - w[46] = hc_byte_perm_S (w[32], w[33], selector); - w[45] = hc_byte_perm_S (w[31], w[32], selector); - w[44] = hc_byte_perm_S (w[30], w[31], selector); - w[43] = hc_byte_perm_S (w[29], w[30], selector); - w[42] = hc_byte_perm_S (w[28], w[29], selector); - w[41] = hc_byte_perm_S (w[27], w[28], selector); - w[40] = hc_byte_perm_S (w[26], w[27], selector); - w[39] = hc_byte_perm_S (w[25], w[26], selector); - w[38] = hc_byte_perm_S 
(w[24], w[25], selector); - w[37] = hc_byte_perm_S (w[23], w[24], selector); - w[36] = hc_byte_perm_S (w[22], w[23], selector); - w[35] = hc_byte_perm_S (w[21], w[22], selector); - w[34] = hc_byte_perm_S (w[20], w[21], selector); - w[33] = hc_byte_perm_S (w[19], w[20], selector); - w[32] = hc_byte_perm_S (w[18], w[19], selector); - w[31] = hc_byte_perm_S (w[17], w[18], selector); - w[30] = hc_byte_perm_S (w[16], w[17], selector); - w[29] = hc_byte_perm_S (w[15], w[16], selector); - w[28] = hc_byte_perm_S (w[14], w[15], selector); - w[27] = hc_byte_perm_S (w[13], w[14], selector); - w[26] = hc_byte_perm_S (w[12], w[13], selector); - w[25] = hc_byte_perm_S (w[11], w[12], selector); - w[24] = hc_byte_perm_S (w[10], w[11], selector); - w[23] = hc_byte_perm_S (w[ 9], w[10], selector); - w[22] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[21] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[20] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[19] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[18] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[17] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[16] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[15] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[14] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[13] = hc_byte_perm_S ( 0, w[ 0], selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm_S (w[48], w[49], selector); - w[62] = hc_byte_perm_S (w[47], w[48], selector); - w[61] = hc_byte_perm_S (w[46], w[47], selector); - w[60] = hc_byte_perm_S (w[45], w[46], selector); - w[59] = hc_byte_perm_S (w[44], w[45], selector); - w[58] = hc_byte_perm_S (w[43], w[44], selector); - w[57] = hc_byte_perm_S (w[42], w[43], selector); - w[56] = hc_byte_perm_S (w[41], w[42], selector); - w[55] = hc_byte_perm_S (w[40], w[41], selector); - w[54] = hc_byte_perm_S (w[39], w[40], 
selector); - w[53] = hc_byte_perm_S (w[38], w[39], selector); - w[52] = hc_byte_perm_S (w[37], w[38], selector); - w[51] = hc_byte_perm_S (w[36], w[37], selector); - w[50] = hc_byte_perm_S (w[35], w[36], selector); - w[49] = hc_byte_perm_S (w[34], w[35], selector); - w[48] = hc_byte_perm_S (w[33], w[34], selector); - w[47] = hc_byte_perm_S (w[32], w[33], selector); - w[46] = hc_byte_perm_S (w[31], w[32], selector); - w[45] = hc_byte_perm_S (w[30], w[31], selector); - w[44] = hc_byte_perm_S (w[29], w[30], selector); - w[43] = hc_byte_perm_S (w[28], w[29], selector); - w[42] = hc_byte_perm_S (w[27], w[28], selector); - w[41] = hc_byte_perm_S (w[26], w[27], selector); - w[40] = hc_byte_perm_S (w[25], w[26], selector); - w[39] = hc_byte_perm_S (w[24], w[25], selector); - w[38] = hc_byte_perm_S (w[23], w[24], selector); - w[37] = hc_byte_perm_S (w[22], w[23], selector); - w[36] = hc_byte_perm_S (w[21], w[22], selector); - w[35] = hc_byte_perm_S (w[20], w[21], selector); - w[34] = hc_byte_perm_S (w[19], w[20], selector); - w[33] = hc_byte_perm_S (w[18], w[19], selector); - w[32] = hc_byte_perm_S (w[17], w[18], selector); - w[31] = hc_byte_perm_S (w[16], w[17], selector); - w[30] = hc_byte_perm_S (w[15], w[16], selector); - w[29] = hc_byte_perm_S (w[14], w[15], selector); - w[28] = hc_byte_perm_S (w[13], w[14], selector); - w[27] = hc_byte_perm_S (w[12], w[13], selector); - w[26] = hc_byte_perm_S (w[11], w[12], selector); - w[25] = hc_byte_perm_S (w[10], w[11], selector); - w[24] = hc_byte_perm_S (w[ 9], w[10], selector); - w[23] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[22] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[21] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[20] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[19] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[18] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[17] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[16] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[15] = hc_byte_perm_S (w[ 0], w[ 1], selector); 
- w[14] = hc_byte_perm_S ( 0, w[ 0], selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm_S (w[47], w[48], selector); - w[62] = hc_byte_perm_S (w[46], w[47], selector); - w[61] = hc_byte_perm_S (w[45], w[46], selector); - w[60] = hc_byte_perm_S (w[44], w[45], selector); - w[59] = hc_byte_perm_S (w[43], w[44], selector); - w[58] = hc_byte_perm_S (w[42], w[43], selector); - w[57] = hc_byte_perm_S (w[41], w[42], selector); - w[56] = hc_byte_perm_S (w[40], w[41], selector); - w[55] = hc_byte_perm_S (w[39], w[40], selector); - w[54] = hc_byte_perm_S (w[38], w[39], selector); - w[53] = hc_byte_perm_S (w[37], w[38], selector); - w[52] = hc_byte_perm_S (w[36], w[37], selector); - w[51] = hc_byte_perm_S (w[35], w[36], selector); - w[50] = hc_byte_perm_S (w[34], w[35], selector); - w[49] = hc_byte_perm_S (w[33], w[34], selector); - w[48] = hc_byte_perm_S (w[32], w[33], selector); - w[47] = hc_byte_perm_S (w[31], w[32], selector); - w[46] = hc_byte_perm_S (w[30], w[31], selector); - w[45] = hc_byte_perm_S (w[29], w[30], selector); - w[44] = hc_byte_perm_S (w[28], w[29], selector); - w[43] = hc_byte_perm_S (w[27], w[28], selector); - w[42] = hc_byte_perm_S (w[26], w[27], selector); - w[41] = hc_byte_perm_S (w[25], w[26], selector); - w[40] = hc_byte_perm_S (w[24], w[25], selector); - w[39] = hc_byte_perm_S (w[23], w[24], selector); - w[38] = hc_byte_perm_S (w[22], w[23], selector); - w[37] = hc_byte_perm_S (w[21], w[22], selector); - w[36] = hc_byte_perm_S (w[20], w[21], selector); - w[35] = hc_byte_perm_S (w[19], w[20], selector); - w[34] = hc_byte_perm_S (w[18], w[19], selector); - w[33] = hc_byte_perm_S (w[17], w[18], selector); - w[32] = hc_byte_perm_S (w[16], w[17], selector); - w[31] = hc_byte_perm_S (w[15], w[16], selector); - w[30] = hc_byte_perm_S (w[14], w[15], selector); - w[29] = 
hc_byte_perm_S (w[13], w[14], selector); - w[28] = hc_byte_perm_S (w[12], w[13], selector); - w[27] = hc_byte_perm_S (w[11], w[12], selector); - w[26] = hc_byte_perm_S (w[10], w[11], selector); - w[25] = hc_byte_perm_S (w[ 9], w[10], selector); - w[24] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[23] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[22] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[21] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[20] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[19] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[18] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[17] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[16] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[15] = hc_byte_perm_S ( 0, w[ 0], selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm_S (w[46], w[47], selector); - w[62] = hc_byte_perm_S (w[45], w[46], selector); - w[61] = hc_byte_perm_S (w[44], w[45], selector); - w[60] = hc_byte_perm_S (w[43], w[44], selector); - w[59] = hc_byte_perm_S (w[42], w[43], selector); - w[58] = hc_byte_perm_S (w[41], w[42], selector); - w[57] = hc_byte_perm_S (w[40], w[41], selector); - w[56] = hc_byte_perm_S (w[39], w[40], selector); - w[55] = hc_byte_perm_S (w[38], w[39], selector); - w[54] = hc_byte_perm_S (w[37], w[38], selector); - w[53] = hc_byte_perm_S (w[36], w[37], selector); - w[52] = hc_byte_perm_S (w[35], w[36], selector); - w[51] = hc_byte_perm_S (w[34], w[35], selector); - w[50] = hc_byte_perm_S (w[33], w[34], selector); - w[49] = hc_byte_perm_S (w[32], w[33], selector); - w[48] = hc_byte_perm_S (w[31], w[32], selector); - w[47] = hc_byte_perm_S (w[30], w[31], selector); - w[46] = hc_byte_perm_S (w[29], w[30], selector); - w[45] = hc_byte_perm_S (w[28], w[29], selector); - w[44] = hc_byte_perm_S (w[27], w[28], selector); - w[43] 
= hc_byte_perm_S (w[26], w[27], selector); - w[42] = hc_byte_perm_S (w[25], w[26], selector); - w[41] = hc_byte_perm_S (w[24], w[25], selector); - w[40] = hc_byte_perm_S (w[23], w[24], selector); - w[39] = hc_byte_perm_S (w[22], w[23], selector); - w[38] = hc_byte_perm_S (w[21], w[22], selector); - w[37] = hc_byte_perm_S (w[20], w[21], selector); - w[36] = hc_byte_perm_S (w[19], w[20], selector); - w[35] = hc_byte_perm_S (w[18], w[19], selector); - w[34] = hc_byte_perm_S (w[17], w[18], selector); - w[33] = hc_byte_perm_S (w[16], w[17], selector); - w[32] = hc_byte_perm_S (w[15], w[16], selector); - w[31] = hc_byte_perm_S (w[14], w[15], selector); - w[30] = hc_byte_perm_S (w[13], w[14], selector); - w[29] = hc_byte_perm_S (w[12], w[13], selector); - w[28] = hc_byte_perm_S (w[11], w[12], selector); - w[27] = hc_byte_perm_S (w[10], w[11], selector); - w[26] = hc_byte_perm_S (w[ 9], w[10], selector); - w[25] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[24] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[23] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[22] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[21] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[20] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[19] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[18] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[17] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[16] = hc_byte_perm_S ( 0, w[ 0], selector); - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm_S (w[45], w[46], selector); - w[62] = hc_byte_perm_S (w[44], w[45], selector); - w[61] = hc_byte_perm_S (w[43], w[44], selector); - w[60] = hc_byte_perm_S (w[42], w[43], selector); - w[59] = hc_byte_perm_S (w[41], w[42], selector); - w[58] = hc_byte_perm_S (w[40], w[41], selector); - w[57] = hc_byte_perm_S (w[39], w[40], 
selector); - w[56] = hc_byte_perm_S (w[38], w[39], selector); - w[55] = hc_byte_perm_S (w[37], w[38], selector); - w[54] = hc_byte_perm_S (w[36], w[37], selector); - w[53] = hc_byte_perm_S (w[35], w[36], selector); - w[52] = hc_byte_perm_S (w[34], w[35], selector); - w[51] = hc_byte_perm_S (w[33], w[34], selector); - w[50] = hc_byte_perm_S (w[32], w[33], selector); - w[49] = hc_byte_perm_S (w[31], w[32], selector); - w[48] = hc_byte_perm_S (w[30], w[31], selector); - w[47] = hc_byte_perm_S (w[29], w[30], selector); - w[46] = hc_byte_perm_S (w[28], w[29], selector); - w[45] = hc_byte_perm_S (w[27], w[28], selector); - w[44] = hc_byte_perm_S (w[26], w[27], selector); - w[43] = hc_byte_perm_S (w[25], w[26], selector); - w[42] = hc_byte_perm_S (w[24], w[25], selector); - w[41] = hc_byte_perm_S (w[23], w[24], selector); - w[40] = hc_byte_perm_S (w[22], w[23], selector); - w[39] = hc_byte_perm_S (w[21], w[22], selector); - w[38] = hc_byte_perm_S (w[20], w[21], selector); - w[37] = hc_byte_perm_S (w[19], w[20], selector); - w[36] = hc_byte_perm_S (w[18], w[19], selector); - w[35] = hc_byte_perm_S (w[17], w[18], selector); - w[34] = hc_byte_perm_S (w[16], w[17], selector); - w[33] = hc_byte_perm_S (w[15], w[16], selector); - w[32] = hc_byte_perm_S (w[14], w[15], selector); - w[31] = hc_byte_perm_S (w[13], w[14], selector); - w[30] = hc_byte_perm_S (w[12], w[13], selector); - w[29] = hc_byte_perm_S (w[11], w[12], selector); - w[28] = hc_byte_perm_S (w[10], w[11], selector); - w[27] = hc_byte_perm_S (w[ 9], w[10], selector); - w[26] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[25] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[24] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[23] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[22] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[21] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[20] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[19] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[18] = hc_byte_perm_S (w[ 0], w[ 1], selector); 
- w[17] = hc_byte_perm_S ( 0, w[ 0], selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm_S (w[44], w[45], selector); - w[62] = hc_byte_perm_S (w[43], w[44], selector); - w[61] = hc_byte_perm_S (w[42], w[43], selector); - w[60] = hc_byte_perm_S (w[41], w[42], selector); - w[59] = hc_byte_perm_S (w[40], w[41], selector); - w[58] = hc_byte_perm_S (w[39], w[40], selector); - w[57] = hc_byte_perm_S (w[38], w[39], selector); - w[56] = hc_byte_perm_S (w[37], w[38], selector); - w[55] = hc_byte_perm_S (w[36], w[37], selector); - w[54] = hc_byte_perm_S (w[35], w[36], selector); - w[53] = hc_byte_perm_S (w[34], w[35], selector); - w[52] = hc_byte_perm_S (w[33], w[34], selector); - w[51] = hc_byte_perm_S (w[32], w[33], selector); - w[50] = hc_byte_perm_S (w[31], w[32], selector); - w[49] = hc_byte_perm_S (w[30], w[31], selector); - w[48] = hc_byte_perm_S (w[29], w[30], selector); - w[47] = hc_byte_perm_S (w[28], w[29], selector); - w[46] = hc_byte_perm_S (w[27], w[28], selector); - w[45] = hc_byte_perm_S (w[26], w[27], selector); - w[44] = hc_byte_perm_S (w[25], w[26], selector); - w[43] = hc_byte_perm_S (w[24], w[25], selector); - w[42] = hc_byte_perm_S (w[23], w[24], selector); - w[41] = hc_byte_perm_S (w[22], w[23], selector); - w[40] = hc_byte_perm_S (w[21], w[22], selector); - w[39] = hc_byte_perm_S (w[20], w[21], selector); - w[38] = hc_byte_perm_S (w[19], w[20], selector); - w[37] = hc_byte_perm_S (w[18], w[19], selector); - w[36] = hc_byte_perm_S (w[17], w[18], selector); - w[35] = hc_byte_perm_S (w[16], w[17], selector); - w[34] = hc_byte_perm_S (w[15], w[16], selector); - w[33] = hc_byte_perm_S (w[14], w[15], selector); - w[32] = hc_byte_perm_S (w[13], w[14], selector); - w[31] = hc_byte_perm_S (w[12], w[13], selector); - w[30] = 
hc_byte_perm_S (w[11], w[12], selector); - w[29] = hc_byte_perm_S (w[10], w[11], selector); - w[28] = hc_byte_perm_S (w[ 9], w[10], selector); - w[27] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[26] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[25] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[24] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[23] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[22] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[21] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[20] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[19] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[18] = hc_byte_perm_S ( 0, w[ 0], selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm_S (w[43], w[44], selector); - w[62] = hc_byte_perm_S (w[42], w[43], selector); - w[61] = hc_byte_perm_S (w[41], w[42], selector); - w[60] = hc_byte_perm_S (w[40], w[41], selector); - w[59] = hc_byte_perm_S (w[39], w[40], selector); - w[58] = hc_byte_perm_S (w[38], w[39], selector); - w[57] = hc_byte_perm_S (w[37], w[38], selector); - w[56] = hc_byte_perm_S (w[36], w[37], selector); - w[55] = hc_byte_perm_S (w[35], w[36], selector); - w[54] = hc_byte_perm_S (w[34], w[35], selector); - w[53] = hc_byte_perm_S (w[33], w[34], selector); - w[52] = hc_byte_perm_S (w[32], w[33], selector); - w[51] = hc_byte_perm_S (w[31], w[32], selector); - w[50] = hc_byte_perm_S (w[30], w[31], selector); - w[49] = hc_byte_perm_S (w[29], w[30], selector); - w[48] = hc_byte_perm_S (w[28], w[29], selector); - w[47] = hc_byte_perm_S (w[27], w[28], selector); - w[46] = hc_byte_perm_S (w[26], w[27], selector); - w[45] = hc_byte_perm_S (w[25], w[26], selector); - w[44] = hc_byte_perm_S (w[24], w[25], selector); - w[43] = hc_byte_perm_S (w[23], w[24], selector); - w[42] = 
hc_byte_perm_S (w[22], w[23], selector); - w[41] = hc_byte_perm_S (w[21], w[22], selector); - w[40] = hc_byte_perm_S (w[20], w[21], selector); - w[39] = hc_byte_perm_S (w[19], w[20], selector); - w[38] = hc_byte_perm_S (w[18], w[19], selector); - w[37] = hc_byte_perm_S (w[17], w[18], selector); - w[36] = hc_byte_perm_S (w[16], w[17], selector); - w[35] = hc_byte_perm_S (w[15], w[16], selector); - w[34] = hc_byte_perm_S (w[14], w[15], selector); - w[33] = hc_byte_perm_S (w[13], w[14], selector); - w[32] = hc_byte_perm_S (w[12], w[13], selector); - w[31] = hc_byte_perm_S (w[11], w[12], selector); - w[30] = hc_byte_perm_S (w[10], w[11], selector); - w[29] = hc_byte_perm_S (w[ 9], w[10], selector); - w[28] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[27] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[26] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[25] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[24] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[23] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[22] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[21] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[20] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[19] = hc_byte_perm_S ( 0, w[ 0], selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 20: - w[63] = hc_byte_perm_S (w[42], w[43], selector); - w[62] = hc_byte_perm_S (w[41], w[42], selector); - w[61] = hc_byte_perm_S (w[40], w[41], selector); - w[60] = hc_byte_perm_S (w[39], w[40], selector); - w[59] = hc_byte_perm_S (w[38], w[39], selector); - w[58] = hc_byte_perm_S (w[37], w[38], selector); - w[57] = hc_byte_perm_S (w[36], w[37], selector); - w[56] = hc_byte_perm_S (w[35], w[36], selector); - w[55] = hc_byte_perm_S (w[34], w[35], selector); - w[54] = hc_byte_perm_S (w[33], w[34], selector); - w[53] 
= hc_byte_perm_S (w[32], w[33], selector); - w[52] = hc_byte_perm_S (w[31], w[32], selector); - w[51] = hc_byte_perm_S (w[30], w[31], selector); - w[50] = hc_byte_perm_S (w[29], w[30], selector); - w[49] = hc_byte_perm_S (w[28], w[29], selector); - w[48] = hc_byte_perm_S (w[27], w[28], selector); - w[47] = hc_byte_perm_S (w[26], w[27], selector); - w[46] = hc_byte_perm_S (w[25], w[26], selector); - w[45] = hc_byte_perm_S (w[24], w[25], selector); - w[44] = hc_byte_perm_S (w[23], w[24], selector); - w[43] = hc_byte_perm_S (w[22], w[23], selector); - w[42] = hc_byte_perm_S (w[21], w[22], selector); - w[41] = hc_byte_perm_S (w[20], w[21], selector); - w[40] = hc_byte_perm_S (w[19], w[20], selector); - w[39] = hc_byte_perm_S (w[18], w[19], selector); - w[38] = hc_byte_perm_S (w[17], w[18], selector); - w[37] = hc_byte_perm_S (w[16], w[17], selector); - w[36] = hc_byte_perm_S (w[15], w[16], selector); - w[35] = hc_byte_perm_S (w[14], w[15], selector); - w[34] = hc_byte_perm_S (w[13], w[14], selector); - w[33] = hc_byte_perm_S (w[12], w[13], selector); - w[32] = hc_byte_perm_S (w[11], w[12], selector); - w[31] = hc_byte_perm_S (w[10], w[11], selector); - w[30] = hc_byte_perm_S (w[ 9], w[10], selector); - w[29] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[28] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[27] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[26] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[25] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[24] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[23] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[22] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[21] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[20] = hc_byte_perm_S ( 0, w[ 0], selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - 
case 21: - w[63] = hc_byte_perm_S (w[41], w[42], selector); - w[62] = hc_byte_perm_S (w[40], w[41], selector); - w[61] = hc_byte_perm_S (w[39], w[40], selector); - w[60] = hc_byte_perm_S (w[38], w[39], selector); - w[59] = hc_byte_perm_S (w[37], w[38], selector); - w[58] = hc_byte_perm_S (w[36], w[37], selector); - w[57] = hc_byte_perm_S (w[35], w[36], selector); - w[56] = hc_byte_perm_S (w[34], w[35], selector); - w[55] = hc_byte_perm_S (w[33], w[34], selector); - w[54] = hc_byte_perm_S (w[32], w[33], selector); - w[53] = hc_byte_perm_S (w[31], w[32], selector); - w[52] = hc_byte_perm_S (w[30], w[31], selector); - w[51] = hc_byte_perm_S (w[29], w[30], selector); - w[50] = hc_byte_perm_S (w[28], w[29], selector); - w[49] = hc_byte_perm_S (w[27], w[28], selector); - w[48] = hc_byte_perm_S (w[26], w[27], selector); - w[47] = hc_byte_perm_S (w[25], w[26], selector); - w[46] = hc_byte_perm_S (w[24], w[25], selector); - w[45] = hc_byte_perm_S (w[23], w[24], selector); - w[44] = hc_byte_perm_S (w[22], w[23], selector); - w[43] = hc_byte_perm_S (w[21], w[22], selector); - w[42] = hc_byte_perm_S (w[20], w[21], selector); - w[41] = hc_byte_perm_S (w[19], w[20], selector); - w[40] = hc_byte_perm_S (w[18], w[19], selector); - w[39] = hc_byte_perm_S (w[17], w[18], selector); - w[38] = hc_byte_perm_S (w[16], w[17], selector); - w[37] = hc_byte_perm_S (w[15], w[16], selector); - w[36] = hc_byte_perm_S (w[14], w[15], selector); - w[35] = hc_byte_perm_S (w[13], w[14], selector); - w[34] = hc_byte_perm_S (w[12], w[13], selector); - w[33] = hc_byte_perm_S (w[11], w[12], selector); - w[32] = hc_byte_perm_S (w[10], w[11], selector); - w[31] = hc_byte_perm_S (w[ 9], w[10], selector); - w[30] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[29] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[28] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[27] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[26] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[25] = hc_byte_perm_S (w[ 3], w[ 4], selector); - 
w[24] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[23] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[22] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[21] = hc_byte_perm_S ( 0, w[ 0], selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm_S (w[40], w[41], selector); - w[62] = hc_byte_perm_S (w[39], w[40], selector); - w[61] = hc_byte_perm_S (w[38], w[39], selector); - w[60] = hc_byte_perm_S (w[37], w[38], selector); - w[59] = hc_byte_perm_S (w[36], w[37], selector); - w[58] = hc_byte_perm_S (w[35], w[36], selector); - w[57] = hc_byte_perm_S (w[34], w[35], selector); - w[56] = hc_byte_perm_S (w[33], w[34], selector); - w[55] = hc_byte_perm_S (w[32], w[33], selector); - w[54] = hc_byte_perm_S (w[31], w[32], selector); - w[53] = hc_byte_perm_S (w[30], w[31], selector); - w[52] = hc_byte_perm_S (w[29], w[30], selector); - w[51] = hc_byte_perm_S (w[28], w[29], selector); - w[50] = hc_byte_perm_S (w[27], w[28], selector); - w[49] = hc_byte_perm_S (w[26], w[27], selector); - w[48] = hc_byte_perm_S (w[25], w[26], selector); - w[47] = hc_byte_perm_S (w[24], w[25], selector); - w[46] = hc_byte_perm_S (w[23], w[24], selector); - w[45] = hc_byte_perm_S (w[22], w[23], selector); - w[44] = hc_byte_perm_S (w[21], w[22], selector); - w[43] = hc_byte_perm_S (w[20], w[21], selector); - w[42] = hc_byte_perm_S (w[19], w[20], selector); - w[41] = hc_byte_perm_S (w[18], w[19], selector); - w[40] = hc_byte_perm_S (w[17], w[18], selector); - w[39] = hc_byte_perm_S (w[16], w[17], selector); - w[38] = hc_byte_perm_S (w[15], w[16], selector); - w[37] = hc_byte_perm_S (w[14], w[15], selector); - w[36] = hc_byte_perm_S (w[13], w[14], selector); - w[35] = hc_byte_perm_S (w[12], w[13], selector); - w[34] = hc_byte_perm_S 
(w[11], w[12], selector); - w[33] = hc_byte_perm_S (w[10], w[11], selector); - w[32] = hc_byte_perm_S (w[ 9], w[10], selector); - w[31] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[30] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[29] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[28] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[27] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[26] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[25] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[24] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[23] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[22] = hc_byte_perm_S ( 0, w[ 0], selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm_S (w[39], w[40], selector); - w[62] = hc_byte_perm_S (w[38], w[39], selector); - w[61] = hc_byte_perm_S (w[37], w[38], selector); - w[60] = hc_byte_perm_S (w[36], w[37], selector); - w[59] = hc_byte_perm_S (w[35], w[36], selector); - w[58] = hc_byte_perm_S (w[34], w[35], selector); - w[57] = hc_byte_perm_S (w[33], w[34], selector); - w[56] = hc_byte_perm_S (w[32], w[33], selector); - w[55] = hc_byte_perm_S (w[31], w[32], selector); - w[54] = hc_byte_perm_S (w[30], w[31], selector); - w[53] = hc_byte_perm_S (w[29], w[30], selector); - w[52] = hc_byte_perm_S (w[28], w[29], selector); - w[51] = hc_byte_perm_S (w[27], w[28], selector); - w[50] = hc_byte_perm_S (w[26], w[27], selector); - w[49] = hc_byte_perm_S (w[25], w[26], selector); - w[48] = hc_byte_perm_S (w[24], w[25], selector); - w[47] = hc_byte_perm_S (w[23], w[24], selector); - w[46] = hc_byte_perm_S (w[22], w[23], selector); - w[45] = hc_byte_perm_S (w[21], w[22], selector); - w[44] = hc_byte_perm_S (w[20], w[21], selector); - w[43] = hc_byte_perm_S (w[19], 
w[20], selector); - w[42] = hc_byte_perm_S (w[18], w[19], selector); - w[41] = hc_byte_perm_S (w[17], w[18], selector); - w[40] = hc_byte_perm_S (w[16], w[17], selector); - w[39] = hc_byte_perm_S (w[15], w[16], selector); - w[38] = hc_byte_perm_S (w[14], w[15], selector); - w[37] = hc_byte_perm_S (w[13], w[14], selector); - w[36] = hc_byte_perm_S (w[12], w[13], selector); - w[35] = hc_byte_perm_S (w[11], w[12], selector); - w[34] = hc_byte_perm_S (w[10], w[11], selector); - w[33] = hc_byte_perm_S (w[ 9], w[10], selector); - w[32] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[31] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[30] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[29] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[28] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[27] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[26] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[25] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[24] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[23] = hc_byte_perm_S ( 0, w[ 0], selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm_S (w[38], w[39], selector); - w[62] = hc_byte_perm_S (w[37], w[38], selector); - w[61] = hc_byte_perm_S (w[36], w[37], selector); - w[60] = hc_byte_perm_S (w[35], w[36], selector); - w[59] = hc_byte_perm_S (w[34], w[35], selector); - w[58] = hc_byte_perm_S (w[33], w[34], selector); - w[57] = hc_byte_perm_S (w[32], w[33], selector); - w[56] = hc_byte_perm_S (w[31], w[32], selector); - w[55] = hc_byte_perm_S (w[30], w[31], selector); - w[54] = hc_byte_perm_S (w[29], w[30], selector); - w[53] = hc_byte_perm_S (w[28], w[29], selector); - w[52] = hc_byte_perm_S (w[27], w[28], selector); - w[51] = hc_byte_perm_S 
(w[26], w[27], selector); - w[50] = hc_byte_perm_S (w[25], w[26], selector); - w[49] = hc_byte_perm_S (w[24], w[25], selector); - w[48] = hc_byte_perm_S (w[23], w[24], selector); - w[47] = hc_byte_perm_S (w[22], w[23], selector); - w[46] = hc_byte_perm_S (w[21], w[22], selector); - w[45] = hc_byte_perm_S (w[20], w[21], selector); - w[44] = hc_byte_perm_S (w[19], w[20], selector); - w[43] = hc_byte_perm_S (w[18], w[19], selector); - w[42] = hc_byte_perm_S (w[17], w[18], selector); - w[41] = hc_byte_perm_S (w[16], w[17], selector); - w[40] = hc_byte_perm_S (w[15], w[16], selector); - w[39] = hc_byte_perm_S (w[14], w[15], selector); - w[38] = hc_byte_perm_S (w[13], w[14], selector); - w[37] = hc_byte_perm_S (w[12], w[13], selector); - w[36] = hc_byte_perm_S (w[11], w[12], selector); - w[35] = hc_byte_perm_S (w[10], w[11], selector); - w[34] = hc_byte_perm_S (w[ 9], w[10], selector); - w[33] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[32] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[31] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[30] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[29] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[28] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[27] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[26] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[25] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[24] = hc_byte_perm_S ( 0, w[ 0], selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm_S (w[37], w[38], selector); - w[62] = hc_byte_perm_S (w[36], w[37], selector); - w[61] = hc_byte_perm_S (w[35], w[36], selector); - w[60] = hc_byte_perm_S (w[34], w[35], selector); - w[59] = hc_byte_perm_S (w[33], w[34], selector); - w[58] 
= hc_byte_perm_S (w[32], w[33], selector); - w[57] = hc_byte_perm_S (w[31], w[32], selector); - w[56] = hc_byte_perm_S (w[30], w[31], selector); - w[55] = hc_byte_perm_S (w[29], w[30], selector); - w[54] = hc_byte_perm_S (w[28], w[29], selector); - w[53] = hc_byte_perm_S (w[27], w[28], selector); - w[52] = hc_byte_perm_S (w[26], w[27], selector); - w[51] = hc_byte_perm_S (w[25], w[26], selector); - w[50] = hc_byte_perm_S (w[24], w[25], selector); - w[49] = hc_byte_perm_S (w[23], w[24], selector); - w[48] = hc_byte_perm_S (w[22], w[23], selector); - w[47] = hc_byte_perm_S (w[21], w[22], selector); - w[46] = hc_byte_perm_S (w[20], w[21], selector); - w[45] = hc_byte_perm_S (w[19], w[20], selector); - w[44] = hc_byte_perm_S (w[18], w[19], selector); - w[43] = hc_byte_perm_S (w[17], w[18], selector); - w[42] = hc_byte_perm_S (w[16], w[17], selector); - w[41] = hc_byte_perm_S (w[15], w[16], selector); - w[40] = hc_byte_perm_S (w[14], w[15], selector); - w[39] = hc_byte_perm_S (w[13], w[14], selector); - w[38] = hc_byte_perm_S (w[12], w[13], selector); - w[37] = hc_byte_perm_S (w[11], w[12], selector); - w[36] = hc_byte_perm_S (w[10], w[11], selector); - w[35] = hc_byte_perm_S (w[ 9], w[10], selector); - w[34] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[33] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[32] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[31] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[30] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[29] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[28] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[27] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[26] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[25] = hc_byte_perm_S ( 0, w[ 0], selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - 
w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm_S (w[36], w[37], selector); - w[62] = hc_byte_perm_S (w[35], w[36], selector); - w[61] = hc_byte_perm_S (w[34], w[35], selector); - w[60] = hc_byte_perm_S (w[33], w[34], selector); - w[59] = hc_byte_perm_S (w[32], w[33], selector); - w[58] = hc_byte_perm_S (w[31], w[32], selector); - w[57] = hc_byte_perm_S (w[30], w[31], selector); - w[56] = hc_byte_perm_S (w[29], w[30], selector); - w[55] = hc_byte_perm_S (w[28], w[29], selector); - w[54] = hc_byte_perm_S (w[27], w[28], selector); - w[53] = hc_byte_perm_S (w[26], w[27], selector); - w[52] = hc_byte_perm_S (w[25], w[26], selector); - w[51] = hc_byte_perm_S (w[24], w[25], selector); - w[50] = hc_byte_perm_S (w[23], w[24], selector); - w[49] = hc_byte_perm_S (w[22], w[23], selector); - w[48] = hc_byte_perm_S (w[21], w[22], selector); - w[47] = hc_byte_perm_S (w[20], w[21], selector); - w[46] = hc_byte_perm_S (w[19], w[20], selector); - w[45] = hc_byte_perm_S (w[18], w[19], selector); - w[44] = hc_byte_perm_S (w[17], w[18], selector); - w[43] = hc_byte_perm_S (w[16], w[17], selector); - w[42] = hc_byte_perm_S (w[15], w[16], selector); - w[41] = hc_byte_perm_S (w[14], w[15], selector); - w[40] = hc_byte_perm_S (w[13], w[14], selector); - w[39] = hc_byte_perm_S (w[12], w[13], selector); - w[38] = hc_byte_perm_S (w[11], w[12], selector); - w[37] = hc_byte_perm_S (w[10], w[11], selector); - w[36] = hc_byte_perm_S (w[ 9], w[10], selector); - w[35] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[34] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[33] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[32] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[31] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[30] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[29] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[28] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[27] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[26] = hc_byte_perm_S ( 0, w[ 0], 
selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm_S (w[35], w[36], selector); - w[62] = hc_byte_perm_S (w[34], w[35], selector); - w[61] = hc_byte_perm_S (w[33], w[34], selector); - w[60] = hc_byte_perm_S (w[32], w[33], selector); - w[59] = hc_byte_perm_S (w[31], w[32], selector); - w[58] = hc_byte_perm_S (w[30], w[31], selector); - w[57] = hc_byte_perm_S (w[29], w[30], selector); - w[56] = hc_byte_perm_S (w[28], w[29], selector); - w[55] = hc_byte_perm_S (w[27], w[28], selector); - w[54] = hc_byte_perm_S (w[26], w[27], selector); - w[53] = hc_byte_perm_S (w[25], w[26], selector); - w[52] = hc_byte_perm_S (w[24], w[25], selector); - w[51] = hc_byte_perm_S (w[23], w[24], selector); - w[50] = hc_byte_perm_S (w[22], w[23], selector); - w[49] = hc_byte_perm_S (w[21], w[22], selector); - w[48] = hc_byte_perm_S (w[20], w[21], selector); - w[47] = hc_byte_perm_S (w[19], w[20], selector); - w[46] = hc_byte_perm_S (w[18], w[19], selector); - w[45] = hc_byte_perm_S (w[17], w[18], selector); - w[44] = hc_byte_perm_S (w[16], w[17], selector); - w[43] = hc_byte_perm_S (w[15], w[16], selector); - w[42] = hc_byte_perm_S (w[14], w[15], selector); - w[41] = hc_byte_perm_S (w[13], w[14], selector); - w[40] = hc_byte_perm_S (w[12], w[13], selector); - w[39] = hc_byte_perm_S (w[11], w[12], selector); - w[38] = hc_byte_perm_S (w[10], w[11], selector); - w[37] = hc_byte_perm_S (w[ 9], w[10], selector); - w[36] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[35] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[34] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[33] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[32] = hc_byte_perm_S (w[ 4], w[ 5], 
selector); - w[31] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[30] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[29] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[28] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[27] = hc_byte_perm_S ( 0, w[ 0], selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm_S (w[34], w[35], selector); - w[62] = hc_byte_perm_S (w[33], w[34], selector); - w[61] = hc_byte_perm_S (w[32], w[33], selector); - w[60] = hc_byte_perm_S (w[31], w[32], selector); - w[59] = hc_byte_perm_S (w[30], w[31], selector); - w[58] = hc_byte_perm_S (w[29], w[30], selector); - w[57] = hc_byte_perm_S (w[28], w[29], selector); - w[56] = hc_byte_perm_S (w[27], w[28], selector); - w[55] = hc_byte_perm_S (w[26], w[27], selector); - w[54] = hc_byte_perm_S (w[25], w[26], selector); - w[53] = hc_byte_perm_S (w[24], w[25], selector); - w[52] = hc_byte_perm_S (w[23], w[24], selector); - w[51] = hc_byte_perm_S (w[22], w[23], selector); - w[50] = hc_byte_perm_S (w[21], w[22], selector); - w[49] = hc_byte_perm_S (w[20], w[21], selector); - w[48] = hc_byte_perm_S (w[19], w[20], selector); - w[47] = hc_byte_perm_S (w[18], w[19], selector); - w[46] = hc_byte_perm_S (w[17], w[18], selector); - w[45] = hc_byte_perm_S (w[16], w[17], selector); - w[44] = hc_byte_perm_S (w[15], w[16], selector); - w[43] = hc_byte_perm_S (w[14], w[15], selector); - w[42] = hc_byte_perm_S (w[13], w[14], selector); - w[41] = hc_byte_perm_S (w[12], w[13], selector); - w[40] = hc_byte_perm_S (w[11], w[12], selector); - w[39] = hc_byte_perm_S (w[10], w[11], selector); - w[38] = hc_byte_perm_S (w[ 9], w[10], selector); - w[37] = hc_byte_perm_S (w[ 8], w[ 
9], selector); - w[36] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[35] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[34] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[33] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[32] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[31] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[30] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[29] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[28] = hc_byte_perm_S ( 0, w[ 0], selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm_S (w[33], w[34], selector); - w[62] = hc_byte_perm_S (w[32], w[33], selector); - w[61] = hc_byte_perm_S (w[31], w[32], selector); - w[60] = hc_byte_perm_S (w[30], w[31], selector); - w[59] = hc_byte_perm_S (w[29], w[30], selector); - w[58] = hc_byte_perm_S (w[28], w[29], selector); - w[57] = hc_byte_perm_S (w[27], w[28], selector); - w[56] = hc_byte_perm_S (w[26], w[27], selector); - w[55] = hc_byte_perm_S (w[25], w[26], selector); - w[54] = hc_byte_perm_S (w[24], w[25], selector); - w[53] = hc_byte_perm_S (w[23], w[24], selector); - w[52] = hc_byte_perm_S (w[22], w[23], selector); - w[51] = hc_byte_perm_S (w[21], w[22], selector); - w[50] = hc_byte_perm_S (w[20], w[21], selector); - w[49] = hc_byte_perm_S (w[19], w[20], selector); - w[48] = hc_byte_perm_S (w[18], w[19], selector); - w[47] = hc_byte_perm_S (w[17], w[18], selector); - w[46] = hc_byte_perm_S (w[16], w[17], selector); - w[45] = hc_byte_perm_S (w[15], w[16], selector); - w[44] = hc_byte_perm_S (w[14], w[15], selector); - w[43] = hc_byte_perm_S (w[13], w[14], selector); - w[42] = hc_byte_perm_S (w[12], w[13], selector); - w[41] = 
hc_byte_perm_S (w[11], w[12], selector); - w[40] = hc_byte_perm_S (w[10], w[11], selector); - w[39] = hc_byte_perm_S (w[ 9], w[10], selector); - w[38] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[37] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[36] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[35] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[34] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[33] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[32] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[31] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[30] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[29] = hc_byte_perm_S ( 0, w[ 0], selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm_S (w[32], w[33], selector); - w[62] = hc_byte_perm_S (w[31], w[32], selector); - w[61] = hc_byte_perm_S (w[30], w[31], selector); - w[60] = hc_byte_perm_S (w[29], w[30], selector); - w[59] = hc_byte_perm_S (w[28], w[29], selector); - w[58] = hc_byte_perm_S (w[27], w[28], selector); - w[57] = hc_byte_perm_S (w[26], w[27], selector); - w[56] = hc_byte_perm_S (w[25], w[26], selector); - w[55] = hc_byte_perm_S (w[24], w[25], selector); - w[54] = hc_byte_perm_S (w[23], w[24], selector); - w[53] = hc_byte_perm_S (w[22], w[23], selector); - w[52] = hc_byte_perm_S (w[21], w[22], selector); - w[51] = hc_byte_perm_S (w[20], w[21], selector); - w[50] = hc_byte_perm_S (w[19], w[20], selector); - w[49] = hc_byte_perm_S (w[18], w[19], selector); - w[48] = hc_byte_perm_S (w[17], w[18], selector); - w[47] = hc_byte_perm_S (w[16], w[17], selector); - w[46] = hc_byte_perm_S (w[15], w[16], selector); - w[45] = hc_byte_perm_S 
(w[14], w[15], selector); - w[44] = hc_byte_perm_S (w[13], w[14], selector); - w[43] = hc_byte_perm_S (w[12], w[13], selector); - w[42] = hc_byte_perm_S (w[11], w[12], selector); - w[41] = hc_byte_perm_S (w[10], w[11], selector); - w[40] = hc_byte_perm_S (w[ 9], w[10], selector); - w[39] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[38] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[37] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[36] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[35] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[34] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[33] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[32] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[31] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[30] = hc_byte_perm_S ( 0, w[ 0], selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm_S (w[31], w[32], selector); - w[62] = hc_byte_perm_S (w[30], w[31], selector); - w[61] = hc_byte_perm_S (w[29], w[30], selector); - w[60] = hc_byte_perm_S (w[28], w[29], selector); - w[59] = hc_byte_perm_S (w[27], w[28], selector); - w[58] = hc_byte_perm_S (w[26], w[27], selector); - w[57] = hc_byte_perm_S (w[25], w[26], selector); - w[56] = hc_byte_perm_S (w[24], w[25], selector); - w[55] = hc_byte_perm_S (w[23], w[24], selector); - w[54] = hc_byte_perm_S (w[22], w[23], selector); - w[53] = hc_byte_perm_S (w[21], w[22], selector); - w[52] = hc_byte_perm_S (w[20], w[21], selector); - w[51] = hc_byte_perm_S (w[19], w[20], selector); - w[50] = hc_byte_perm_S (w[18], w[19], selector); - w[49] = hc_byte_perm_S (w[17], w[18], selector); - w[48] = hc_byte_perm_S (w[16], 
w[17], selector); - w[47] = hc_byte_perm_S (w[15], w[16], selector); - w[46] = hc_byte_perm_S (w[14], w[15], selector); - w[45] = hc_byte_perm_S (w[13], w[14], selector); - w[44] = hc_byte_perm_S (w[12], w[13], selector); - w[43] = hc_byte_perm_S (w[11], w[12], selector); - w[42] = hc_byte_perm_S (w[10], w[11], selector); - w[41] = hc_byte_perm_S (w[ 9], w[10], selector); - w[40] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[39] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[38] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[37] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[36] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[35] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[34] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[33] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[32] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[31] = hc_byte_perm_S ( 0, w[ 0], selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm_S (w[30], w[31], selector); - w[62] = hc_byte_perm_S (w[29], w[30], selector); - w[61] = hc_byte_perm_S (w[28], w[29], selector); - w[60] = hc_byte_perm_S (w[27], w[28], selector); - w[59] = hc_byte_perm_S (w[26], w[27], selector); - w[58] = hc_byte_perm_S (w[25], w[26], selector); - w[57] = hc_byte_perm_S (w[24], w[25], selector); - w[56] = hc_byte_perm_S (w[23], w[24], selector); - w[55] = hc_byte_perm_S (w[22], w[23], selector); - w[54] = hc_byte_perm_S (w[21], w[22], selector); - w[53] = hc_byte_perm_S (w[20], w[21], selector); - w[52] = hc_byte_perm_S (w[19], w[20], selector); - w[51] = hc_byte_perm_S (w[18], w[19], selector); - w[50] = hc_byte_perm_S 
(w[17], w[18], selector); - w[49] = hc_byte_perm_S (w[16], w[17], selector); - w[48] = hc_byte_perm_S (w[15], w[16], selector); - w[47] = hc_byte_perm_S (w[14], w[15], selector); - w[46] = hc_byte_perm_S (w[13], w[14], selector); - w[45] = hc_byte_perm_S (w[12], w[13], selector); - w[44] = hc_byte_perm_S (w[11], w[12], selector); - w[43] = hc_byte_perm_S (w[10], w[11], selector); - w[42] = hc_byte_perm_S (w[ 9], w[10], selector); - w[41] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[40] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[39] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[38] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[37] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[36] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[35] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[34] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[33] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[32] = hc_byte_perm_S ( 0, w[ 0], selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm_S (w[29], w[30], selector); - w[62] = hc_byte_perm_S (w[28], w[29], selector); - w[61] = hc_byte_perm_S (w[27], w[28], selector); - w[60] = hc_byte_perm_S (w[26], w[27], selector); - w[59] = hc_byte_perm_S (w[25], w[26], selector); - w[58] = hc_byte_perm_S (w[24], w[25], selector); - w[57] = hc_byte_perm_S (w[23], w[24], selector); - w[56] = hc_byte_perm_S (w[22], w[23], selector); - w[55] = hc_byte_perm_S (w[21], w[22], selector); - w[54] = hc_byte_perm_S (w[20], w[21], selector); - w[53] = hc_byte_perm_S (w[19], w[20], selector); - w[52] = hc_byte_perm_S (w[18], w[19], selector); - 
w[51] = hc_byte_perm_S (w[17], w[18], selector); - w[50] = hc_byte_perm_S (w[16], w[17], selector); - w[49] = hc_byte_perm_S (w[15], w[16], selector); - w[48] = hc_byte_perm_S (w[14], w[15], selector); - w[47] = hc_byte_perm_S (w[13], w[14], selector); - w[46] = hc_byte_perm_S (w[12], w[13], selector); - w[45] = hc_byte_perm_S (w[11], w[12], selector); - w[44] = hc_byte_perm_S (w[10], w[11], selector); - w[43] = hc_byte_perm_S (w[ 9], w[10], selector); - w[42] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[41] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[40] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[39] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[38] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[37] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[36] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[35] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[34] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[33] = hc_byte_perm_S ( 0, w[ 0], selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm_S (w[28], w[29], selector); - w[62] = hc_byte_perm_S (w[27], w[28], selector); - w[61] = hc_byte_perm_S (w[26], w[27], selector); - w[60] = hc_byte_perm_S (w[25], w[26], selector); - w[59] = hc_byte_perm_S (w[24], w[25], selector); - w[58] = hc_byte_perm_S (w[23], w[24], selector); - w[57] = hc_byte_perm_S (w[22], w[23], selector); - w[56] = hc_byte_perm_S (w[21], w[22], selector); - w[55] = hc_byte_perm_S (w[20], w[21], selector); - w[54] = hc_byte_perm_S (w[19], w[20], selector); - w[53] = hc_byte_perm_S (w[18], w[19], selector); - w[52] = 
hc_byte_perm_S (w[17], w[18], selector); - w[51] = hc_byte_perm_S (w[16], w[17], selector); - w[50] = hc_byte_perm_S (w[15], w[16], selector); - w[49] = hc_byte_perm_S (w[14], w[15], selector); - w[48] = hc_byte_perm_S (w[13], w[14], selector); - w[47] = hc_byte_perm_S (w[12], w[13], selector); - w[46] = hc_byte_perm_S (w[11], w[12], selector); - w[45] = hc_byte_perm_S (w[10], w[11], selector); - w[44] = hc_byte_perm_S (w[ 9], w[10], selector); - w[43] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[42] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[41] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[40] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[39] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[38] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[37] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[36] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[35] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[34] = hc_byte_perm_S ( 0, w[ 0], selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm_S (w[27], w[28], selector); - w[62] = hc_byte_perm_S (w[26], w[27], selector); - w[61] = hc_byte_perm_S (w[25], w[26], selector); - w[60] = hc_byte_perm_S (w[24], w[25], selector); - w[59] = hc_byte_perm_S (w[23], w[24], selector); - w[58] = hc_byte_perm_S (w[22], w[23], selector); - w[57] = hc_byte_perm_S (w[21], w[22], selector); - w[56] = hc_byte_perm_S (w[20], w[21], selector); - w[55] = hc_byte_perm_S (w[19], w[20], selector); - w[54] = hc_byte_perm_S (w[18], w[19], selector); - w[53] = hc_byte_perm_S (w[17], w[18], selector); - w[52] = 
hc_byte_perm_S (w[16], w[17], selector); - w[51] = hc_byte_perm_S (w[15], w[16], selector); - w[50] = hc_byte_perm_S (w[14], w[15], selector); - w[49] = hc_byte_perm_S (w[13], w[14], selector); - w[48] = hc_byte_perm_S (w[12], w[13], selector); - w[47] = hc_byte_perm_S (w[11], w[12], selector); - w[46] = hc_byte_perm_S (w[10], w[11], selector); - w[45] = hc_byte_perm_S (w[ 9], w[10], selector); - w[44] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[43] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[42] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[41] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[40] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[39] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[38] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[37] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[36] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[35] = hc_byte_perm_S ( 0, w[ 0], selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm_S (w[26], w[27], selector); - w[62] = hc_byte_perm_S (w[25], w[26], selector); - w[61] = hc_byte_perm_S (w[24], w[25], selector); - w[60] = hc_byte_perm_S (w[23], w[24], selector); - w[59] = hc_byte_perm_S (w[22], w[23], selector); - w[58] = hc_byte_perm_S (w[21], w[22], selector); - w[57] = hc_byte_perm_S (w[20], w[21], selector); - w[56] = hc_byte_perm_S (w[19], w[20], selector); - w[55] = hc_byte_perm_S (w[18], w[19], selector); - w[54] = hc_byte_perm_S (w[17], w[18], selector); - w[53] = hc_byte_perm_S (w[16], w[17], selector); - w[52] = hc_byte_perm_S (w[15], w[16], selector); - 
w[51] = hc_byte_perm_S (w[14], w[15], selector); - w[50] = hc_byte_perm_S (w[13], w[14], selector); - w[49] = hc_byte_perm_S (w[12], w[13], selector); - w[48] = hc_byte_perm_S (w[11], w[12], selector); - w[47] = hc_byte_perm_S (w[10], w[11], selector); - w[46] = hc_byte_perm_S (w[ 9], w[10], selector); - w[45] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[44] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[43] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[42] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[41] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[40] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[39] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[38] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[37] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[36] = hc_byte_perm_S ( 0, w[ 0], selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm_S (w[25], w[26], selector); - w[62] = hc_byte_perm_S (w[24], w[25], selector); - w[61] = hc_byte_perm_S (w[23], w[24], selector); - w[60] = hc_byte_perm_S (w[22], w[23], selector); - w[59] = hc_byte_perm_S (w[21], w[22], selector); - w[58] = hc_byte_perm_S (w[20], w[21], selector); - w[57] = hc_byte_perm_S (w[19], w[20], selector); - w[56] = hc_byte_perm_S (w[18], w[19], selector); - w[55] = hc_byte_perm_S (w[17], w[18], selector); - w[54] = hc_byte_perm_S (w[16], w[17], selector); - w[53] = hc_byte_perm_S (w[15], w[16], selector); - w[52] = hc_byte_perm_S (w[14], w[15], selector); - w[51] = hc_byte_perm_S (w[13], w[14], selector); - w[50] = hc_byte_perm_S (w[12], 
w[13], selector); - w[49] = hc_byte_perm_S (w[11], w[12], selector); - w[48] = hc_byte_perm_S (w[10], w[11], selector); - w[47] = hc_byte_perm_S (w[ 9], w[10], selector); - w[46] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[45] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[44] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[43] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[42] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[41] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[40] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[39] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[38] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[37] = hc_byte_perm_S ( 0, w[ 0], selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm_S (w[24], w[25], selector); - w[62] = hc_byte_perm_S (w[23], w[24], selector); - w[61] = hc_byte_perm_S (w[22], w[23], selector); - w[60] = hc_byte_perm_S (w[21], w[22], selector); - w[59] = hc_byte_perm_S (w[20], w[21], selector); - w[58] = hc_byte_perm_S (w[19], w[20], selector); - w[57] = hc_byte_perm_S (w[18], w[19], selector); - w[56] = hc_byte_perm_S (w[17], w[18], selector); - w[55] = hc_byte_perm_S (w[16], w[17], selector); - w[54] = hc_byte_perm_S (w[15], w[16], selector); - w[53] = hc_byte_perm_S (w[14], w[15], selector); - w[52] = hc_byte_perm_S (w[13], w[14], selector); - w[51] = hc_byte_perm_S (w[12], w[13], selector); - w[50] = hc_byte_perm_S (w[11], w[12], selector); - w[49] = hc_byte_perm_S (w[10], w[11], selector); - w[48] = hc_byte_perm_S (w[ 9], w[10], selector); 
- w[47] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[46] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[45] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[44] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[43] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[42] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[41] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[40] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[39] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[38] = hc_byte_perm_S ( 0, w[ 0], selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm_S (w[23], w[24], selector); - w[62] = hc_byte_perm_S (w[22], w[23], selector); - w[61] = hc_byte_perm_S (w[21], w[22], selector); - w[60] = hc_byte_perm_S (w[20], w[21], selector); - w[59] = hc_byte_perm_S (w[19], w[20], selector); - w[58] = hc_byte_perm_S (w[18], w[19], selector); - w[57] = hc_byte_perm_S (w[17], w[18], selector); - w[56] = hc_byte_perm_S (w[16], w[17], selector); - w[55] = hc_byte_perm_S (w[15], w[16], selector); - w[54] = hc_byte_perm_S (w[14], w[15], selector); - w[53] = hc_byte_perm_S (w[13], w[14], selector); - w[52] = hc_byte_perm_S (w[12], w[13], selector); - w[51] = hc_byte_perm_S (w[11], w[12], selector); - w[50] = hc_byte_perm_S (w[10], w[11], selector); - w[49] = hc_byte_perm_S (w[ 9], w[10], selector); - w[48] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[47] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[46] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[45] = hc_byte_perm_S (w[ 5], w[ 6], selector); - 
w[44] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[43] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[42] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[41] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[40] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[39] = hc_byte_perm_S ( 0, w[ 0], selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm_S (w[22], w[23], selector); - w[62] = hc_byte_perm_S (w[21], w[22], selector); - w[61] = hc_byte_perm_S (w[20], w[21], selector); - w[60] = hc_byte_perm_S (w[19], w[20], selector); - w[59] = hc_byte_perm_S (w[18], w[19], selector); - w[58] = hc_byte_perm_S (w[17], w[18], selector); - w[57] = hc_byte_perm_S (w[16], w[17], selector); - w[56] = hc_byte_perm_S (w[15], w[16], selector); - w[55] = hc_byte_perm_S (w[14], w[15], selector); - w[54] = hc_byte_perm_S (w[13], w[14], selector); - w[53] = hc_byte_perm_S (w[12], w[13], selector); - w[52] = hc_byte_perm_S (w[11], w[12], selector); - w[51] = hc_byte_perm_S (w[10], w[11], selector); - w[50] = hc_byte_perm_S (w[ 9], w[10], selector); - w[49] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[48] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[47] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[46] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[45] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[44] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[43] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[42] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[41] = hc_byte_perm_S (w[ 0], w[ 1], 
selector); - w[40] = hc_byte_perm_S ( 0, w[ 0], selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm_S (w[21], w[22], selector); - w[62] = hc_byte_perm_S (w[20], w[21], selector); - w[61] = hc_byte_perm_S (w[19], w[20], selector); - w[60] = hc_byte_perm_S (w[18], w[19], selector); - w[59] = hc_byte_perm_S (w[17], w[18], selector); - w[58] = hc_byte_perm_S (w[16], w[17], selector); - w[57] = hc_byte_perm_S (w[15], w[16], selector); - w[56] = hc_byte_perm_S (w[14], w[15], selector); - w[55] = hc_byte_perm_S (w[13], w[14], selector); - w[54] = hc_byte_perm_S (w[12], w[13], selector); - w[53] = hc_byte_perm_S (w[11], w[12], selector); - w[52] = hc_byte_perm_S (w[10], w[11], selector); - w[51] = hc_byte_perm_S (w[ 9], w[10], selector); - w[50] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[49] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[48] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[47] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[46] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[45] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[44] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[43] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[42] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[41] = hc_byte_perm_S ( 0, w[ 0], selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - 
w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm_S (w[20], w[21], selector); - w[62] = hc_byte_perm_S (w[19], w[20], selector); - w[61] = hc_byte_perm_S (w[18], w[19], selector); - w[60] = hc_byte_perm_S (w[17], w[18], selector); - w[59] = hc_byte_perm_S (w[16], w[17], selector); - w[58] = hc_byte_perm_S (w[15], w[16], selector); - w[57] = hc_byte_perm_S (w[14], w[15], selector); - w[56] = hc_byte_perm_S (w[13], w[14], selector); - w[55] = hc_byte_perm_S (w[12], w[13], selector); - w[54] = hc_byte_perm_S (w[11], w[12], selector); - w[53] = hc_byte_perm_S (w[10], w[11], selector); - w[52] = hc_byte_perm_S (w[ 9], w[10], selector); - w[51] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[50] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[49] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[48] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[47] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[46] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[45] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[44] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[43] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[42] = hc_byte_perm_S ( 0, w[ 0], selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; 
- - break; - - case 43: - w[63] = hc_byte_perm_S (w[19], w[20], selector); - w[62] = hc_byte_perm_S (w[18], w[19], selector); - w[61] = hc_byte_perm_S (w[17], w[18], selector); - w[60] = hc_byte_perm_S (w[16], w[17], selector); - w[59] = hc_byte_perm_S (w[15], w[16], selector); - w[58] = hc_byte_perm_S (w[14], w[15], selector); - w[57] = hc_byte_perm_S (w[13], w[14], selector); - w[56] = hc_byte_perm_S (w[12], w[13], selector); - w[55] = hc_byte_perm_S (w[11], w[12], selector); - w[54] = hc_byte_perm_S (w[10], w[11], selector); - w[53] = hc_byte_perm_S (w[ 9], w[10], selector); - w[52] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[51] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[50] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[49] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[48] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[47] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[46] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[45] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[44] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[43] = hc_byte_perm_S ( 0, w[ 0], selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm_S (w[18], w[19], selector); - w[62] = hc_byte_perm_S (w[17], w[18], selector); - w[61] = hc_byte_perm_S (w[16], w[17], selector); - w[60] = hc_byte_perm_S (w[15], w[16], selector); - w[59] = hc_byte_perm_S (w[14], w[15], selector); - w[58] = hc_byte_perm_S (w[13], w[14], selector); - w[57] = 
hc_byte_perm_S (w[12], w[13], selector); - w[56] = hc_byte_perm_S (w[11], w[12], selector); - w[55] = hc_byte_perm_S (w[10], w[11], selector); - w[54] = hc_byte_perm_S (w[ 9], w[10], selector); - w[53] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[52] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[51] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[50] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[49] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[48] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[47] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[46] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[45] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[44] = hc_byte_perm_S ( 0, w[ 0], selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm_S (w[17], w[18], selector); - w[62] = hc_byte_perm_S (w[16], w[17], selector); - w[61] = hc_byte_perm_S (w[15], w[16], selector); - w[60] = hc_byte_perm_S (w[14], w[15], selector); - w[59] = hc_byte_perm_S (w[13], w[14], selector); - w[58] = hc_byte_perm_S (w[12], w[13], selector); - w[57] = hc_byte_perm_S (w[11], w[12], selector); - w[56] = hc_byte_perm_S (w[10], w[11], selector); - w[55] = hc_byte_perm_S (w[ 9], w[10], selector); - w[54] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[53] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[52] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[51] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[50] = hc_byte_perm_S (w[ 4], w[ 5], 
selector); - w[49] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[48] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[47] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[46] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[45] = hc_byte_perm_S ( 0, w[ 0], selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm_S (w[16], w[17], selector); - w[62] = hc_byte_perm_S (w[15], w[16], selector); - w[61] = hc_byte_perm_S (w[14], w[15], selector); - w[60] = hc_byte_perm_S (w[13], w[14], selector); - w[59] = hc_byte_perm_S (w[12], w[13], selector); - w[58] = hc_byte_perm_S (w[11], w[12], selector); - w[57] = hc_byte_perm_S (w[10], w[11], selector); - w[56] = hc_byte_perm_S (w[ 9], w[10], selector); - w[55] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[54] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[53] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[52] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[51] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[50] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[49] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[48] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[47] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[46] = hc_byte_perm_S ( 0, w[ 0], selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - 
w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm_S (w[15], w[16], selector); - w[62] = hc_byte_perm_S (w[14], w[15], selector); - w[61] = hc_byte_perm_S (w[13], w[14], selector); - w[60] = hc_byte_perm_S (w[12], w[13], selector); - w[59] = hc_byte_perm_S (w[11], w[12], selector); - w[58] = hc_byte_perm_S (w[10], w[11], selector); - w[57] = hc_byte_perm_S (w[ 9], w[10], selector); - w[56] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[55] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[54] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[53] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[52] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[51] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[50] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[49] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[48] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[47] = hc_byte_perm_S ( 0, w[ 0], selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm_S (w[14], w[15], selector); - w[62] = hc_byte_perm_S (w[13], 
w[14], selector); - w[61] = hc_byte_perm_S (w[12], w[13], selector); - w[60] = hc_byte_perm_S (w[11], w[12], selector); - w[59] = hc_byte_perm_S (w[10], w[11], selector); - w[58] = hc_byte_perm_S (w[ 9], w[10], selector); - w[57] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[56] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[55] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[54] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[53] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[52] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[51] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[50] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[49] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[48] = hc_byte_perm_S ( 0, w[ 0], selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm_S (w[13], w[14], selector); - w[62] = hc_byte_perm_S (w[12], w[13], selector); - w[61] = hc_byte_perm_S (w[11], w[12], selector); - w[60] = hc_byte_perm_S (w[10], w[11], selector); - w[59] = hc_byte_perm_S (w[ 9], w[10], selector); - w[58] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[57] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[56] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[55] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[54] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[53] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[52] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[51] = 
hc_byte_perm_S (w[ 1], w[ 2], selector); - w[50] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[49] = hc_byte_perm_S ( 0, w[ 0], selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm_S (w[12], w[13], selector); - w[62] = hc_byte_perm_S (w[11], w[12], selector); - w[61] = hc_byte_perm_S (w[10], w[11], selector); - w[60] = hc_byte_perm_S (w[ 9], w[10], selector); - w[59] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[58] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[57] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[56] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[55] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[54] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[53] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[52] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[51] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[50] = hc_byte_perm_S ( 0, w[ 0], selector); - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - 
w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm_S (w[11], w[12], selector); - w[62] = hc_byte_perm_S (w[10], w[11], selector); - w[61] = hc_byte_perm_S (w[ 9], w[10], selector); - w[60] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[59] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[58] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[57] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[56] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[55] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[54] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[53] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[52] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[51] = hc_byte_perm_S ( 0, w[ 0], selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 52: - w[63] = hc_byte_perm_S (w[10], w[11], selector); - w[62] = hc_byte_perm_S (w[ 9], w[10], selector); - w[61] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[60] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[59] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[58] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[57] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[56] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[55] = hc_byte_perm_S (w[ 2], w[ 3], selector); - 
w[54] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[53] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[52] = hc_byte_perm_S ( 0, w[ 0], selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm_S (w[ 9], w[10], selector); - w[62] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[61] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[60] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[59] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[58] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[57] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[56] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[55] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[54] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[53] = hc_byte_perm_S ( 0, w[ 0], selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] 
= 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm_S (w[ 8], w[ 9], selector); - w[62] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[61] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[60] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[59] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[58] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[57] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[56] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[55] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[54] = hc_byte_perm_S ( 0, w[ 0], selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm_S (w[ 7], w[ 8], selector); - w[62] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[61] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[60] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[59] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[58] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[57] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[56] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[55] = hc_byte_perm_S ( 0, w[ 0], selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] 
= 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm_S (w[ 6], w[ 7], selector); - w[62] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[61] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[60] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[59] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[58] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[57] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[56] = hc_byte_perm_S ( 0, w[ 0], selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm_S (w[ 5], w[ 6], selector); - w[62] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[61] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[60] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[59] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[58] = hc_byte_perm_S (w[ 0], w[ 
1], selector); - w[57] = hc_byte_perm_S ( 0, w[ 0], selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm_S (w[ 4], w[ 5], selector); - w[62] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[61] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[60] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[59] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[58] = hc_byte_perm_S ( 0, w[ 0], selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm_S (w[ 3], w[ 4], selector); - w[62] = hc_byte_perm_S (w[ 2], w[ 3], 
selector); - w[61] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[60] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[59] = hc_byte_perm_S ( 0, w[ 0], selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm_S (w[ 2], w[ 3], selector); - w[62] = hc_byte_perm_S (w[ 1], w[ 2], selector); - w[61] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[60] = hc_byte_perm_S ( 0, w[ 0], selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm_S (w[ 1], w[ 2], 
selector); - w[62] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[61] = hc_byte_perm_S ( 0, w[ 0], selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm_S (w[ 0], w[ 1], selector); - w[62] = hc_byte_perm_S ( 0, w[ 0], selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm_S ( 0, w[ 0], selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - 
w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } DECLSPEC void switch_buffer_by_offset_1x64_be_S (PRIVATE_AS u32 *w, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -64762,4373 +37679,6 @@ DECLSPEC void switch_buffer_by_offset_1x64_be_S (PRIVATE_AS u32 *w, const u32 of break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w[63] = hc_byte_perm_S (w[63], w[62], selector); - w[62] = hc_byte_perm_S (w[62], w[61], selector); - w[61] = hc_byte_perm_S (w[61], w[60], selector); - w[60] = hc_byte_perm_S (w[60], w[59], selector); - w[59] = hc_byte_perm_S (w[59], w[58], selector); - w[58] = hc_byte_perm_S (w[58], w[57], selector); - w[57] = hc_byte_perm_S (w[57], w[56], selector); - w[56] = hc_byte_perm_S (w[56], w[55], selector); - w[55] = hc_byte_perm_S (w[55], w[54], selector); - w[54] = hc_byte_perm_S (w[54], w[53], selector); - w[53] = 
hc_byte_perm_S (w[53], w[52], selector); - w[52] = hc_byte_perm_S (w[52], w[51], selector); - w[51] = hc_byte_perm_S (w[51], w[50], selector); - w[50] = hc_byte_perm_S (w[50], w[49], selector); - w[49] = hc_byte_perm_S (w[49], w[48], selector); - w[48] = hc_byte_perm_S (w[48], w[47], selector); - w[47] = hc_byte_perm_S (w[47], w[46], selector); - w[46] = hc_byte_perm_S (w[46], w[45], selector); - w[45] = hc_byte_perm_S (w[45], w[44], selector); - w[44] = hc_byte_perm_S (w[44], w[43], selector); - w[43] = hc_byte_perm_S (w[43], w[42], selector); - w[42] = hc_byte_perm_S (w[42], w[41], selector); - w[41] = hc_byte_perm_S (w[41], w[40], selector); - w[40] = hc_byte_perm_S (w[40], w[39], selector); - w[39] = hc_byte_perm_S (w[39], w[38], selector); - w[38] = hc_byte_perm_S (w[38], w[37], selector); - w[37] = hc_byte_perm_S (w[37], w[36], selector); - w[36] = hc_byte_perm_S (w[36], w[35], selector); - w[35] = hc_byte_perm_S (w[35], w[34], selector); - w[34] = hc_byte_perm_S (w[34], w[33], selector); - w[33] = hc_byte_perm_S (w[33], w[32], selector); - w[32] = hc_byte_perm_S (w[32], w[31], selector); - w[31] = hc_byte_perm_S (w[31], w[30], selector); - w[30] = hc_byte_perm_S (w[30], w[29], selector); - w[29] = hc_byte_perm_S (w[29], w[28], selector); - w[28] = hc_byte_perm_S (w[28], w[27], selector); - w[27] = hc_byte_perm_S (w[27], w[26], selector); - w[26] = hc_byte_perm_S (w[26], w[25], selector); - w[25] = hc_byte_perm_S (w[25], w[24], selector); - w[24] = hc_byte_perm_S (w[24], w[23], selector); - w[23] = hc_byte_perm_S (w[23], w[22], selector); - w[22] = hc_byte_perm_S (w[22], w[21], selector); - w[21] = hc_byte_perm_S (w[21], w[20], selector); - w[20] = hc_byte_perm_S (w[20], w[19], selector); - w[19] = hc_byte_perm_S (w[19], w[18], selector); - w[18] = hc_byte_perm_S (w[18], w[17], selector); - w[17] = hc_byte_perm_S (w[17], w[16], selector); - w[16] = hc_byte_perm_S (w[16], w[15], selector); - w[15] = hc_byte_perm_S (w[15], w[14], selector); - w[14] = 
hc_byte_perm_S (w[14], w[13], selector); - w[13] = hc_byte_perm_S (w[13], w[12], selector); - w[12] = hc_byte_perm_S (w[12], w[11], selector); - w[11] = hc_byte_perm_S (w[11], w[10], selector); - w[10] = hc_byte_perm_S (w[10], w[ 9], selector); - w[ 9] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[ 8] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 7] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 6] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 5] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 4] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 3] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 2] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 1] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 0] = hc_byte_perm_S (w[ 0], 0, selector); - - break; - - case 1: - w[63] = hc_byte_perm_S (w[62], w[61], selector); - w[62] = hc_byte_perm_S (w[61], w[60], selector); - w[61] = hc_byte_perm_S (w[60], w[59], selector); - w[60] = hc_byte_perm_S (w[59], w[58], selector); - w[59] = hc_byte_perm_S (w[58], w[57], selector); - w[58] = hc_byte_perm_S (w[57], w[56], selector); - w[57] = hc_byte_perm_S (w[56], w[55], selector); - w[56] = hc_byte_perm_S (w[55], w[54], selector); - w[55] = hc_byte_perm_S (w[54], w[53], selector); - w[54] = hc_byte_perm_S (w[53], w[52], selector); - w[53] = hc_byte_perm_S (w[52], w[51], selector); - w[52] = hc_byte_perm_S (w[51], w[50], selector); - w[51] = hc_byte_perm_S (w[50], w[49], selector); - w[50] = hc_byte_perm_S (w[49], w[48], selector); - w[49] = hc_byte_perm_S (w[48], w[47], selector); - w[48] = hc_byte_perm_S (w[47], w[46], selector); - w[47] = hc_byte_perm_S (w[46], w[45], selector); - w[46] = hc_byte_perm_S (w[45], w[44], selector); - w[45] = hc_byte_perm_S (w[44], w[43], selector); - w[44] = hc_byte_perm_S (w[43], w[42], selector); - w[43] = hc_byte_perm_S (w[42], w[41], selector); - w[42] = hc_byte_perm_S (w[41], w[40], selector); - w[41] = hc_byte_perm_S (w[40], w[39], selector); - w[40] = hc_byte_perm_S (w[39], w[38], selector); - 
w[39] = hc_byte_perm_S (w[38], w[37], selector); - w[38] = hc_byte_perm_S (w[37], w[36], selector); - w[37] = hc_byte_perm_S (w[36], w[35], selector); - w[36] = hc_byte_perm_S (w[35], w[34], selector); - w[35] = hc_byte_perm_S (w[34], w[33], selector); - w[34] = hc_byte_perm_S (w[33], w[32], selector); - w[33] = hc_byte_perm_S (w[32], w[31], selector); - w[32] = hc_byte_perm_S (w[31], w[30], selector); - w[31] = hc_byte_perm_S (w[30], w[29], selector); - w[30] = hc_byte_perm_S (w[29], w[28], selector); - w[29] = hc_byte_perm_S (w[28], w[27], selector); - w[28] = hc_byte_perm_S (w[27], w[26], selector); - w[27] = hc_byte_perm_S (w[26], w[25], selector); - w[26] = hc_byte_perm_S (w[25], w[24], selector); - w[25] = hc_byte_perm_S (w[24], w[23], selector); - w[24] = hc_byte_perm_S (w[23], w[22], selector); - w[23] = hc_byte_perm_S (w[22], w[21], selector); - w[22] = hc_byte_perm_S (w[21], w[20], selector); - w[21] = hc_byte_perm_S (w[20], w[19], selector); - w[20] = hc_byte_perm_S (w[19], w[18], selector); - w[19] = hc_byte_perm_S (w[18], w[17], selector); - w[18] = hc_byte_perm_S (w[17], w[16], selector); - w[17] = hc_byte_perm_S (w[16], w[15], selector); - w[16] = hc_byte_perm_S (w[15], w[14], selector); - w[15] = hc_byte_perm_S (w[14], w[13], selector); - w[14] = hc_byte_perm_S (w[13], w[12], selector); - w[13] = hc_byte_perm_S (w[12], w[11], selector); - w[12] = hc_byte_perm_S (w[11], w[10], selector); - w[11] = hc_byte_perm_S (w[10], w[ 9], selector); - w[10] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[ 9] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 8] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 7] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 6] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 5] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 4] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 3] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 2] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 1] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 0] = 0; - - 
break; - - case 2: - w[63] = hc_byte_perm_S (w[61], w[60], selector); - w[62] = hc_byte_perm_S (w[60], w[59], selector); - w[61] = hc_byte_perm_S (w[59], w[58], selector); - w[60] = hc_byte_perm_S (w[58], w[57], selector); - w[59] = hc_byte_perm_S (w[57], w[56], selector); - w[58] = hc_byte_perm_S (w[56], w[55], selector); - w[57] = hc_byte_perm_S (w[55], w[54], selector); - w[56] = hc_byte_perm_S (w[54], w[53], selector); - w[55] = hc_byte_perm_S (w[53], w[52], selector); - w[54] = hc_byte_perm_S (w[52], w[51], selector); - w[53] = hc_byte_perm_S (w[51], w[50], selector); - w[52] = hc_byte_perm_S (w[50], w[49], selector); - w[51] = hc_byte_perm_S (w[49], w[48], selector); - w[50] = hc_byte_perm_S (w[48], w[47], selector); - w[49] = hc_byte_perm_S (w[47], w[46], selector); - w[48] = hc_byte_perm_S (w[46], w[45], selector); - w[47] = hc_byte_perm_S (w[45], w[44], selector); - w[46] = hc_byte_perm_S (w[44], w[43], selector); - w[45] = hc_byte_perm_S (w[43], w[42], selector); - w[44] = hc_byte_perm_S (w[42], w[41], selector); - w[43] = hc_byte_perm_S (w[41], w[40], selector); - w[42] = hc_byte_perm_S (w[40], w[39], selector); - w[41] = hc_byte_perm_S (w[39], w[38], selector); - w[40] = hc_byte_perm_S (w[38], w[37], selector); - w[39] = hc_byte_perm_S (w[37], w[36], selector); - w[38] = hc_byte_perm_S (w[36], w[35], selector); - w[37] = hc_byte_perm_S (w[35], w[34], selector); - w[36] = hc_byte_perm_S (w[34], w[33], selector); - w[35] = hc_byte_perm_S (w[33], w[32], selector); - w[34] = hc_byte_perm_S (w[32], w[31], selector); - w[33] = hc_byte_perm_S (w[31], w[30], selector); - w[32] = hc_byte_perm_S (w[30], w[29], selector); - w[31] = hc_byte_perm_S (w[29], w[28], selector); - w[30] = hc_byte_perm_S (w[28], w[27], selector); - w[29] = hc_byte_perm_S (w[27], w[26], selector); - w[28] = hc_byte_perm_S (w[26], w[25], selector); - w[27] = hc_byte_perm_S (w[25], w[24], selector); - w[26] = hc_byte_perm_S (w[24], w[23], selector); - w[25] = hc_byte_perm_S (w[23], w[22], 
selector); - w[24] = hc_byte_perm_S (w[22], w[21], selector); - w[23] = hc_byte_perm_S (w[21], w[20], selector); - w[22] = hc_byte_perm_S (w[20], w[19], selector); - w[21] = hc_byte_perm_S (w[19], w[18], selector); - w[20] = hc_byte_perm_S (w[18], w[17], selector); - w[19] = hc_byte_perm_S (w[17], w[16], selector); - w[18] = hc_byte_perm_S (w[16], w[15], selector); - w[17] = hc_byte_perm_S (w[15], w[14], selector); - w[16] = hc_byte_perm_S (w[14], w[13], selector); - w[15] = hc_byte_perm_S (w[13], w[12], selector); - w[14] = hc_byte_perm_S (w[12], w[11], selector); - w[13] = hc_byte_perm_S (w[11], w[10], selector); - w[12] = hc_byte_perm_S (w[10], w[ 9], selector); - w[11] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[10] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[ 9] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 8] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 7] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 6] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 5] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 4] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 3] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 2] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 3: - w[63] = hc_byte_perm_S (w[60], w[59], selector); - w[62] = hc_byte_perm_S (w[59], w[58], selector); - w[61] = hc_byte_perm_S (w[58], w[57], selector); - w[60] = hc_byte_perm_S (w[57], w[56], selector); - w[59] = hc_byte_perm_S (w[56], w[55], selector); - w[58] = hc_byte_perm_S (w[55], w[54], selector); - w[57] = hc_byte_perm_S (w[54], w[53], selector); - w[56] = hc_byte_perm_S (w[53], w[52], selector); - w[55] = hc_byte_perm_S (w[52], w[51], selector); - w[54] = hc_byte_perm_S (w[51], w[50], selector); - w[53] = hc_byte_perm_S (w[50], w[49], selector); - w[52] = hc_byte_perm_S (w[49], w[48], selector); - w[51] = hc_byte_perm_S (w[48], w[47], selector); - w[50] = hc_byte_perm_S (w[47], w[46], selector); - w[49] = hc_byte_perm_S (w[46], w[45], selector); - 
w[48] = hc_byte_perm_S (w[45], w[44], selector); - w[47] = hc_byte_perm_S (w[44], w[43], selector); - w[46] = hc_byte_perm_S (w[43], w[42], selector); - w[45] = hc_byte_perm_S (w[42], w[41], selector); - w[44] = hc_byte_perm_S (w[41], w[40], selector); - w[43] = hc_byte_perm_S (w[40], w[39], selector); - w[42] = hc_byte_perm_S (w[39], w[38], selector); - w[41] = hc_byte_perm_S (w[38], w[37], selector); - w[40] = hc_byte_perm_S (w[37], w[36], selector); - w[39] = hc_byte_perm_S (w[36], w[35], selector); - w[38] = hc_byte_perm_S (w[35], w[34], selector); - w[37] = hc_byte_perm_S (w[34], w[33], selector); - w[36] = hc_byte_perm_S (w[33], w[32], selector); - w[35] = hc_byte_perm_S (w[32], w[31], selector); - w[34] = hc_byte_perm_S (w[31], w[30], selector); - w[33] = hc_byte_perm_S (w[30], w[29], selector); - w[32] = hc_byte_perm_S (w[29], w[28], selector); - w[31] = hc_byte_perm_S (w[28], w[27], selector); - w[30] = hc_byte_perm_S (w[27], w[26], selector); - w[29] = hc_byte_perm_S (w[26], w[25], selector); - w[28] = hc_byte_perm_S (w[25], w[24], selector); - w[27] = hc_byte_perm_S (w[24], w[23], selector); - w[26] = hc_byte_perm_S (w[23], w[22], selector); - w[25] = hc_byte_perm_S (w[22], w[21], selector); - w[24] = hc_byte_perm_S (w[21], w[20], selector); - w[23] = hc_byte_perm_S (w[20], w[19], selector); - w[22] = hc_byte_perm_S (w[19], w[18], selector); - w[21] = hc_byte_perm_S (w[18], w[17], selector); - w[20] = hc_byte_perm_S (w[17], w[16], selector); - w[19] = hc_byte_perm_S (w[16], w[15], selector); - w[18] = hc_byte_perm_S (w[15], w[14], selector); - w[17] = hc_byte_perm_S (w[14], w[13], selector); - w[16] = hc_byte_perm_S (w[13], w[12], selector); - w[15] = hc_byte_perm_S (w[12], w[11], selector); - w[14] = hc_byte_perm_S (w[11], w[10], selector); - w[13] = hc_byte_perm_S (w[10], w[ 9], selector); - w[12] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[11] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[10] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[ 9] = 
hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 8] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 7] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 6] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 5] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 4] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 3] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 4: - w[63] = hc_byte_perm_S (w[59], w[58], selector); - w[62] = hc_byte_perm_S (w[58], w[57], selector); - w[61] = hc_byte_perm_S (w[57], w[56], selector); - w[60] = hc_byte_perm_S (w[56], w[55], selector); - w[59] = hc_byte_perm_S (w[55], w[54], selector); - w[58] = hc_byte_perm_S (w[54], w[53], selector); - w[57] = hc_byte_perm_S (w[53], w[52], selector); - w[56] = hc_byte_perm_S (w[52], w[51], selector); - w[55] = hc_byte_perm_S (w[51], w[50], selector); - w[54] = hc_byte_perm_S (w[50], w[49], selector); - w[53] = hc_byte_perm_S (w[49], w[48], selector); - w[52] = hc_byte_perm_S (w[48], w[47], selector); - w[51] = hc_byte_perm_S (w[47], w[46], selector); - w[50] = hc_byte_perm_S (w[46], w[45], selector); - w[49] = hc_byte_perm_S (w[45], w[44], selector); - w[48] = hc_byte_perm_S (w[44], w[43], selector); - w[47] = hc_byte_perm_S (w[43], w[42], selector); - w[46] = hc_byte_perm_S (w[42], w[41], selector); - w[45] = hc_byte_perm_S (w[41], w[40], selector); - w[44] = hc_byte_perm_S (w[40], w[39], selector); - w[43] = hc_byte_perm_S (w[39], w[38], selector); - w[42] = hc_byte_perm_S (w[38], w[37], selector); - w[41] = hc_byte_perm_S (w[37], w[36], selector); - w[40] = hc_byte_perm_S (w[36], w[35], selector); - w[39] = hc_byte_perm_S (w[35], w[34], selector); - w[38] = hc_byte_perm_S (w[34], w[33], selector); - w[37] = hc_byte_perm_S (w[33], w[32], selector); - w[36] = hc_byte_perm_S (w[32], w[31], selector); - w[35] = hc_byte_perm_S (w[31], w[30], selector); - w[34] = hc_byte_perm_S (w[30], w[29], selector); - w[33] = hc_byte_perm_S (w[29], w[28], selector); - w[32] = 
hc_byte_perm_S (w[28], w[27], selector); - w[31] = hc_byte_perm_S (w[27], w[26], selector); - w[30] = hc_byte_perm_S (w[26], w[25], selector); - w[29] = hc_byte_perm_S (w[25], w[24], selector); - w[28] = hc_byte_perm_S (w[24], w[23], selector); - w[27] = hc_byte_perm_S (w[23], w[22], selector); - w[26] = hc_byte_perm_S (w[22], w[21], selector); - w[25] = hc_byte_perm_S (w[21], w[20], selector); - w[24] = hc_byte_perm_S (w[20], w[19], selector); - w[23] = hc_byte_perm_S (w[19], w[18], selector); - w[22] = hc_byte_perm_S (w[18], w[17], selector); - w[21] = hc_byte_perm_S (w[17], w[16], selector); - w[20] = hc_byte_perm_S (w[16], w[15], selector); - w[19] = hc_byte_perm_S (w[15], w[14], selector); - w[18] = hc_byte_perm_S (w[14], w[13], selector); - w[17] = hc_byte_perm_S (w[13], w[12], selector); - w[16] = hc_byte_perm_S (w[12], w[11], selector); - w[15] = hc_byte_perm_S (w[11], w[10], selector); - w[14] = hc_byte_perm_S (w[10], w[ 9], selector); - w[13] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[12] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[11] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[10] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[ 9] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 8] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 7] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 6] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 5] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 4] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 5: - w[63] = hc_byte_perm_S (w[58], w[57], selector); - w[62] = hc_byte_perm_S (w[57], w[56], selector); - w[61] = hc_byte_perm_S (w[56], w[55], selector); - w[60] = hc_byte_perm_S (w[55], w[54], selector); - w[59] = hc_byte_perm_S (w[54], w[53], selector); - w[58] = hc_byte_perm_S (w[53], w[52], selector); - w[57] = hc_byte_perm_S (w[52], w[51], selector); - w[56] = hc_byte_perm_S (w[51], w[50], selector); - w[55] = hc_byte_perm_S (w[50], w[49], selector); 
- w[54] = hc_byte_perm_S (w[49], w[48], selector); - w[53] = hc_byte_perm_S (w[48], w[47], selector); - w[52] = hc_byte_perm_S (w[47], w[46], selector); - w[51] = hc_byte_perm_S (w[46], w[45], selector); - w[50] = hc_byte_perm_S (w[45], w[44], selector); - w[49] = hc_byte_perm_S (w[44], w[43], selector); - w[48] = hc_byte_perm_S (w[43], w[42], selector); - w[47] = hc_byte_perm_S (w[42], w[41], selector); - w[46] = hc_byte_perm_S (w[41], w[40], selector); - w[45] = hc_byte_perm_S (w[40], w[39], selector); - w[44] = hc_byte_perm_S (w[39], w[38], selector); - w[43] = hc_byte_perm_S (w[38], w[37], selector); - w[42] = hc_byte_perm_S (w[37], w[36], selector); - w[41] = hc_byte_perm_S (w[36], w[35], selector); - w[40] = hc_byte_perm_S (w[35], w[34], selector); - w[39] = hc_byte_perm_S (w[34], w[33], selector); - w[38] = hc_byte_perm_S (w[33], w[32], selector); - w[37] = hc_byte_perm_S (w[32], w[31], selector); - w[36] = hc_byte_perm_S (w[31], w[30], selector); - w[35] = hc_byte_perm_S (w[30], w[29], selector); - w[34] = hc_byte_perm_S (w[29], w[28], selector); - w[33] = hc_byte_perm_S (w[28], w[27], selector); - w[32] = hc_byte_perm_S (w[27], w[26], selector); - w[31] = hc_byte_perm_S (w[26], w[25], selector); - w[30] = hc_byte_perm_S (w[25], w[24], selector); - w[29] = hc_byte_perm_S (w[24], w[23], selector); - w[28] = hc_byte_perm_S (w[23], w[22], selector); - w[27] = hc_byte_perm_S (w[22], w[21], selector); - w[26] = hc_byte_perm_S (w[21], w[20], selector); - w[25] = hc_byte_perm_S (w[20], w[19], selector); - w[24] = hc_byte_perm_S (w[19], w[18], selector); - w[23] = hc_byte_perm_S (w[18], w[17], selector); - w[22] = hc_byte_perm_S (w[17], w[16], selector); - w[21] = hc_byte_perm_S (w[16], w[15], selector); - w[20] = hc_byte_perm_S (w[15], w[14], selector); - w[19] = hc_byte_perm_S (w[14], w[13], selector); - w[18] = hc_byte_perm_S (w[13], w[12], selector); - w[17] = hc_byte_perm_S (w[12], w[11], selector); - w[16] = hc_byte_perm_S (w[11], w[10], selector); - w[15] = 
hc_byte_perm_S (w[10], w[ 9], selector); - w[14] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[13] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[12] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[11] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[10] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[ 9] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 8] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 7] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 6] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 5] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 6: - w[63] = hc_byte_perm_S (w[57], w[56], selector); - w[62] = hc_byte_perm_S (w[56], w[55], selector); - w[61] = hc_byte_perm_S (w[55], w[54], selector); - w[60] = hc_byte_perm_S (w[54], w[53], selector); - w[59] = hc_byte_perm_S (w[53], w[52], selector); - w[58] = hc_byte_perm_S (w[52], w[51], selector); - w[57] = hc_byte_perm_S (w[51], w[50], selector); - w[56] = hc_byte_perm_S (w[50], w[49], selector); - w[55] = hc_byte_perm_S (w[49], w[48], selector); - w[54] = hc_byte_perm_S (w[48], w[47], selector); - w[53] = hc_byte_perm_S (w[47], w[46], selector); - w[52] = hc_byte_perm_S (w[46], w[45], selector); - w[51] = hc_byte_perm_S (w[45], w[44], selector); - w[50] = hc_byte_perm_S (w[44], w[43], selector); - w[49] = hc_byte_perm_S (w[43], w[42], selector); - w[48] = hc_byte_perm_S (w[42], w[41], selector); - w[47] = hc_byte_perm_S (w[41], w[40], selector); - w[46] = hc_byte_perm_S (w[40], w[39], selector); - w[45] = hc_byte_perm_S (w[39], w[38], selector); - w[44] = hc_byte_perm_S (w[38], w[37], selector); - w[43] = hc_byte_perm_S (w[37], w[36], selector); - w[42] = hc_byte_perm_S (w[36], w[35], selector); - w[41] = hc_byte_perm_S (w[35], w[34], selector); - w[40] = hc_byte_perm_S (w[34], w[33], selector); - w[39] = hc_byte_perm_S (w[33], w[32], selector); - w[38] = hc_byte_perm_S (w[32], w[31], selector); - w[37] = hc_byte_perm_S (w[31], 
w[30], selector); - w[36] = hc_byte_perm_S (w[30], w[29], selector); - w[35] = hc_byte_perm_S (w[29], w[28], selector); - w[34] = hc_byte_perm_S (w[28], w[27], selector); - w[33] = hc_byte_perm_S (w[27], w[26], selector); - w[32] = hc_byte_perm_S (w[26], w[25], selector); - w[31] = hc_byte_perm_S (w[25], w[24], selector); - w[30] = hc_byte_perm_S (w[24], w[23], selector); - w[29] = hc_byte_perm_S (w[23], w[22], selector); - w[28] = hc_byte_perm_S (w[22], w[21], selector); - w[27] = hc_byte_perm_S (w[21], w[20], selector); - w[26] = hc_byte_perm_S (w[20], w[19], selector); - w[25] = hc_byte_perm_S (w[19], w[18], selector); - w[24] = hc_byte_perm_S (w[18], w[17], selector); - w[23] = hc_byte_perm_S (w[17], w[16], selector); - w[22] = hc_byte_perm_S (w[16], w[15], selector); - w[21] = hc_byte_perm_S (w[15], w[14], selector); - w[20] = hc_byte_perm_S (w[14], w[13], selector); - w[19] = hc_byte_perm_S (w[13], w[12], selector); - w[18] = hc_byte_perm_S (w[12], w[11], selector); - w[17] = hc_byte_perm_S (w[11], w[10], selector); - w[16] = hc_byte_perm_S (w[10], w[ 9], selector); - w[15] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[14] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[13] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[12] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[11] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[10] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[ 9] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 8] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 7] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 6] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 7: - w[63] = hc_byte_perm_S (w[56], w[55], selector); - w[62] = hc_byte_perm_S (w[55], w[54], selector); - w[61] = hc_byte_perm_S (w[54], w[53], selector); - w[60] = hc_byte_perm_S (w[53], w[52], selector); - w[59] = hc_byte_perm_S (w[52], w[51], selector); - w[58] = hc_byte_perm_S (w[51], w[50], 
selector); - w[57] = hc_byte_perm_S (w[50], w[49], selector); - w[56] = hc_byte_perm_S (w[49], w[48], selector); - w[55] = hc_byte_perm_S (w[48], w[47], selector); - w[54] = hc_byte_perm_S (w[47], w[46], selector); - w[53] = hc_byte_perm_S (w[46], w[45], selector); - w[52] = hc_byte_perm_S (w[45], w[44], selector); - w[51] = hc_byte_perm_S (w[44], w[43], selector); - w[50] = hc_byte_perm_S (w[43], w[42], selector); - w[49] = hc_byte_perm_S (w[42], w[41], selector); - w[48] = hc_byte_perm_S (w[41], w[40], selector); - w[47] = hc_byte_perm_S (w[40], w[39], selector); - w[46] = hc_byte_perm_S (w[39], w[38], selector); - w[45] = hc_byte_perm_S (w[38], w[37], selector); - w[44] = hc_byte_perm_S (w[37], w[36], selector); - w[43] = hc_byte_perm_S (w[36], w[35], selector); - w[42] = hc_byte_perm_S (w[35], w[34], selector); - w[41] = hc_byte_perm_S (w[34], w[33], selector); - w[40] = hc_byte_perm_S (w[33], w[32], selector); - w[39] = hc_byte_perm_S (w[32], w[31], selector); - w[38] = hc_byte_perm_S (w[31], w[30], selector); - w[37] = hc_byte_perm_S (w[30], w[29], selector); - w[36] = hc_byte_perm_S (w[29], w[28], selector); - w[35] = hc_byte_perm_S (w[28], w[27], selector); - w[34] = hc_byte_perm_S (w[27], w[26], selector); - w[33] = hc_byte_perm_S (w[26], w[25], selector); - w[32] = hc_byte_perm_S (w[25], w[24], selector); - w[31] = hc_byte_perm_S (w[24], w[23], selector); - w[30] = hc_byte_perm_S (w[23], w[22], selector); - w[29] = hc_byte_perm_S (w[22], w[21], selector); - w[28] = hc_byte_perm_S (w[21], w[20], selector); - w[27] = hc_byte_perm_S (w[20], w[19], selector); - w[26] = hc_byte_perm_S (w[19], w[18], selector); - w[25] = hc_byte_perm_S (w[18], w[17], selector); - w[24] = hc_byte_perm_S (w[17], w[16], selector); - w[23] = hc_byte_perm_S (w[16], w[15], selector); - w[22] = hc_byte_perm_S (w[15], w[14], selector); - w[21] = hc_byte_perm_S (w[14], w[13], selector); - w[20] = hc_byte_perm_S (w[13], w[12], selector); - w[19] = hc_byte_perm_S (w[12], w[11], selector); 
- w[18] = hc_byte_perm_S (w[11], w[10], selector); - w[17] = hc_byte_perm_S (w[10], w[ 9], selector); - w[16] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[15] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[14] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[13] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[12] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[11] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[10] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[ 9] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 8] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 7] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 8: - w[63] = hc_byte_perm_S (w[55], w[54], selector); - w[62] = hc_byte_perm_S (w[54], w[53], selector); - w[61] = hc_byte_perm_S (w[53], w[52], selector); - w[60] = hc_byte_perm_S (w[52], w[51], selector); - w[59] = hc_byte_perm_S (w[51], w[50], selector); - w[58] = hc_byte_perm_S (w[50], w[49], selector); - w[57] = hc_byte_perm_S (w[49], w[48], selector); - w[56] = hc_byte_perm_S (w[48], w[47], selector); - w[55] = hc_byte_perm_S (w[47], w[46], selector); - w[54] = hc_byte_perm_S (w[46], w[45], selector); - w[53] = hc_byte_perm_S (w[45], w[44], selector); - w[52] = hc_byte_perm_S (w[44], w[43], selector); - w[51] = hc_byte_perm_S (w[43], w[42], selector); - w[50] = hc_byte_perm_S (w[42], w[41], selector); - w[49] = hc_byte_perm_S (w[41], w[40], selector); - w[48] = hc_byte_perm_S (w[40], w[39], selector); - w[47] = hc_byte_perm_S (w[39], w[38], selector); - w[46] = hc_byte_perm_S (w[38], w[37], selector); - w[45] = hc_byte_perm_S (w[37], w[36], selector); - w[44] = hc_byte_perm_S (w[36], w[35], selector); - w[43] = hc_byte_perm_S (w[35], w[34], selector); - w[42] = hc_byte_perm_S (w[34], w[33], selector); - w[41] = hc_byte_perm_S (w[33], w[32], selector); - w[40] = hc_byte_perm_S (w[32], w[31], selector); - w[39] = hc_byte_perm_S (w[31], w[30], selector); - 
w[38] = hc_byte_perm_S (w[30], w[29], selector); - w[37] = hc_byte_perm_S (w[29], w[28], selector); - w[36] = hc_byte_perm_S (w[28], w[27], selector); - w[35] = hc_byte_perm_S (w[27], w[26], selector); - w[34] = hc_byte_perm_S (w[26], w[25], selector); - w[33] = hc_byte_perm_S (w[25], w[24], selector); - w[32] = hc_byte_perm_S (w[24], w[23], selector); - w[31] = hc_byte_perm_S (w[23], w[22], selector); - w[30] = hc_byte_perm_S (w[22], w[21], selector); - w[29] = hc_byte_perm_S (w[21], w[20], selector); - w[28] = hc_byte_perm_S (w[20], w[19], selector); - w[27] = hc_byte_perm_S (w[19], w[18], selector); - w[26] = hc_byte_perm_S (w[18], w[17], selector); - w[25] = hc_byte_perm_S (w[17], w[16], selector); - w[24] = hc_byte_perm_S (w[16], w[15], selector); - w[23] = hc_byte_perm_S (w[15], w[14], selector); - w[22] = hc_byte_perm_S (w[14], w[13], selector); - w[21] = hc_byte_perm_S (w[13], w[12], selector); - w[20] = hc_byte_perm_S (w[12], w[11], selector); - w[19] = hc_byte_perm_S (w[11], w[10], selector); - w[18] = hc_byte_perm_S (w[10], w[ 9], selector); - w[17] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[16] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[15] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[14] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[13] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[12] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[11] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[10] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[ 9] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 8] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 9: - w[63] = hc_byte_perm_S (w[54], w[53], selector); - w[62] = hc_byte_perm_S (w[53], w[52], selector); - w[61] = hc_byte_perm_S (w[52], w[51], selector); - w[60] = hc_byte_perm_S (w[51], w[50], selector); - w[59] = hc_byte_perm_S (w[50], w[49], selector); - w[58] = hc_byte_perm_S (w[49], w[48], 
selector); - w[57] = hc_byte_perm_S (w[48], w[47], selector); - w[56] = hc_byte_perm_S (w[47], w[46], selector); - w[55] = hc_byte_perm_S (w[46], w[45], selector); - w[54] = hc_byte_perm_S (w[45], w[44], selector); - w[53] = hc_byte_perm_S (w[44], w[43], selector); - w[52] = hc_byte_perm_S (w[43], w[42], selector); - w[51] = hc_byte_perm_S (w[42], w[41], selector); - w[50] = hc_byte_perm_S (w[41], w[40], selector); - w[49] = hc_byte_perm_S (w[40], w[39], selector); - w[48] = hc_byte_perm_S (w[39], w[38], selector); - w[47] = hc_byte_perm_S (w[38], w[37], selector); - w[46] = hc_byte_perm_S (w[37], w[36], selector); - w[45] = hc_byte_perm_S (w[36], w[35], selector); - w[44] = hc_byte_perm_S (w[35], w[34], selector); - w[43] = hc_byte_perm_S (w[34], w[33], selector); - w[42] = hc_byte_perm_S (w[33], w[32], selector); - w[41] = hc_byte_perm_S (w[32], w[31], selector); - w[40] = hc_byte_perm_S (w[31], w[30], selector); - w[39] = hc_byte_perm_S (w[30], w[29], selector); - w[38] = hc_byte_perm_S (w[29], w[28], selector); - w[37] = hc_byte_perm_S (w[28], w[27], selector); - w[36] = hc_byte_perm_S (w[27], w[26], selector); - w[35] = hc_byte_perm_S (w[26], w[25], selector); - w[34] = hc_byte_perm_S (w[25], w[24], selector); - w[33] = hc_byte_perm_S (w[24], w[23], selector); - w[32] = hc_byte_perm_S (w[23], w[22], selector); - w[31] = hc_byte_perm_S (w[22], w[21], selector); - w[30] = hc_byte_perm_S (w[21], w[20], selector); - w[29] = hc_byte_perm_S (w[20], w[19], selector); - w[28] = hc_byte_perm_S (w[19], w[18], selector); - w[27] = hc_byte_perm_S (w[18], w[17], selector); - w[26] = hc_byte_perm_S (w[17], w[16], selector); - w[25] = hc_byte_perm_S (w[16], w[15], selector); - w[24] = hc_byte_perm_S (w[15], w[14], selector); - w[23] = hc_byte_perm_S (w[14], w[13], selector); - w[22] = hc_byte_perm_S (w[13], w[12], selector); - w[21] = hc_byte_perm_S (w[12], w[11], selector); - w[20] = hc_byte_perm_S (w[11], w[10], selector); - w[19] = hc_byte_perm_S (w[10], w[ 9], selector); 
- w[18] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[17] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[16] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[15] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[14] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[13] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[12] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[11] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[10] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[ 9] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 10: - w[63] = hc_byte_perm_S (w[53], w[52], selector); - w[62] = hc_byte_perm_S (w[52], w[51], selector); - w[61] = hc_byte_perm_S (w[51], w[50], selector); - w[60] = hc_byte_perm_S (w[50], w[49], selector); - w[59] = hc_byte_perm_S (w[49], w[48], selector); - w[58] = hc_byte_perm_S (w[48], w[47], selector); - w[57] = hc_byte_perm_S (w[47], w[46], selector); - w[56] = hc_byte_perm_S (w[46], w[45], selector); - w[55] = hc_byte_perm_S (w[45], w[44], selector); - w[54] = hc_byte_perm_S (w[44], w[43], selector); - w[53] = hc_byte_perm_S (w[43], w[42], selector); - w[52] = hc_byte_perm_S (w[42], w[41], selector); - w[51] = hc_byte_perm_S (w[41], w[40], selector); - w[50] = hc_byte_perm_S (w[40], w[39], selector); - w[49] = hc_byte_perm_S (w[39], w[38], selector); - w[48] = hc_byte_perm_S (w[38], w[37], selector); - w[47] = hc_byte_perm_S (w[37], w[36], selector); - w[46] = hc_byte_perm_S (w[36], w[35], selector); - w[45] = hc_byte_perm_S (w[35], w[34], selector); - w[44] = hc_byte_perm_S (w[34], w[33], selector); - w[43] = hc_byte_perm_S (w[33], w[32], selector); - w[42] = hc_byte_perm_S (w[32], w[31], selector); - w[41] = hc_byte_perm_S (w[31], w[30], selector); - w[40] = hc_byte_perm_S (w[30], w[29], selector); - w[39] = hc_byte_perm_S (w[29], w[28], selector); - w[38] = hc_byte_perm_S (w[28], w[27], selector); - w[37] = hc_byte_perm_S 
(w[27], w[26], selector); - w[36] = hc_byte_perm_S (w[26], w[25], selector); - w[35] = hc_byte_perm_S (w[25], w[24], selector); - w[34] = hc_byte_perm_S (w[24], w[23], selector); - w[33] = hc_byte_perm_S (w[23], w[22], selector); - w[32] = hc_byte_perm_S (w[22], w[21], selector); - w[31] = hc_byte_perm_S (w[21], w[20], selector); - w[30] = hc_byte_perm_S (w[20], w[19], selector); - w[29] = hc_byte_perm_S (w[19], w[18], selector); - w[28] = hc_byte_perm_S (w[18], w[17], selector); - w[27] = hc_byte_perm_S (w[17], w[16], selector); - w[26] = hc_byte_perm_S (w[16], w[15], selector); - w[25] = hc_byte_perm_S (w[15], w[14], selector); - w[24] = hc_byte_perm_S (w[14], w[13], selector); - w[23] = hc_byte_perm_S (w[13], w[12], selector); - w[22] = hc_byte_perm_S (w[12], w[11], selector); - w[21] = hc_byte_perm_S (w[11], w[10], selector); - w[20] = hc_byte_perm_S (w[10], w[ 9], selector); - w[19] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[18] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[17] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[16] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[15] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[14] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[13] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[12] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[11] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[10] = hc_byte_perm_S (w[ 0], 0, selector); - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 11: - w[63] = hc_byte_perm_S (w[52], w[51], selector); - w[62] = hc_byte_perm_S (w[51], w[50], selector); - w[61] = hc_byte_perm_S (w[50], w[49], selector); - w[60] = hc_byte_perm_S (w[49], w[48], selector); - w[59] = hc_byte_perm_S (w[48], w[47], selector); - w[58] = hc_byte_perm_S (w[47], w[46], selector); - w[57] = hc_byte_perm_S (w[46], w[45], selector); - w[56] = hc_byte_perm_S (w[45], w[44], selector); - w[55] = hc_byte_perm_S (w[44], 
w[43], selector); - w[54] = hc_byte_perm_S (w[43], w[42], selector); - w[53] = hc_byte_perm_S (w[42], w[41], selector); - w[52] = hc_byte_perm_S (w[41], w[40], selector); - w[51] = hc_byte_perm_S (w[40], w[39], selector); - w[50] = hc_byte_perm_S (w[39], w[38], selector); - w[49] = hc_byte_perm_S (w[38], w[37], selector); - w[48] = hc_byte_perm_S (w[37], w[36], selector); - w[47] = hc_byte_perm_S (w[36], w[35], selector); - w[46] = hc_byte_perm_S (w[35], w[34], selector); - w[45] = hc_byte_perm_S (w[34], w[33], selector); - w[44] = hc_byte_perm_S (w[33], w[32], selector); - w[43] = hc_byte_perm_S (w[32], w[31], selector); - w[42] = hc_byte_perm_S (w[31], w[30], selector); - w[41] = hc_byte_perm_S (w[30], w[29], selector); - w[40] = hc_byte_perm_S (w[29], w[28], selector); - w[39] = hc_byte_perm_S (w[28], w[27], selector); - w[38] = hc_byte_perm_S (w[27], w[26], selector); - w[37] = hc_byte_perm_S (w[26], w[25], selector); - w[36] = hc_byte_perm_S (w[25], w[24], selector); - w[35] = hc_byte_perm_S (w[24], w[23], selector); - w[34] = hc_byte_perm_S (w[23], w[22], selector); - w[33] = hc_byte_perm_S (w[22], w[21], selector); - w[32] = hc_byte_perm_S (w[21], w[20], selector); - w[31] = hc_byte_perm_S (w[20], w[19], selector); - w[30] = hc_byte_perm_S (w[19], w[18], selector); - w[29] = hc_byte_perm_S (w[18], w[17], selector); - w[28] = hc_byte_perm_S (w[17], w[16], selector); - w[27] = hc_byte_perm_S (w[16], w[15], selector); - w[26] = hc_byte_perm_S (w[15], w[14], selector); - w[25] = hc_byte_perm_S (w[14], w[13], selector); - w[24] = hc_byte_perm_S (w[13], w[12], selector); - w[23] = hc_byte_perm_S (w[12], w[11], selector); - w[22] = hc_byte_perm_S (w[11], w[10], selector); - w[21] = hc_byte_perm_S (w[10], w[ 9], selector); - w[20] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[19] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[18] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[17] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[16] = hc_byte_perm_S (w[ 5], w[ 4], 
selector); - w[15] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[14] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[13] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[12] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[11] = hc_byte_perm_S (w[ 0], 0, selector); - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 12: - w[63] = hc_byte_perm_S (w[51], w[50], selector); - w[62] = hc_byte_perm_S (w[50], w[49], selector); - w[61] = hc_byte_perm_S (w[49], w[48], selector); - w[60] = hc_byte_perm_S (w[48], w[47], selector); - w[59] = hc_byte_perm_S (w[47], w[46], selector); - w[58] = hc_byte_perm_S (w[46], w[45], selector); - w[57] = hc_byte_perm_S (w[45], w[44], selector); - w[56] = hc_byte_perm_S (w[44], w[43], selector); - w[55] = hc_byte_perm_S (w[43], w[42], selector); - w[54] = hc_byte_perm_S (w[42], w[41], selector); - w[53] = hc_byte_perm_S (w[41], w[40], selector); - w[52] = hc_byte_perm_S (w[40], w[39], selector); - w[51] = hc_byte_perm_S (w[39], w[38], selector); - w[50] = hc_byte_perm_S (w[38], w[37], selector); - w[49] = hc_byte_perm_S (w[37], w[36], selector); - w[48] = hc_byte_perm_S (w[36], w[35], selector); - w[47] = hc_byte_perm_S (w[35], w[34], selector); - w[46] = hc_byte_perm_S (w[34], w[33], selector); - w[45] = hc_byte_perm_S (w[33], w[32], selector); - w[44] = hc_byte_perm_S (w[32], w[31], selector); - w[43] = hc_byte_perm_S (w[31], w[30], selector); - w[42] = hc_byte_perm_S (w[30], w[29], selector); - w[41] = hc_byte_perm_S (w[29], w[28], selector); - w[40] = hc_byte_perm_S (w[28], w[27], selector); - w[39] = hc_byte_perm_S (w[27], w[26], selector); - w[38] = hc_byte_perm_S (w[26], w[25], selector); - w[37] = hc_byte_perm_S (w[25], w[24], selector); - w[36] = hc_byte_perm_S (w[24], w[23], selector); - w[35] = hc_byte_perm_S (w[23], w[22], selector); - w[34] = hc_byte_perm_S (w[22], w[21], selector); - w[33] = hc_byte_perm_S (w[21], w[20], 
selector); - w[32] = hc_byte_perm_S (w[20], w[19], selector); - w[31] = hc_byte_perm_S (w[19], w[18], selector); - w[30] = hc_byte_perm_S (w[18], w[17], selector); - w[29] = hc_byte_perm_S (w[17], w[16], selector); - w[28] = hc_byte_perm_S (w[16], w[15], selector); - w[27] = hc_byte_perm_S (w[15], w[14], selector); - w[26] = hc_byte_perm_S (w[14], w[13], selector); - w[25] = hc_byte_perm_S (w[13], w[12], selector); - w[24] = hc_byte_perm_S (w[12], w[11], selector); - w[23] = hc_byte_perm_S (w[11], w[10], selector); - w[22] = hc_byte_perm_S (w[10], w[ 9], selector); - w[21] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[20] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[19] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[18] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[17] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[16] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[15] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[14] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[13] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[12] = hc_byte_perm_S (w[ 0], 0, selector); - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 13: - w[63] = hc_byte_perm_S (w[50], w[49], selector); - w[62] = hc_byte_perm_S (w[49], w[48], selector); - w[61] = hc_byte_perm_S (w[48], w[47], selector); - w[60] = hc_byte_perm_S (w[47], w[46], selector); - w[59] = hc_byte_perm_S (w[46], w[45], selector); - w[58] = hc_byte_perm_S (w[45], w[44], selector); - w[57] = hc_byte_perm_S (w[44], w[43], selector); - w[56] = hc_byte_perm_S (w[43], w[42], selector); - w[55] = hc_byte_perm_S (w[42], w[41], selector); - w[54] = hc_byte_perm_S (w[41], w[40], selector); - w[53] = hc_byte_perm_S (w[40], w[39], selector); - w[52] = hc_byte_perm_S (w[39], w[38], selector); - w[51] = hc_byte_perm_S (w[38], w[37], selector); - w[50] = hc_byte_perm_S (w[37], w[36], selector); - w[49] = hc_byte_perm_S 
(w[36], w[35], selector); - w[48] = hc_byte_perm_S (w[35], w[34], selector); - w[47] = hc_byte_perm_S (w[34], w[33], selector); - w[46] = hc_byte_perm_S (w[33], w[32], selector); - w[45] = hc_byte_perm_S (w[32], w[31], selector); - w[44] = hc_byte_perm_S (w[31], w[30], selector); - w[43] = hc_byte_perm_S (w[30], w[29], selector); - w[42] = hc_byte_perm_S (w[29], w[28], selector); - w[41] = hc_byte_perm_S (w[28], w[27], selector); - w[40] = hc_byte_perm_S (w[27], w[26], selector); - w[39] = hc_byte_perm_S (w[26], w[25], selector); - w[38] = hc_byte_perm_S (w[25], w[24], selector); - w[37] = hc_byte_perm_S (w[24], w[23], selector); - w[36] = hc_byte_perm_S (w[23], w[22], selector); - w[35] = hc_byte_perm_S (w[22], w[21], selector); - w[34] = hc_byte_perm_S (w[21], w[20], selector); - w[33] = hc_byte_perm_S (w[20], w[19], selector); - w[32] = hc_byte_perm_S (w[19], w[18], selector); - w[31] = hc_byte_perm_S (w[18], w[17], selector); - w[30] = hc_byte_perm_S (w[17], w[16], selector); - w[29] = hc_byte_perm_S (w[16], w[15], selector); - w[28] = hc_byte_perm_S (w[15], w[14], selector); - w[27] = hc_byte_perm_S (w[14], w[13], selector); - w[26] = hc_byte_perm_S (w[13], w[12], selector); - w[25] = hc_byte_perm_S (w[12], w[11], selector); - w[24] = hc_byte_perm_S (w[11], w[10], selector); - w[23] = hc_byte_perm_S (w[10], w[ 9], selector); - w[22] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[21] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[20] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[19] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[18] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[17] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[16] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[15] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[14] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[13] = hc_byte_perm_S (w[ 0], 0, selector); - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 
0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 14: - w[63] = hc_byte_perm_S (w[49], w[48], selector); - w[62] = hc_byte_perm_S (w[48], w[47], selector); - w[61] = hc_byte_perm_S (w[47], w[46], selector); - w[60] = hc_byte_perm_S (w[46], w[45], selector); - w[59] = hc_byte_perm_S (w[45], w[44], selector); - w[58] = hc_byte_perm_S (w[44], w[43], selector); - w[57] = hc_byte_perm_S (w[43], w[42], selector); - w[56] = hc_byte_perm_S (w[42], w[41], selector); - w[55] = hc_byte_perm_S (w[41], w[40], selector); - w[54] = hc_byte_perm_S (w[40], w[39], selector); - w[53] = hc_byte_perm_S (w[39], w[38], selector); - w[52] = hc_byte_perm_S (w[38], w[37], selector); - w[51] = hc_byte_perm_S (w[37], w[36], selector); - w[50] = hc_byte_perm_S (w[36], w[35], selector); - w[49] = hc_byte_perm_S (w[35], w[34], selector); - w[48] = hc_byte_perm_S (w[34], w[33], selector); - w[47] = hc_byte_perm_S (w[33], w[32], selector); - w[46] = hc_byte_perm_S (w[32], w[31], selector); - w[45] = hc_byte_perm_S (w[31], w[30], selector); - w[44] = hc_byte_perm_S (w[30], w[29], selector); - w[43] = hc_byte_perm_S (w[29], w[28], selector); - w[42] = hc_byte_perm_S (w[28], w[27], selector); - w[41] = hc_byte_perm_S (w[27], w[26], selector); - w[40] = hc_byte_perm_S (w[26], w[25], selector); - w[39] = hc_byte_perm_S (w[25], w[24], selector); - w[38] = hc_byte_perm_S (w[24], w[23], selector); - w[37] = hc_byte_perm_S (w[23], w[22], selector); - w[36] = hc_byte_perm_S (w[22], w[21], selector); - w[35] = hc_byte_perm_S (w[21], w[20], selector); - w[34] = hc_byte_perm_S (w[20], w[19], selector); - w[33] = hc_byte_perm_S (w[19], w[18], selector); - w[32] = hc_byte_perm_S (w[18], w[17], selector); - w[31] = hc_byte_perm_S (w[17], w[16], selector); - w[30] = hc_byte_perm_S (w[16], w[15], selector); - w[29] = hc_byte_perm_S (w[15], w[14], selector); - w[28] = hc_byte_perm_S (w[14], w[13], selector); - w[27] = hc_byte_perm_S (w[13], w[12], selector); - w[26] = hc_byte_perm_S (w[12], w[11], selector); - w[25] 
= hc_byte_perm_S (w[11], w[10], selector); - w[24] = hc_byte_perm_S (w[10], w[ 9], selector); - w[23] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[22] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[21] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[20] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[19] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[18] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[17] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[16] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[15] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[14] = hc_byte_perm_S (w[ 0], 0, selector); - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 15: - w[63] = hc_byte_perm_S (w[48], w[47], selector); - w[62] = hc_byte_perm_S (w[47], w[46], selector); - w[61] = hc_byte_perm_S (w[46], w[45], selector); - w[60] = hc_byte_perm_S (w[45], w[44], selector); - w[59] = hc_byte_perm_S (w[44], w[43], selector); - w[58] = hc_byte_perm_S (w[43], w[42], selector); - w[57] = hc_byte_perm_S (w[42], w[41], selector); - w[56] = hc_byte_perm_S (w[41], w[40], selector); - w[55] = hc_byte_perm_S (w[40], w[39], selector); - w[54] = hc_byte_perm_S (w[39], w[38], selector); - w[53] = hc_byte_perm_S (w[38], w[37], selector); - w[52] = hc_byte_perm_S (w[37], w[36], selector); - w[51] = hc_byte_perm_S (w[36], w[35], selector); - w[50] = hc_byte_perm_S (w[35], w[34], selector); - w[49] = hc_byte_perm_S (w[34], w[33], selector); - w[48] = hc_byte_perm_S (w[33], w[32], selector); - w[47] = hc_byte_perm_S (w[32], w[31], selector); - w[46] = hc_byte_perm_S (w[31], w[30], selector); - w[45] = hc_byte_perm_S (w[30], w[29], selector); - w[44] = hc_byte_perm_S (w[29], w[28], selector); - w[43] = hc_byte_perm_S (w[28], w[27], selector); - w[42] = hc_byte_perm_S (w[27], w[26], selector); - w[41] = hc_byte_perm_S (w[26], w[25], selector); - w[40] = 
hc_byte_perm_S (w[25], w[24], selector); - w[39] = hc_byte_perm_S (w[24], w[23], selector); - w[38] = hc_byte_perm_S (w[23], w[22], selector); - w[37] = hc_byte_perm_S (w[22], w[21], selector); - w[36] = hc_byte_perm_S (w[21], w[20], selector); - w[35] = hc_byte_perm_S (w[20], w[19], selector); - w[34] = hc_byte_perm_S (w[19], w[18], selector); - w[33] = hc_byte_perm_S (w[18], w[17], selector); - w[32] = hc_byte_perm_S (w[17], w[16], selector); - w[31] = hc_byte_perm_S (w[16], w[15], selector); - w[30] = hc_byte_perm_S (w[15], w[14], selector); - w[29] = hc_byte_perm_S (w[14], w[13], selector); - w[28] = hc_byte_perm_S (w[13], w[12], selector); - w[27] = hc_byte_perm_S (w[12], w[11], selector); - w[26] = hc_byte_perm_S (w[11], w[10], selector); - w[25] = hc_byte_perm_S (w[10], w[ 9], selector); - w[24] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[23] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[22] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[21] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[20] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[19] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[18] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[17] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[16] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[15] = hc_byte_perm_S (w[ 0], 0, selector); - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 16: - w[63] = hc_byte_perm_S (w[47], w[46], selector); - w[62] = hc_byte_perm_S (w[46], w[45], selector); - w[61] = hc_byte_perm_S (w[45], w[44], selector); - w[60] = hc_byte_perm_S (w[44], w[43], selector); - w[59] = hc_byte_perm_S (w[43], w[42], selector); - w[58] = hc_byte_perm_S (w[42], w[41], selector); - w[57] = hc_byte_perm_S (w[41], w[40], selector); - w[56] = hc_byte_perm_S (w[40], w[39], selector); - w[55] = hc_byte_perm_S (w[39], w[38], selector); - w[54] = 
hc_byte_perm_S (w[38], w[37], selector); - w[53] = hc_byte_perm_S (w[37], w[36], selector); - w[52] = hc_byte_perm_S (w[36], w[35], selector); - w[51] = hc_byte_perm_S (w[35], w[34], selector); - w[50] = hc_byte_perm_S (w[34], w[33], selector); - w[49] = hc_byte_perm_S (w[33], w[32], selector); - w[48] = hc_byte_perm_S (w[32], w[31], selector); - w[47] = hc_byte_perm_S (w[31], w[30], selector); - w[46] = hc_byte_perm_S (w[30], w[29], selector); - w[45] = hc_byte_perm_S (w[29], w[28], selector); - w[44] = hc_byte_perm_S (w[28], w[27], selector); - w[43] = hc_byte_perm_S (w[27], w[26], selector); - w[42] = hc_byte_perm_S (w[26], w[25], selector); - w[41] = hc_byte_perm_S (w[25], w[24], selector); - w[40] = hc_byte_perm_S (w[24], w[23], selector); - w[39] = hc_byte_perm_S (w[23], w[22], selector); - w[38] = hc_byte_perm_S (w[22], w[21], selector); - w[37] = hc_byte_perm_S (w[21], w[20], selector); - w[36] = hc_byte_perm_S (w[20], w[19], selector); - w[35] = hc_byte_perm_S (w[19], w[18], selector); - w[34] = hc_byte_perm_S (w[18], w[17], selector); - w[33] = hc_byte_perm_S (w[17], w[16], selector); - w[32] = hc_byte_perm_S (w[16], w[15], selector); - w[31] = hc_byte_perm_S (w[15], w[14], selector); - w[30] = hc_byte_perm_S (w[14], w[13], selector); - w[29] = hc_byte_perm_S (w[13], w[12], selector); - w[28] = hc_byte_perm_S (w[12], w[11], selector); - w[27] = hc_byte_perm_S (w[11], w[10], selector); - w[26] = hc_byte_perm_S (w[10], w[ 9], selector); - w[25] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[24] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[23] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[22] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[21] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[20] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[19] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[18] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[17] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[16] = hc_byte_perm_S (w[ 0], 0, selector); - w[15] = 0; - w[14] = 
0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 17: - w[63] = hc_byte_perm_S (w[46], w[45], selector); - w[62] = hc_byte_perm_S (w[45], w[44], selector); - w[61] = hc_byte_perm_S (w[44], w[43], selector); - w[60] = hc_byte_perm_S (w[43], w[42], selector); - w[59] = hc_byte_perm_S (w[42], w[41], selector); - w[58] = hc_byte_perm_S (w[41], w[40], selector); - w[57] = hc_byte_perm_S (w[40], w[39], selector); - w[56] = hc_byte_perm_S (w[39], w[38], selector); - w[55] = hc_byte_perm_S (w[38], w[37], selector); - w[54] = hc_byte_perm_S (w[37], w[36], selector); - w[53] = hc_byte_perm_S (w[36], w[35], selector); - w[52] = hc_byte_perm_S (w[35], w[34], selector); - w[51] = hc_byte_perm_S (w[34], w[33], selector); - w[50] = hc_byte_perm_S (w[33], w[32], selector); - w[49] = hc_byte_perm_S (w[32], w[31], selector); - w[48] = hc_byte_perm_S (w[31], w[30], selector); - w[47] = hc_byte_perm_S (w[30], w[29], selector); - w[46] = hc_byte_perm_S (w[29], w[28], selector); - w[45] = hc_byte_perm_S (w[28], w[27], selector); - w[44] = hc_byte_perm_S (w[27], w[26], selector); - w[43] = hc_byte_perm_S (w[26], w[25], selector); - w[42] = hc_byte_perm_S (w[25], w[24], selector); - w[41] = hc_byte_perm_S (w[24], w[23], selector); - w[40] = hc_byte_perm_S (w[23], w[22], selector); - w[39] = hc_byte_perm_S (w[22], w[21], selector); - w[38] = hc_byte_perm_S (w[21], w[20], selector); - w[37] = hc_byte_perm_S (w[20], w[19], selector); - w[36] = hc_byte_perm_S (w[19], w[18], selector); - w[35] = hc_byte_perm_S (w[18], w[17], selector); - w[34] = hc_byte_perm_S (w[17], w[16], selector); - w[33] = hc_byte_perm_S (w[16], w[15], selector); - w[32] = hc_byte_perm_S (w[15], w[14], selector); - w[31] = hc_byte_perm_S (w[14], w[13], selector); - w[30] = hc_byte_perm_S (w[13], w[12], selector); - w[29] = hc_byte_perm_S (w[12], w[11], selector); - 
w[28] = hc_byte_perm_S (w[11], w[10], selector); - w[27] = hc_byte_perm_S (w[10], w[ 9], selector); - w[26] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[25] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[24] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[23] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[22] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[21] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[20] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[19] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[18] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[17] = hc_byte_perm_S (w[ 0], 0, selector); - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 18: - w[63] = hc_byte_perm_S (w[45], w[44], selector); - w[62] = hc_byte_perm_S (w[44], w[43], selector); - w[61] = hc_byte_perm_S (w[43], w[42], selector); - w[60] = hc_byte_perm_S (w[42], w[41], selector); - w[59] = hc_byte_perm_S (w[41], w[40], selector); - w[58] = hc_byte_perm_S (w[40], w[39], selector); - w[57] = hc_byte_perm_S (w[39], w[38], selector); - w[56] = hc_byte_perm_S (w[38], w[37], selector); - w[55] = hc_byte_perm_S (w[37], w[36], selector); - w[54] = hc_byte_perm_S (w[36], w[35], selector); - w[53] = hc_byte_perm_S (w[35], w[34], selector); - w[52] = hc_byte_perm_S (w[34], w[33], selector); - w[51] = hc_byte_perm_S (w[33], w[32], selector); - w[50] = hc_byte_perm_S (w[32], w[31], selector); - w[49] = hc_byte_perm_S (w[31], w[30], selector); - w[48] = hc_byte_perm_S (w[30], w[29], selector); - w[47] = hc_byte_perm_S (w[29], w[28], selector); - w[46] = hc_byte_perm_S (w[28], w[27], selector); - w[45] = hc_byte_perm_S (w[27], w[26], selector); - w[44] = hc_byte_perm_S (w[26], w[25], selector); - w[43] = hc_byte_perm_S (w[25], w[24], selector); - w[42] = hc_byte_perm_S (w[24], w[23], selector); - w[41] = hc_byte_perm_S 
(w[23], w[22], selector); - w[40] = hc_byte_perm_S (w[22], w[21], selector); - w[39] = hc_byte_perm_S (w[21], w[20], selector); - w[38] = hc_byte_perm_S (w[20], w[19], selector); - w[37] = hc_byte_perm_S (w[19], w[18], selector); - w[36] = hc_byte_perm_S (w[18], w[17], selector); - w[35] = hc_byte_perm_S (w[17], w[16], selector); - w[34] = hc_byte_perm_S (w[16], w[15], selector); - w[33] = hc_byte_perm_S (w[15], w[14], selector); - w[32] = hc_byte_perm_S (w[14], w[13], selector); - w[31] = hc_byte_perm_S (w[13], w[12], selector); - w[30] = hc_byte_perm_S (w[12], w[11], selector); - w[29] = hc_byte_perm_S (w[11], w[10], selector); - w[28] = hc_byte_perm_S (w[10], w[ 9], selector); - w[27] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[26] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[25] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[24] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[23] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[22] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[21] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[20] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[19] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[18] = hc_byte_perm_S (w[ 0], 0, selector); - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 19: - w[63] = hc_byte_perm_S (w[44], w[43], selector); - w[62] = hc_byte_perm_S (w[43], w[42], selector); - w[61] = hc_byte_perm_S (w[42], w[41], selector); - w[60] = hc_byte_perm_S (w[41], w[40], selector); - w[59] = hc_byte_perm_S (w[40], w[39], selector); - w[58] = hc_byte_perm_S (w[39], w[38], selector); - w[57] = hc_byte_perm_S (w[38], w[37], selector); - w[56] = hc_byte_perm_S (w[37], w[36], selector); - w[55] = hc_byte_perm_S (w[36], w[35], selector); - w[54] = hc_byte_perm_S (w[35], w[34], selector); - w[53] = hc_byte_perm_S (w[34], 
w[33], selector); - w[52] = hc_byte_perm_S (w[33], w[32], selector); - w[51] = hc_byte_perm_S (w[32], w[31], selector); - w[50] = hc_byte_perm_S (w[31], w[30], selector); - w[49] = hc_byte_perm_S (w[30], w[29], selector); - w[48] = hc_byte_perm_S (w[29], w[28], selector); - w[47] = hc_byte_perm_S (w[28], w[27], selector); - w[46] = hc_byte_perm_S (w[27], w[26], selector); - w[45] = hc_byte_perm_S (w[26], w[25], selector); - w[44] = hc_byte_perm_S (w[25], w[24], selector); - w[43] = hc_byte_perm_S (w[24], w[23], selector); - w[42] = hc_byte_perm_S (w[23], w[22], selector); - w[41] = hc_byte_perm_S (w[22], w[21], selector); - w[40] = hc_byte_perm_S (w[21], w[20], selector); - w[39] = hc_byte_perm_S (w[20], w[19], selector); - w[38] = hc_byte_perm_S (w[19], w[18], selector); - w[37] = hc_byte_perm_S (w[18], w[17], selector); - w[36] = hc_byte_perm_S (w[17], w[16], selector); - w[35] = hc_byte_perm_S (w[16], w[15], selector); - w[34] = hc_byte_perm_S (w[15], w[14], selector); - w[33] = hc_byte_perm_S (w[14], w[13], selector); - w[32] = hc_byte_perm_S (w[13], w[12], selector); - w[31] = hc_byte_perm_S (w[12], w[11], selector); - w[30] = hc_byte_perm_S (w[11], w[10], selector); - w[29] = hc_byte_perm_S (w[10], w[ 9], selector); - w[28] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[27] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[26] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[25] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[24] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[23] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[22] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[21] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[20] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[19] = hc_byte_perm_S (w[ 0], 0, selector); - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - 
break; - - case 20: - w[63] = hc_byte_perm_S (w[43], w[42], selector); - w[62] = hc_byte_perm_S (w[42], w[41], selector); - w[61] = hc_byte_perm_S (w[41], w[40], selector); - w[60] = hc_byte_perm_S (w[40], w[39], selector); - w[59] = hc_byte_perm_S (w[39], w[38], selector); - w[58] = hc_byte_perm_S (w[38], w[37], selector); - w[57] = hc_byte_perm_S (w[37], w[36], selector); - w[56] = hc_byte_perm_S (w[36], w[35], selector); - w[55] = hc_byte_perm_S (w[35], w[34], selector); - w[54] = hc_byte_perm_S (w[34], w[33], selector); - w[53] = hc_byte_perm_S (w[33], w[32], selector); - w[52] = hc_byte_perm_S (w[32], w[31], selector); - w[51] = hc_byte_perm_S (w[31], w[30], selector); - w[50] = hc_byte_perm_S (w[30], w[29], selector); - w[49] = hc_byte_perm_S (w[29], w[28], selector); - w[48] = hc_byte_perm_S (w[28], w[27], selector); - w[47] = hc_byte_perm_S (w[27], w[26], selector); - w[46] = hc_byte_perm_S (w[26], w[25], selector); - w[45] = hc_byte_perm_S (w[25], w[24], selector); - w[44] = hc_byte_perm_S (w[24], w[23], selector); - w[43] = hc_byte_perm_S (w[23], w[22], selector); - w[42] = hc_byte_perm_S (w[22], w[21], selector); - w[41] = hc_byte_perm_S (w[21], w[20], selector); - w[40] = hc_byte_perm_S (w[20], w[19], selector); - w[39] = hc_byte_perm_S (w[19], w[18], selector); - w[38] = hc_byte_perm_S (w[18], w[17], selector); - w[37] = hc_byte_perm_S (w[17], w[16], selector); - w[36] = hc_byte_perm_S (w[16], w[15], selector); - w[35] = hc_byte_perm_S (w[15], w[14], selector); - w[34] = hc_byte_perm_S (w[14], w[13], selector); - w[33] = hc_byte_perm_S (w[13], w[12], selector); - w[32] = hc_byte_perm_S (w[12], w[11], selector); - w[31] = hc_byte_perm_S (w[11], w[10], selector); - w[30] = hc_byte_perm_S (w[10], w[ 9], selector); - w[29] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[28] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[27] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[26] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[25] = hc_byte_perm_S (w[ 5], w[ 4], 
selector); - w[24] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[23] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[22] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[21] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[20] = hc_byte_perm_S (w[ 0], 0, selector); - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 21: - w[63] = hc_byte_perm_S (w[42], w[41], selector); - w[62] = hc_byte_perm_S (w[41], w[40], selector); - w[61] = hc_byte_perm_S (w[40], w[39], selector); - w[60] = hc_byte_perm_S (w[39], w[38], selector); - w[59] = hc_byte_perm_S (w[38], w[37], selector); - w[58] = hc_byte_perm_S (w[37], w[36], selector); - w[57] = hc_byte_perm_S (w[36], w[35], selector); - w[56] = hc_byte_perm_S (w[35], w[34], selector); - w[55] = hc_byte_perm_S (w[34], w[33], selector); - w[54] = hc_byte_perm_S (w[33], w[32], selector); - w[53] = hc_byte_perm_S (w[32], w[31], selector); - w[52] = hc_byte_perm_S (w[31], w[30], selector); - w[51] = hc_byte_perm_S (w[30], w[29], selector); - w[50] = hc_byte_perm_S (w[29], w[28], selector); - w[49] = hc_byte_perm_S (w[28], w[27], selector); - w[48] = hc_byte_perm_S (w[27], w[26], selector); - w[47] = hc_byte_perm_S (w[26], w[25], selector); - w[46] = hc_byte_perm_S (w[25], w[24], selector); - w[45] = hc_byte_perm_S (w[24], w[23], selector); - w[44] = hc_byte_perm_S (w[23], w[22], selector); - w[43] = hc_byte_perm_S (w[22], w[21], selector); - w[42] = hc_byte_perm_S (w[21], w[20], selector); - w[41] = hc_byte_perm_S (w[20], w[19], selector); - w[40] = hc_byte_perm_S (w[19], w[18], selector); - w[39] = hc_byte_perm_S (w[18], w[17], selector); - w[38] = hc_byte_perm_S (w[17], w[16], selector); - w[37] = hc_byte_perm_S (w[16], w[15], selector); - w[36] = hc_byte_perm_S (w[15], w[14], selector); - w[35] = hc_byte_perm_S 
(w[14], w[13], selector); - w[34] = hc_byte_perm_S (w[13], w[12], selector); - w[33] = hc_byte_perm_S (w[12], w[11], selector); - w[32] = hc_byte_perm_S (w[11], w[10], selector); - w[31] = hc_byte_perm_S (w[10], w[ 9], selector); - w[30] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[29] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[28] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[27] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[26] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[25] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[24] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[23] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[22] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[21] = hc_byte_perm_S (w[ 0], 0, selector); - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 22: - w[63] = hc_byte_perm_S (w[41], w[40], selector); - w[62] = hc_byte_perm_S (w[40], w[39], selector); - w[61] = hc_byte_perm_S (w[39], w[38], selector); - w[60] = hc_byte_perm_S (w[38], w[37], selector); - w[59] = hc_byte_perm_S (w[37], w[36], selector); - w[58] = hc_byte_perm_S (w[36], w[35], selector); - w[57] = hc_byte_perm_S (w[35], w[34], selector); - w[56] = hc_byte_perm_S (w[34], w[33], selector); - w[55] = hc_byte_perm_S (w[33], w[32], selector); - w[54] = hc_byte_perm_S (w[32], w[31], selector); - w[53] = hc_byte_perm_S (w[31], w[30], selector); - w[52] = hc_byte_perm_S (w[30], w[29], selector); - w[51] = hc_byte_perm_S (w[29], w[28], selector); - w[50] = hc_byte_perm_S (w[28], w[27], selector); - w[49] = hc_byte_perm_S (w[27], w[26], selector); - w[48] = hc_byte_perm_S (w[26], w[25], selector); - w[47] = hc_byte_perm_S (w[25], w[24], selector); - w[46] = hc_byte_perm_S (w[24], w[23], selector); - w[45] = hc_byte_perm_S (w[23], w[22], 
selector); - w[44] = hc_byte_perm_S (w[22], w[21], selector); - w[43] = hc_byte_perm_S (w[21], w[20], selector); - w[42] = hc_byte_perm_S (w[20], w[19], selector); - w[41] = hc_byte_perm_S (w[19], w[18], selector); - w[40] = hc_byte_perm_S (w[18], w[17], selector); - w[39] = hc_byte_perm_S (w[17], w[16], selector); - w[38] = hc_byte_perm_S (w[16], w[15], selector); - w[37] = hc_byte_perm_S (w[15], w[14], selector); - w[36] = hc_byte_perm_S (w[14], w[13], selector); - w[35] = hc_byte_perm_S (w[13], w[12], selector); - w[34] = hc_byte_perm_S (w[12], w[11], selector); - w[33] = hc_byte_perm_S (w[11], w[10], selector); - w[32] = hc_byte_perm_S (w[10], w[ 9], selector); - w[31] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[30] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[29] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[28] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[27] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[26] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[25] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[24] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[23] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[22] = hc_byte_perm_S (w[ 0], 0, selector); - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 23: - w[63] = hc_byte_perm_S (w[40], w[39], selector); - w[62] = hc_byte_perm_S (w[39], w[38], selector); - w[61] = hc_byte_perm_S (w[38], w[37], selector); - w[60] = hc_byte_perm_S (w[37], w[36], selector); - w[59] = hc_byte_perm_S (w[36], w[35], selector); - w[58] = hc_byte_perm_S (w[35], w[34], selector); - w[57] = hc_byte_perm_S (w[34], w[33], selector); - w[56] = hc_byte_perm_S (w[33], w[32], selector); - w[55] = hc_byte_perm_S (w[32], w[31], selector); - w[54] = hc_byte_perm_S (w[31], w[30], selector); 
- w[53] = hc_byte_perm_S (w[30], w[29], selector); - w[52] = hc_byte_perm_S (w[29], w[28], selector); - w[51] = hc_byte_perm_S (w[28], w[27], selector); - w[50] = hc_byte_perm_S (w[27], w[26], selector); - w[49] = hc_byte_perm_S (w[26], w[25], selector); - w[48] = hc_byte_perm_S (w[25], w[24], selector); - w[47] = hc_byte_perm_S (w[24], w[23], selector); - w[46] = hc_byte_perm_S (w[23], w[22], selector); - w[45] = hc_byte_perm_S (w[22], w[21], selector); - w[44] = hc_byte_perm_S (w[21], w[20], selector); - w[43] = hc_byte_perm_S (w[20], w[19], selector); - w[42] = hc_byte_perm_S (w[19], w[18], selector); - w[41] = hc_byte_perm_S (w[18], w[17], selector); - w[40] = hc_byte_perm_S (w[17], w[16], selector); - w[39] = hc_byte_perm_S (w[16], w[15], selector); - w[38] = hc_byte_perm_S (w[15], w[14], selector); - w[37] = hc_byte_perm_S (w[14], w[13], selector); - w[36] = hc_byte_perm_S (w[13], w[12], selector); - w[35] = hc_byte_perm_S (w[12], w[11], selector); - w[34] = hc_byte_perm_S (w[11], w[10], selector); - w[33] = hc_byte_perm_S (w[10], w[ 9], selector); - w[32] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[31] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[30] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[29] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[28] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[27] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[26] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[25] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[24] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[23] = hc_byte_perm_S (w[ 0], 0, selector); - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 24: - w[63] = hc_byte_perm_S (w[39], w[38], selector); - w[62] = hc_byte_perm_S (w[38], w[37], 
selector); - w[61] = hc_byte_perm_S (w[37], w[36], selector); - w[60] = hc_byte_perm_S (w[36], w[35], selector); - w[59] = hc_byte_perm_S (w[35], w[34], selector); - w[58] = hc_byte_perm_S (w[34], w[33], selector); - w[57] = hc_byte_perm_S (w[33], w[32], selector); - w[56] = hc_byte_perm_S (w[32], w[31], selector); - w[55] = hc_byte_perm_S (w[31], w[30], selector); - w[54] = hc_byte_perm_S (w[30], w[29], selector); - w[53] = hc_byte_perm_S (w[29], w[28], selector); - w[52] = hc_byte_perm_S (w[28], w[27], selector); - w[51] = hc_byte_perm_S (w[27], w[26], selector); - w[50] = hc_byte_perm_S (w[26], w[25], selector); - w[49] = hc_byte_perm_S (w[25], w[24], selector); - w[48] = hc_byte_perm_S (w[24], w[23], selector); - w[47] = hc_byte_perm_S (w[23], w[22], selector); - w[46] = hc_byte_perm_S (w[22], w[21], selector); - w[45] = hc_byte_perm_S (w[21], w[20], selector); - w[44] = hc_byte_perm_S (w[20], w[19], selector); - w[43] = hc_byte_perm_S (w[19], w[18], selector); - w[42] = hc_byte_perm_S (w[18], w[17], selector); - w[41] = hc_byte_perm_S (w[17], w[16], selector); - w[40] = hc_byte_perm_S (w[16], w[15], selector); - w[39] = hc_byte_perm_S (w[15], w[14], selector); - w[38] = hc_byte_perm_S (w[14], w[13], selector); - w[37] = hc_byte_perm_S (w[13], w[12], selector); - w[36] = hc_byte_perm_S (w[12], w[11], selector); - w[35] = hc_byte_perm_S (w[11], w[10], selector); - w[34] = hc_byte_perm_S (w[10], w[ 9], selector); - w[33] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[32] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[31] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[30] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[29] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[28] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[27] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[26] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[25] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[24] = hc_byte_perm_S (w[ 0], 0, selector); - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - 
w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 25: - w[63] = hc_byte_perm_S (w[38], w[37], selector); - w[62] = hc_byte_perm_S (w[37], w[36], selector); - w[61] = hc_byte_perm_S (w[36], w[35], selector); - w[60] = hc_byte_perm_S (w[35], w[34], selector); - w[59] = hc_byte_perm_S (w[34], w[33], selector); - w[58] = hc_byte_perm_S (w[33], w[32], selector); - w[57] = hc_byte_perm_S (w[32], w[31], selector); - w[56] = hc_byte_perm_S (w[31], w[30], selector); - w[55] = hc_byte_perm_S (w[30], w[29], selector); - w[54] = hc_byte_perm_S (w[29], w[28], selector); - w[53] = hc_byte_perm_S (w[28], w[27], selector); - w[52] = hc_byte_perm_S (w[27], w[26], selector); - w[51] = hc_byte_perm_S (w[26], w[25], selector); - w[50] = hc_byte_perm_S (w[25], w[24], selector); - w[49] = hc_byte_perm_S (w[24], w[23], selector); - w[48] = hc_byte_perm_S (w[23], w[22], selector); - w[47] = hc_byte_perm_S (w[22], w[21], selector); - w[46] = hc_byte_perm_S (w[21], w[20], selector); - w[45] = hc_byte_perm_S (w[20], w[19], selector); - w[44] = hc_byte_perm_S (w[19], w[18], selector); - w[43] = hc_byte_perm_S (w[18], w[17], selector); - w[42] = hc_byte_perm_S (w[17], w[16], selector); - w[41] = hc_byte_perm_S (w[16], w[15], selector); - w[40] = hc_byte_perm_S (w[15], w[14], selector); - w[39] = hc_byte_perm_S (w[14], w[13], selector); - w[38] = hc_byte_perm_S (w[13], w[12], selector); - w[37] = hc_byte_perm_S (w[12], w[11], selector); - w[36] = hc_byte_perm_S (w[11], w[10], selector); - w[35] = hc_byte_perm_S (w[10], w[ 9], selector); - w[34] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[33] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[32] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[31] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[30] = hc_byte_perm_S (w[ 5], 
w[ 4], selector); - w[29] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[28] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[27] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[26] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[25] = hc_byte_perm_S (w[ 0], 0, selector); - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 26: - w[63] = hc_byte_perm_S (w[37], w[36], selector); - w[62] = hc_byte_perm_S (w[36], w[35], selector); - w[61] = hc_byte_perm_S (w[35], w[34], selector); - w[60] = hc_byte_perm_S (w[34], w[33], selector); - w[59] = hc_byte_perm_S (w[33], w[32], selector); - w[58] = hc_byte_perm_S (w[32], w[31], selector); - w[57] = hc_byte_perm_S (w[31], w[30], selector); - w[56] = hc_byte_perm_S (w[30], w[29], selector); - w[55] = hc_byte_perm_S (w[29], w[28], selector); - w[54] = hc_byte_perm_S (w[28], w[27], selector); - w[53] = hc_byte_perm_S (w[27], w[26], selector); - w[52] = hc_byte_perm_S (w[26], w[25], selector); - w[51] = hc_byte_perm_S (w[25], w[24], selector); - w[50] = hc_byte_perm_S (w[24], w[23], selector); - w[49] = hc_byte_perm_S (w[23], w[22], selector); - w[48] = hc_byte_perm_S (w[22], w[21], selector); - w[47] = hc_byte_perm_S (w[21], w[20], selector); - w[46] = hc_byte_perm_S (w[20], w[19], selector); - w[45] = hc_byte_perm_S (w[19], w[18], selector); - w[44] = hc_byte_perm_S (w[18], w[17], selector); - w[43] = hc_byte_perm_S (w[17], w[16], selector); - w[42] = hc_byte_perm_S (w[16], w[15], selector); - w[41] = hc_byte_perm_S (w[15], w[14], selector); - w[40] = hc_byte_perm_S (w[14], w[13], selector); - w[39] = hc_byte_perm_S (w[13], w[12], selector); - w[38] = hc_byte_perm_S (w[12], w[11], selector); - w[37] = hc_byte_perm_S (w[11], w[10], selector); - 
w[36] = hc_byte_perm_S (w[10], w[ 9], selector); - w[35] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[34] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[33] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[32] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[31] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[30] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[29] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[28] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[27] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[26] = hc_byte_perm_S (w[ 0], 0, selector); - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 27: - w[63] = hc_byte_perm_S (w[36], w[35], selector); - w[62] = hc_byte_perm_S (w[35], w[34], selector); - w[61] = hc_byte_perm_S (w[34], w[33], selector); - w[60] = hc_byte_perm_S (w[33], w[32], selector); - w[59] = hc_byte_perm_S (w[32], w[31], selector); - w[58] = hc_byte_perm_S (w[31], w[30], selector); - w[57] = hc_byte_perm_S (w[30], w[29], selector); - w[56] = hc_byte_perm_S (w[29], w[28], selector); - w[55] = hc_byte_perm_S (w[28], w[27], selector); - w[54] = hc_byte_perm_S (w[27], w[26], selector); - w[53] = hc_byte_perm_S (w[26], w[25], selector); - w[52] = hc_byte_perm_S (w[25], w[24], selector); - w[51] = hc_byte_perm_S (w[24], w[23], selector); - w[50] = hc_byte_perm_S (w[23], w[22], selector); - w[49] = hc_byte_perm_S (w[22], w[21], selector); - w[48] = hc_byte_perm_S (w[21], w[20], selector); - w[47] = hc_byte_perm_S (w[20], w[19], selector); - w[46] = hc_byte_perm_S (w[19], w[18], selector); - w[45] = hc_byte_perm_S (w[18], w[17], selector); - w[44] = hc_byte_perm_S (w[17], w[16], selector); - w[43] = hc_byte_perm_S (w[16], w[15], selector); - w[42] = 
hc_byte_perm_S (w[15], w[14], selector); - w[41] = hc_byte_perm_S (w[14], w[13], selector); - w[40] = hc_byte_perm_S (w[13], w[12], selector); - w[39] = hc_byte_perm_S (w[12], w[11], selector); - w[38] = hc_byte_perm_S (w[11], w[10], selector); - w[37] = hc_byte_perm_S (w[10], w[ 9], selector); - w[36] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[35] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[34] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[33] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[32] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[31] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[30] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[29] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[28] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[27] = hc_byte_perm_S (w[ 0], 0, selector); - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 28: - w[63] = hc_byte_perm_S (w[35], w[34], selector); - w[62] = hc_byte_perm_S (w[34], w[33], selector); - w[61] = hc_byte_perm_S (w[33], w[32], selector); - w[60] = hc_byte_perm_S (w[32], w[31], selector); - w[59] = hc_byte_perm_S (w[31], w[30], selector); - w[58] = hc_byte_perm_S (w[30], w[29], selector); - w[57] = hc_byte_perm_S (w[29], w[28], selector); - w[56] = hc_byte_perm_S (w[28], w[27], selector); - w[55] = hc_byte_perm_S (w[27], w[26], selector); - w[54] = hc_byte_perm_S (w[26], w[25], selector); - w[53] = hc_byte_perm_S (w[25], w[24], selector); - w[52] = hc_byte_perm_S (w[24], w[23], selector); - w[51] = hc_byte_perm_S (w[23], w[22], selector); - w[50] = hc_byte_perm_S (w[22], w[21], selector); - w[49] = hc_byte_perm_S (w[21], w[20], selector); - w[48] = hc_byte_perm_S (w[20], w[19], selector); - 
w[47] = hc_byte_perm_S (w[19], w[18], selector); - w[46] = hc_byte_perm_S (w[18], w[17], selector); - w[45] = hc_byte_perm_S (w[17], w[16], selector); - w[44] = hc_byte_perm_S (w[16], w[15], selector); - w[43] = hc_byte_perm_S (w[15], w[14], selector); - w[42] = hc_byte_perm_S (w[14], w[13], selector); - w[41] = hc_byte_perm_S (w[13], w[12], selector); - w[40] = hc_byte_perm_S (w[12], w[11], selector); - w[39] = hc_byte_perm_S (w[11], w[10], selector); - w[38] = hc_byte_perm_S (w[10], w[ 9], selector); - w[37] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[36] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[35] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[34] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[33] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[32] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[31] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[30] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[29] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[28] = hc_byte_perm_S (w[ 0], 0, selector); - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 29: - w[63] = hc_byte_perm_S (w[34], w[33], selector); - w[62] = hc_byte_perm_S (w[33], w[32], selector); - w[61] = hc_byte_perm_S (w[32], w[31], selector); - w[60] = hc_byte_perm_S (w[31], w[30], selector); - w[59] = hc_byte_perm_S (w[30], w[29], selector); - w[58] = hc_byte_perm_S (w[29], w[28], selector); - w[57] = hc_byte_perm_S (w[28], w[27], selector); - w[56] = hc_byte_perm_S (w[27], w[26], selector); - w[55] = hc_byte_perm_S (w[26], w[25], selector); - w[54] = hc_byte_perm_S (w[25], w[24], selector); - w[53] = hc_byte_perm_S (w[24], w[23], selector); - w[52] = hc_byte_perm_S (w[23], 
w[22], selector); - w[51] = hc_byte_perm_S (w[22], w[21], selector); - w[50] = hc_byte_perm_S (w[21], w[20], selector); - w[49] = hc_byte_perm_S (w[20], w[19], selector); - w[48] = hc_byte_perm_S (w[19], w[18], selector); - w[47] = hc_byte_perm_S (w[18], w[17], selector); - w[46] = hc_byte_perm_S (w[17], w[16], selector); - w[45] = hc_byte_perm_S (w[16], w[15], selector); - w[44] = hc_byte_perm_S (w[15], w[14], selector); - w[43] = hc_byte_perm_S (w[14], w[13], selector); - w[42] = hc_byte_perm_S (w[13], w[12], selector); - w[41] = hc_byte_perm_S (w[12], w[11], selector); - w[40] = hc_byte_perm_S (w[11], w[10], selector); - w[39] = hc_byte_perm_S (w[10], w[ 9], selector); - w[38] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[37] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[36] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[35] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[34] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[33] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[32] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[31] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[30] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[29] = hc_byte_perm_S (w[ 0], 0, selector); - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 30: - w[63] = hc_byte_perm_S (w[33], w[32], selector); - w[62] = hc_byte_perm_S (w[32], w[31], selector); - w[61] = hc_byte_perm_S (w[31], w[30], selector); - w[60] = hc_byte_perm_S (w[30], w[29], selector); - w[59] = hc_byte_perm_S (w[29], w[28], selector); - w[58] = hc_byte_perm_S (w[28], w[27], selector); - w[57] = hc_byte_perm_S (w[27], w[26], selector); - w[56] = hc_byte_perm_S (w[26], w[25], selector); - 
w[55] = hc_byte_perm_S (w[25], w[24], selector); - w[54] = hc_byte_perm_S (w[24], w[23], selector); - w[53] = hc_byte_perm_S (w[23], w[22], selector); - w[52] = hc_byte_perm_S (w[22], w[21], selector); - w[51] = hc_byte_perm_S (w[21], w[20], selector); - w[50] = hc_byte_perm_S (w[20], w[19], selector); - w[49] = hc_byte_perm_S (w[19], w[18], selector); - w[48] = hc_byte_perm_S (w[18], w[17], selector); - w[47] = hc_byte_perm_S (w[17], w[16], selector); - w[46] = hc_byte_perm_S (w[16], w[15], selector); - w[45] = hc_byte_perm_S (w[15], w[14], selector); - w[44] = hc_byte_perm_S (w[14], w[13], selector); - w[43] = hc_byte_perm_S (w[13], w[12], selector); - w[42] = hc_byte_perm_S (w[12], w[11], selector); - w[41] = hc_byte_perm_S (w[11], w[10], selector); - w[40] = hc_byte_perm_S (w[10], w[ 9], selector); - w[39] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[38] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[37] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[36] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[35] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[34] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[33] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[32] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[31] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[30] = hc_byte_perm_S (w[ 0], 0, selector); - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 31: - w[63] = hc_byte_perm_S (w[32], w[31], selector); - w[62] = hc_byte_perm_S (w[31], w[30], selector); - w[61] = hc_byte_perm_S (w[30], w[29], selector); - w[60] = hc_byte_perm_S (w[29], w[28], selector); - w[59] = hc_byte_perm_S (w[28], w[27], selector); - w[58] = 
hc_byte_perm_S (w[27], w[26], selector); - w[57] = hc_byte_perm_S (w[26], w[25], selector); - w[56] = hc_byte_perm_S (w[25], w[24], selector); - w[55] = hc_byte_perm_S (w[24], w[23], selector); - w[54] = hc_byte_perm_S (w[23], w[22], selector); - w[53] = hc_byte_perm_S (w[22], w[21], selector); - w[52] = hc_byte_perm_S (w[21], w[20], selector); - w[51] = hc_byte_perm_S (w[20], w[19], selector); - w[50] = hc_byte_perm_S (w[19], w[18], selector); - w[49] = hc_byte_perm_S (w[18], w[17], selector); - w[48] = hc_byte_perm_S (w[17], w[16], selector); - w[47] = hc_byte_perm_S (w[16], w[15], selector); - w[46] = hc_byte_perm_S (w[15], w[14], selector); - w[45] = hc_byte_perm_S (w[14], w[13], selector); - w[44] = hc_byte_perm_S (w[13], w[12], selector); - w[43] = hc_byte_perm_S (w[12], w[11], selector); - w[42] = hc_byte_perm_S (w[11], w[10], selector); - w[41] = hc_byte_perm_S (w[10], w[ 9], selector); - w[40] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[39] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[38] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[37] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[36] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[35] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[34] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[33] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[32] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[31] = hc_byte_perm_S (w[ 0], 0, selector); - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 32: - w[63] = hc_byte_perm_S (w[31], w[30], selector); - w[62] = hc_byte_perm_S (w[30], w[29], selector); - w[61] = hc_byte_perm_S (w[29], w[28], selector); - 
w[60] = hc_byte_perm_S (w[28], w[27], selector); - w[59] = hc_byte_perm_S (w[27], w[26], selector); - w[58] = hc_byte_perm_S (w[26], w[25], selector); - w[57] = hc_byte_perm_S (w[25], w[24], selector); - w[56] = hc_byte_perm_S (w[24], w[23], selector); - w[55] = hc_byte_perm_S (w[23], w[22], selector); - w[54] = hc_byte_perm_S (w[22], w[21], selector); - w[53] = hc_byte_perm_S (w[21], w[20], selector); - w[52] = hc_byte_perm_S (w[20], w[19], selector); - w[51] = hc_byte_perm_S (w[19], w[18], selector); - w[50] = hc_byte_perm_S (w[18], w[17], selector); - w[49] = hc_byte_perm_S (w[17], w[16], selector); - w[48] = hc_byte_perm_S (w[16], w[15], selector); - w[47] = hc_byte_perm_S (w[15], w[14], selector); - w[46] = hc_byte_perm_S (w[14], w[13], selector); - w[45] = hc_byte_perm_S (w[13], w[12], selector); - w[44] = hc_byte_perm_S (w[12], w[11], selector); - w[43] = hc_byte_perm_S (w[11], w[10], selector); - w[42] = hc_byte_perm_S (w[10], w[ 9], selector); - w[41] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[40] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[39] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[38] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[37] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[36] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[35] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[34] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[33] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[32] = hc_byte_perm_S (w[ 0], 0, selector); - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 33: - w[63] = hc_byte_perm_S (w[30], w[29], selector); - w[62] = hc_byte_perm_S (w[29], 
w[28], selector); - w[61] = hc_byte_perm_S (w[28], w[27], selector); - w[60] = hc_byte_perm_S (w[27], w[26], selector); - w[59] = hc_byte_perm_S (w[26], w[25], selector); - w[58] = hc_byte_perm_S (w[25], w[24], selector); - w[57] = hc_byte_perm_S (w[24], w[23], selector); - w[56] = hc_byte_perm_S (w[23], w[22], selector); - w[55] = hc_byte_perm_S (w[22], w[21], selector); - w[54] = hc_byte_perm_S (w[21], w[20], selector); - w[53] = hc_byte_perm_S (w[20], w[19], selector); - w[52] = hc_byte_perm_S (w[19], w[18], selector); - w[51] = hc_byte_perm_S (w[18], w[17], selector); - w[50] = hc_byte_perm_S (w[17], w[16], selector); - w[49] = hc_byte_perm_S (w[16], w[15], selector); - w[48] = hc_byte_perm_S (w[15], w[14], selector); - w[47] = hc_byte_perm_S (w[14], w[13], selector); - w[46] = hc_byte_perm_S (w[13], w[12], selector); - w[45] = hc_byte_perm_S (w[12], w[11], selector); - w[44] = hc_byte_perm_S (w[11], w[10], selector); - w[43] = hc_byte_perm_S (w[10], w[ 9], selector); - w[42] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[41] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[40] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[39] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[38] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[37] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[36] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[35] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[34] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[33] = hc_byte_perm_S (w[ 0], 0, selector); - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 34: - w[63] = hc_byte_perm_S (w[29], w[28], selector); - 
w[62] = hc_byte_perm_S (w[28], w[27], selector); - w[61] = hc_byte_perm_S (w[27], w[26], selector); - w[60] = hc_byte_perm_S (w[26], w[25], selector); - w[59] = hc_byte_perm_S (w[25], w[24], selector); - w[58] = hc_byte_perm_S (w[24], w[23], selector); - w[57] = hc_byte_perm_S (w[23], w[22], selector); - w[56] = hc_byte_perm_S (w[22], w[21], selector); - w[55] = hc_byte_perm_S (w[21], w[20], selector); - w[54] = hc_byte_perm_S (w[20], w[19], selector); - w[53] = hc_byte_perm_S (w[19], w[18], selector); - w[52] = hc_byte_perm_S (w[18], w[17], selector); - w[51] = hc_byte_perm_S (w[17], w[16], selector); - w[50] = hc_byte_perm_S (w[16], w[15], selector); - w[49] = hc_byte_perm_S (w[15], w[14], selector); - w[48] = hc_byte_perm_S (w[14], w[13], selector); - w[47] = hc_byte_perm_S (w[13], w[12], selector); - w[46] = hc_byte_perm_S (w[12], w[11], selector); - w[45] = hc_byte_perm_S (w[11], w[10], selector); - w[44] = hc_byte_perm_S (w[10], w[ 9], selector); - w[43] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[42] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[41] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[40] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[39] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[38] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[37] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[36] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[35] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[34] = hc_byte_perm_S (w[ 0], 0, selector); - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 35: - w[63] = hc_byte_perm_S (w[28], w[27], selector); - w[62] = 
hc_byte_perm_S (w[27], w[26], selector); - w[61] = hc_byte_perm_S (w[26], w[25], selector); - w[60] = hc_byte_perm_S (w[25], w[24], selector); - w[59] = hc_byte_perm_S (w[24], w[23], selector); - w[58] = hc_byte_perm_S (w[23], w[22], selector); - w[57] = hc_byte_perm_S (w[22], w[21], selector); - w[56] = hc_byte_perm_S (w[21], w[20], selector); - w[55] = hc_byte_perm_S (w[20], w[19], selector); - w[54] = hc_byte_perm_S (w[19], w[18], selector); - w[53] = hc_byte_perm_S (w[18], w[17], selector); - w[52] = hc_byte_perm_S (w[17], w[16], selector); - w[51] = hc_byte_perm_S (w[16], w[15], selector); - w[50] = hc_byte_perm_S (w[15], w[14], selector); - w[49] = hc_byte_perm_S (w[14], w[13], selector); - w[48] = hc_byte_perm_S (w[13], w[12], selector); - w[47] = hc_byte_perm_S (w[12], w[11], selector); - w[46] = hc_byte_perm_S (w[11], w[10], selector); - w[45] = hc_byte_perm_S (w[10], w[ 9], selector); - w[44] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[43] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[42] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[41] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[40] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[39] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[38] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[37] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[36] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[35] = hc_byte_perm_S (w[ 0], 0, selector); - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 36: - w[63] = hc_byte_perm_S (w[27], w[26], selector); - w[62] = hc_byte_perm_S (w[26], w[25], selector); - 
w[61] = hc_byte_perm_S (w[25], w[24], selector); - w[60] = hc_byte_perm_S (w[24], w[23], selector); - w[59] = hc_byte_perm_S (w[23], w[22], selector); - w[58] = hc_byte_perm_S (w[22], w[21], selector); - w[57] = hc_byte_perm_S (w[21], w[20], selector); - w[56] = hc_byte_perm_S (w[20], w[19], selector); - w[55] = hc_byte_perm_S (w[19], w[18], selector); - w[54] = hc_byte_perm_S (w[18], w[17], selector); - w[53] = hc_byte_perm_S (w[17], w[16], selector); - w[52] = hc_byte_perm_S (w[16], w[15], selector); - w[51] = hc_byte_perm_S (w[15], w[14], selector); - w[50] = hc_byte_perm_S (w[14], w[13], selector); - w[49] = hc_byte_perm_S (w[13], w[12], selector); - w[48] = hc_byte_perm_S (w[12], w[11], selector); - w[47] = hc_byte_perm_S (w[11], w[10], selector); - w[46] = hc_byte_perm_S (w[10], w[ 9], selector); - w[45] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[44] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[43] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[42] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[41] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[40] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[39] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[38] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[37] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[36] = hc_byte_perm_S (w[ 0], 0, selector); - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 37: - w[63] = hc_byte_perm_S (w[26], w[25], selector); - w[62] = hc_byte_perm_S (w[25], w[24], selector); - w[61] = hc_byte_perm_S (w[24], w[23], selector); - w[60] = hc_byte_perm_S (w[23], 
w[22], selector); - w[59] = hc_byte_perm_S (w[22], w[21], selector); - w[58] = hc_byte_perm_S (w[21], w[20], selector); - w[57] = hc_byte_perm_S (w[20], w[19], selector); - w[56] = hc_byte_perm_S (w[19], w[18], selector); - w[55] = hc_byte_perm_S (w[18], w[17], selector); - w[54] = hc_byte_perm_S (w[17], w[16], selector); - w[53] = hc_byte_perm_S (w[16], w[15], selector); - w[52] = hc_byte_perm_S (w[15], w[14], selector); - w[51] = hc_byte_perm_S (w[14], w[13], selector); - w[50] = hc_byte_perm_S (w[13], w[12], selector); - w[49] = hc_byte_perm_S (w[12], w[11], selector); - w[48] = hc_byte_perm_S (w[11], w[10], selector); - w[47] = hc_byte_perm_S (w[10], w[ 9], selector); - w[46] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[45] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[44] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[43] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[42] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[41] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[40] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[39] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[38] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[37] = hc_byte_perm_S (w[ 0], 0, selector); - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 38: - w[63] = hc_byte_perm_S (w[25], w[24], selector); - w[62] = hc_byte_perm_S (w[24], w[23], selector); - w[61] = hc_byte_perm_S (w[23], w[22], selector); - w[60] = hc_byte_perm_S (w[22], w[21], selector); - w[59] = hc_byte_perm_S (w[21], w[20], selector); - w[58] = hc_byte_perm_S (w[20], w[19], selector); - 
w[57] = hc_byte_perm_S (w[19], w[18], selector); - w[56] = hc_byte_perm_S (w[18], w[17], selector); - w[55] = hc_byte_perm_S (w[17], w[16], selector); - w[54] = hc_byte_perm_S (w[16], w[15], selector); - w[53] = hc_byte_perm_S (w[15], w[14], selector); - w[52] = hc_byte_perm_S (w[14], w[13], selector); - w[51] = hc_byte_perm_S (w[13], w[12], selector); - w[50] = hc_byte_perm_S (w[12], w[11], selector); - w[49] = hc_byte_perm_S (w[11], w[10], selector); - w[48] = hc_byte_perm_S (w[10], w[ 9], selector); - w[47] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[46] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[45] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[44] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[43] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[42] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[41] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[40] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[39] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[38] = hc_byte_perm_S (w[ 0], 0, selector); - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 39: - w[63] = hc_byte_perm_S (w[24], w[23], selector); - w[62] = hc_byte_perm_S (w[23], w[22], selector); - w[61] = hc_byte_perm_S (w[22], w[21], selector); - w[60] = hc_byte_perm_S (w[21], w[20], selector); - w[59] = hc_byte_perm_S (w[20], w[19], selector); - w[58] = hc_byte_perm_S (w[19], w[18], selector); - w[57] = hc_byte_perm_S (w[18], w[17], selector); - w[56] = hc_byte_perm_S (w[17], w[16], selector); - w[55] = hc_byte_perm_S (w[16], w[15], selector); - w[54] 
= hc_byte_perm_S (w[15], w[14], selector); - w[53] = hc_byte_perm_S (w[14], w[13], selector); - w[52] = hc_byte_perm_S (w[13], w[12], selector); - w[51] = hc_byte_perm_S (w[12], w[11], selector); - w[50] = hc_byte_perm_S (w[11], w[10], selector); - w[49] = hc_byte_perm_S (w[10], w[ 9], selector); - w[48] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[47] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[46] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[45] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[44] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[43] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[42] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[41] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[40] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[39] = hc_byte_perm_S (w[ 0], 0, selector); - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 40: - w[63] = hc_byte_perm_S (w[23], w[22], selector); - w[62] = hc_byte_perm_S (w[22], w[21], selector); - w[61] = hc_byte_perm_S (w[21], w[20], selector); - w[60] = hc_byte_perm_S (w[20], w[19], selector); - w[59] = hc_byte_perm_S (w[19], w[18], selector); - w[58] = hc_byte_perm_S (w[18], w[17], selector); - w[57] = hc_byte_perm_S (w[17], w[16], selector); - w[56] = hc_byte_perm_S (w[16], w[15], selector); - w[55] = hc_byte_perm_S (w[15], w[14], selector); - w[54] = hc_byte_perm_S (w[14], w[13], selector); - w[53] = hc_byte_perm_S (w[13], w[12], selector); - w[52] = hc_byte_perm_S (w[12], w[11], selector); - w[51] = hc_byte_perm_S (w[11], w[10], selector); - 
w[50] = hc_byte_perm_S (w[10], w[ 9], selector); - w[49] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[48] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[47] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[46] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[45] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[44] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[43] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[42] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[41] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[40] = hc_byte_perm_S (w[ 0], 0, selector); - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 41: - w[63] = hc_byte_perm_S (w[22], w[21], selector); - w[62] = hc_byte_perm_S (w[21], w[20], selector); - w[61] = hc_byte_perm_S (w[20], w[19], selector); - w[60] = hc_byte_perm_S (w[19], w[18], selector); - w[59] = hc_byte_perm_S (w[18], w[17], selector); - w[58] = hc_byte_perm_S (w[17], w[16], selector); - w[57] = hc_byte_perm_S (w[16], w[15], selector); - w[56] = hc_byte_perm_S (w[15], w[14], selector); - w[55] = hc_byte_perm_S (w[14], w[13], selector); - w[54] = hc_byte_perm_S (w[13], w[12], selector); - w[53] = hc_byte_perm_S (w[12], w[11], selector); - w[52] = hc_byte_perm_S (w[11], w[10], selector); - w[51] = hc_byte_perm_S (w[10], w[ 9], selector); - w[50] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[49] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[48] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[47] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[46] = hc_byte_perm_S (w[ 5], 
w[ 4], selector); - w[45] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[44] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[43] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[42] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[41] = hc_byte_perm_S (w[ 0], 0, selector); - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 42: - w[63] = hc_byte_perm_S (w[21], w[20], selector); - w[62] = hc_byte_perm_S (w[20], w[19], selector); - w[61] = hc_byte_perm_S (w[19], w[18], selector); - w[60] = hc_byte_perm_S (w[18], w[17], selector); - w[59] = hc_byte_perm_S (w[17], w[16], selector); - w[58] = hc_byte_perm_S (w[16], w[15], selector); - w[57] = hc_byte_perm_S (w[15], w[14], selector); - w[56] = hc_byte_perm_S (w[14], w[13], selector); - w[55] = hc_byte_perm_S (w[13], w[12], selector); - w[54] = hc_byte_perm_S (w[12], w[11], selector); - w[53] = hc_byte_perm_S (w[11], w[10], selector); - w[52] = hc_byte_perm_S (w[10], w[ 9], selector); - w[51] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[50] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[49] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[48] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[47] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[46] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[45] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[44] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[43] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[42] = hc_byte_perm_S (w[ 0], 0, selector); - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - 
w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 43: - w[63] = hc_byte_perm_S (w[20], w[19], selector); - w[62] = hc_byte_perm_S (w[19], w[18], selector); - w[61] = hc_byte_perm_S (w[18], w[17], selector); - w[60] = hc_byte_perm_S (w[17], w[16], selector); - w[59] = hc_byte_perm_S (w[16], w[15], selector); - w[58] = hc_byte_perm_S (w[15], w[14], selector); - w[57] = hc_byte_perm_S (w[14], w[13], selector); - w[56] = hc_byte_perm_S (w[13], w[12], selector); - w[55] = hc_byte_perm_S (w[12], w[11], selector); - w[54] = hc_byte_perm_S (w[11], w[10], selector); - w[53] = hc_byte_perm_S (w[10], w[ 9], selector); - w[52] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[51] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[50] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[49] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[48] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[47] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[46] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[45] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[44] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[43] = hc_byte_perm_S (w[ 0], 0, selector); - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; 
- w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 44: - w[63] = hc_byte_perm_S (w[19], w[18], selector); - w[62] = hc_byte_perm_S (w[18], w[17], selector); - w[61] = hc_byte_perm_S (w[17], w[16], selector); - w[60] = hc_byte_perm_S (w[16], w[15], selector); - w[59] = hc_byte_perm_S (w[15], w[14], selector); - w[58] = hc_byte_perm_S (w[14], w[13], selector); - w[57] = hc_byte_perm_S (w[13], w[12], selector); - w[56] = hc_byte_perm_S (w[12], w[11], selector); - w[55] = hc_byte_perm_S (w[11], w[10], selector); - w[54] = hc_byte_perm_S (w[10], w[ 9], selector); - w[53] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[52] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[51] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[50] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[49] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[48] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[47] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[46] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[45] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[44] = hc_byte_perm_S (w[ 0], 0, selector); - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 45: - w[63] = hc_byte_perm_S (w[18], w[17], selector); - w[62] = hc_byte_perm_S (w[17], w[16], selector); - w[61] = hc_byte_perm_S (w[16], w[15], selector); - w[60] = hc_byte_perm_S (w[15], w[14], selector); - w[59] = 
hc_byte_perm_S (w[14], w[13], selector); - w[58] = hc_byte_perm_S (w[13], w[12], selector); - w[57] = hc_byte_perm_S (w[12], w[11], selector); - w[56] = hc_byte_perm_S (w[11], w[10], selector); - w[55] = hc_byte_perm_S (w[10], w[ 9], selector); - w[54] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[53] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[52] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[51] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[50] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[49] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[48] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[47] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[46] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[45] = hc_byte_perm_S (w[ 0], 0, selector); - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 46: - w[63] = hc_byte_perm_S (w[17], w[16], selector); - w[62] = hc_byte_perm_S (w[16], w[15], selector); - w[61] = hc_byte_perm_S (w[15], w[14], selector); - w[60] = hc_byte_perm_S (w[14], w[13], selector); - w[59] = hc_byte_perm_S (w[13], w[12], selector); - w[58] = hc_byte_perm_S (w[12], w[11], selector); - w[57] = hc_byte_perm_S (w[11], w[10], selector); - w[56] = hc_byte_perm_S (w[10], w[ 9], selector); - w[55] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[54] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[53] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[52] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[51] = hc_byte_perm_S 
(w[ 5], w[ 4], selector); - w[50] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[49] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[48] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[47] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[46] = hc_byte_perm_S (w[ 0], 0, selector); - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 47: - w[63] = hc_byte_perm_S (w[16], w[15], selector); - w[62] = hc_byte_perm_S (w[15], w[14], selector); - w[61] = hc_byte_perm_S (w[14], w[13], selector); - w[60] = hc_byte_perm_S (w[13], w[12], selector); - w[59] = hc_byte_perm_S (w[12], w[11], selector); - w[58] = hc_byte_perm_S (w[11], w[10], selector); - w[57] = hc_byte_perm_S (w[10], w[ 9], selector); - w[56] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[55] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[54] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[53] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[52] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[51] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[50] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[49] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[48] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[47] = hc_byte_perm_S (w[ 0], 0, selector); - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - 
w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 48: - w[63] = hc_byte_perm_S (w[15], w[14], selector); - w[62] = hc_byte_perm_S (w[14], w[13], selector); - w[61] = hc_byte_perm_S (w[13], w[12], selector); - w[60] = hc_byte_perm_S (w[12], w[11], selector); - w[59] = hc_byte_perm_S (w[11], w[10], selector); - w[58] = hc_byte_perm_S (w[10], w[ 9], selector); - w[57] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[56] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[55] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[54] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[53] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[52] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[51] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[50] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[49] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[48] = hc_byte_perm_S (w[ 0], 0, selector); - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 49: - w[63] = hc_byte_perm_S (w[14], w[13], selector); - w[62] = hc_byte_perm_S (w[13], w[12], selector); - w[61] = hc_byte_perm_S (w[12], 
w[11], selector); - w[60] = hc_byte_perm_S (w[11], w[10], selector); - w[59] = hc_byte_perm_S (w[10], w[ 9], selector); - w[58] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[57] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[56] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[55] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[54] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[53] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[52] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[51] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[50] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[49] = hc_byte_perm_S (w[ 0], 0, selector); - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 50: - w[63] = hc_byte_perm_S (w[13], w[12], selector); - w[62] = hc_byte_perm_S (w[12], w[11], selector); - w[61] = hc_byte_perm_S (w[11], w[10], selector); - w[60] = hc_byte_perm_S (w[10], w[ 9], selector); - w[59] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[58] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[57] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[56] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[55] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[54] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[53] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[52] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[51] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[50] = hc_byte_perm_S (w[ 0], 0, selector); - 
w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 51: - w[63] = hc_byte_perm_S (w[12], w[11], selector); - w[62] = hc_byte_perm_S (w[11], w[10], selector); - w[61] = hc_byte_perm_S (w[10], w[ 9], selector); - w[60] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[59] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[58] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[57] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[56] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[55] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[54] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[53] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[52] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[51] = hc_byte_perm_S (w[ 0], 0, selector); - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - 
break; - - case 52: - w[63] = hc_byte_perm_S (w[11], w[10], selector); - w[62] = hc_byte_perm_S (w[10], w[ 9], selector); - w[61] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[60] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[59] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[58] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[57] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[56] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[55] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[54] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[53] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[52] = hc_byte_perm_S (w[ 0], 0, selector); - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 53: - w[63] = hc_byte_perm_S (w[10], w[ 9], selector); - w[62] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[61] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[60] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[59] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[58] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[57] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[56] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[55] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[54] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[53] = hc_byte_perm_S (w[ 0], 0, selector); - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 
0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 54: - w[63] = hc_byte_perm_S (w[ 9], w[ 8], selector); - w[62] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[61] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[60] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[59] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[58] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[57] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[56] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[55] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[54] = hc_byte_perm_S (w[ 0], 0, selector); - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 55: - w[63] = hc_byte_perm_S (w[ 8], w[ 7], selector); - w[62] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[61] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[60] = 
hc_byte_perm_S (w[ 5], w[ 4], selector); - w[59] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[58] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[57] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[56] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[55] = hc_byte_perm_S (w[ 0], 0, selector); - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 56: - w[63] = hc_byte_perm_S (w[ 7], w[ 6], selector); - w[62] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[61] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[60] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[59] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[58] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[57] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[56] = hc_byte_perm_S (w[ 0], 0, selector); - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - 
w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 57: - w[63] = hc_byte_perm_S (w[ 6], w[ 5], selector); - w[62] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[61] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[60] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[59] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[58] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[57] = hc_byte_perm_S (w[ 0], 0, selector); - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 58: - w[63] = hc_byte_perm_S (w[ 5], w[ 4], selector); - w[62] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[61] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[60] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[59] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[58] = hc_byte_perm_S (w[ 0], 0, selector); - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 
0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 59: - w[63] = hc_byte_perm_S (w[ 4], w[ 3], selector); - w[62] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[61] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[60] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[59] = hc_byte_perm_S (w[ 0], 0, selector); - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 60: - w[63] = hc_byte_perm_S (w[ 3], w[ 2], selector); - w[62] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[61] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[60] = hc_byte_perm_S (w[ 0], 0, selector); - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - 
w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 61: - w[63] = hc_byte_perm_S (w[ 2], w[ 1], selector); - w[62] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[61] = hc_byte_perm_S (w[ 0], 0, selector); - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 62: - w[63] = hc_byte_perm_S (w[ 1], w[ 0], selector); - w[62] = hc_byte_perm_S (w[ 0], 0, selector); - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - 
w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - - case 63: - w[63] = hc_byte_perm_S (w[ 0], 0, selector); - w[62] = 0; - w[61] = 0; - w[60] = 0; - w[59] = 0; - w[58] = 0; - w[57] = 0; - w[56] = 0; - w[55] = 0; - w[54] = 0; - w[53] = 0; - w[52] = 0; - w[51] = 0; - w[50] = 0; - w[49] = 0; - w[48] = 0; - w[47] = 0; - w[46] = 0; - w[45] = 0; - w[44] = 0; - w[43] = 0; - w[42] = 0; - w[41] = 0; - w[40] = 0; - w[39] = 0; - w[38] = 0; - w[37] = 0; - w[36] = 0; - w[35] = 0; - w[34] = 0; - w[33] = 0; - w[32] = 0; - w[31] = 0; - w[30] = 0; - w[29] = 0; - w[28] = 0; - w[27] = 0; - w[26] = 0; - w[25] = 0; - w[24] = 0; - w[23] = 0; - w[22] = 0; - w[21] = 0; - w[20] = 0; - w[19] = 0; - w[18] = 0; - w[17] = 0; - w[16] = 0; - w[15] = 0; - w[14] = 0; - w[13] = 0; - w[12] = 0; - w[11] = 0; - w[10] = 0; - w[ 9] = 0; - w[ 8] = 0; - w[ 7] = 0; - w[ 6] = 0; - w[ 5] = 0; - w[ 4] = 0; - w[ 3] = 0; - w[ 2] = 0; - w[ 1] = 0; - w[ 0] = 0; - - break; - } - #endif } /** diff --git a/OpenCL/inc_hash_blake2b.cl b/OpenCL/inc_hash_blake2b.cl index b205b18a7..9ec941b10 100644 --- a/OpenCL/inc_hash_blake2b.cl +++ b/OpenCL/inc_hash_blake2b.cl @@ -24,7 +24,7 @@ DECLSPEC u64 blake2b_rot16_S (const u64 a) return out.v64; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv64_t in; @@ -98,7 +98,7 @@ DECLSPEC u64 blake2b_rot24_S (const u64 a) return out.v64; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv64_t in; diff --git a/OpenCL/inc_hash_blake2s.cl b/OpenCL/inc_hash_blake2s.cl index 99d4389fb..d1c2e3716 100644 --- a/OpenCL/inc_hash_blake2s.cl +++ b/OpenCL/inc_hash_blake2s.cl @@ -77,7 +77,7 @@ DECLSPEC u32 
blake2s_rot08_S (const u32 a) return out.v32; - #elif (defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1 + #elif (defined IS_AMD || defined IS_HIP) vconv32_t in; diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index 5a8d04f4c..7e4308229 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -781,7 +781,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 src_r00 = src_r0[0]; const u32 src_r01 = src_r0[1]; const u32 src_r02 = src_r0[2]; @@ -882,123 +881,6 @@ DECLSPEC void append_block8_optimized (const u32 offset, PRIVATE_AS u32 *buf0, P s0 = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - const u32 src_r00 = src_r0[0]; - const u32 src_r01 = src_r0[1]; - const u32 src_r02 = src_r0[2]; - const u32 src_r03 = src_r0[3]; - const u32 src_r10 = src_r1[0]; - const u32 src_r11 = src_r1[1]; - const u32 src_r12 = src_r1[2]; - const u32 src_r13 = src_r1[3]; - - switch (offset_switch) - { - case 0: - s7 = hc_byte_perm_S (src_r12, src_r13, selector); - s6 = hc_byte_perm_S (src_r11, src_r12, selector); - s5 = hc_byte_perm_S (src_r10, src_r11, selector); - s4 = hc_byte_perm_S (src_r03, src_r10, selector); - s3 = hc_byte_perm_S (src_r02, src_r03, selector); - s2 = hc_byte_perm_S (src_r01, src_r02, selector); - s1 = hc_byte_perm_S (src_r00, src_r01, selector); - s0 = hc_byte_perm_S ( 0, src_r00, selector); - break; - - case 1: - s7 = hc_byte_perm_S (src_r11, src_r12, selector); - s6 = hc_byte_perm_S 
(src_r10, src_r11, selector); - s5 = hc_byte_perm_S (src_r03, src_r10, selector); - s4 = hc_byte_perm_S (src_r02, src_r03, selector); - s3 = hc_byte_perm_S (src_r01, src_r02, selector); - s2 = hc_byte_perm_S (src_r00, src_r01, selector); - s1 = hc_byte_perm_S ( 0, src_r00, selector); - s0 = 0; - break; - - case 2: - s7 = hc_byte_perm_S (src_r10, src_r11, selector); - s6 = hc_byte_perm_S (src_r03, src_r10, selector); - s5 = hc_byte_perm_S (src_r02, src_r03, selector); - s4 = hc_byte_perm_S (src_r01, src_r02, selector); - s3 = hc_byte_perm_S (src_r00, src_r01, selector); - s2 = hc_byte_perm_S ( 0, src_r00, selector); - s1 = 0; - s0 = 0; - break; - - case 3: - s7 = hc_byte_perm_S (src_r03, src_r10, selector); - s6 = hc_byte_perm_S (src_r02, src_r03, selector); - s5 = hc_byte_perm_S (src_r01, src_r02, selector); - s4 = hc_byte_perm_S (src_r00, src_r01, selector); - s3 = hc_byte_perm_S ( 0, src_r00, selector); - s2 = 0; - s1 = 0; - s0 = 0; - - break; - - case 4: - s7 = hc_byte_perm_S (src_r02, src_r03, selector); - s6 = hc_byte_perm_S (src_r01, src_r02, selector); - s5 = hc_byte_perm_S (src_r00, src_r01, selector); - s4 = hc_byte_perm_S ( 0, src_r00, selector); - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 5: - s7 = hc_byte_perm_S (src_r01, src_r02, selector); - s6 = hc_byte_perm_S (src_r00, src_r01, selector); - s5 = hc_byte_perm_S ( 0, src_r00, selector); - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 6: - s7 = hc_byte_perm_S (src_r00, src_r01, selector); - s6 = hc_byte_perm_S ( 0, src_r00, selector); - s5 = 0; - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - - case 7: - s7 = hc_byte_perm_S ( 0, src_r00, selector); - s6 = 0; - s5 = 0; - s4 = 0; - s3 = 0; - s2 = 0; - s1 = 0; - s0 = 0; - break; - } - #endif buf0[0] = src_l0[0] | s0; buf0[1] = src_l0[1] | s1; diff --git a/OpenCL/inc_vendor.h b/OpenCL/inc_vendor.h index 40414cbb2..3f4da2fa8 100644 --- a/OpenCL/inc_vendor.h +++ b/OpenCL/inc_vendor.h @@ -121,10 +121,6 @@ using 
namespace metal; #define IS_GENERIC #endif -#if defined IS_AMD && HAS_VPERM == 1 -#define IS_ROCM -#endif - #define LOCAL_MEM_TYPE_LOCAL 1 #define LOCAL_MEM_TYPE_GLOBAL 2 @@ -159,7 +155,7 @@ using namespace metal; #elif defined IS_CUDA #define DECLSPEC __device__ #elif defined IS_HIP -#define DECLSPEC __device__ +#define DECLSPEC __device__ HC_INLINE #else #define DECLSPEC #endif @@ -190,11 +186,6 @@ using namespace metal; #define USE_ROTATE #endif -#ifdef IS_ROCM -#define USE_BITSELECT -#define USE_ROTATE -#endif - #ifdef IS_INTEL_SDK #ifdef IS_CPU //#define USE_BITSELECT diff --git a/OpenCL/m00500-optimized.cl b/OpenCL/m00500-optimized.cl index d7b7f57d2..e91fdee6d 100644 --- a/OpenCL/m00500-optimized.cl +++ b/OpenCL/m00500-optimized.cl @@ -32,43 +32,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif + tmp0 = 
hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); const u32 div = offset / 4; @@ -145,45 +118,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); const u32 div = offset / 4; @@ -258,35 +203,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = 
append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m01600-optimized.cl b/OpenCL/m01600-optimized.cl index d113243e1..47ec5ba37 100644 --- a/OpenCL/m01600-optimized.cl +++ b/OpenCL/m01600-optimized.cl @@ -31,44 +31,17 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - 
u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif - + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); + const u32 div = offset / 4; switch (div) @@ -144,45 +117,17 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, 
offset); + tmp4 = hc_bytealign_S (in3, in4, offset); const u32 div = offset / 4; @@ -257,35 +202,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 37bbcf883..6465f1457 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -231,47 +231,18 @@ DECLSPEC void append_salt (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp4; u32 tmp5; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = append[4]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - tmp5 = hc_bytealign (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && 
HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = append[4]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - tmp5 = hc_byte_perm (in4, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); + tmp5 = hc_bytealign_S (in4, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m06300-optimized.cl b/OpenCL/m06300-optimized.cl index 39865927c..cdbd5179a 100644 --- a/OpenCL/m06300-optimized.cl +++ b/OpenCL/m06300-optimized.cl @@ -28,43 +28,16 @@ DECLSPEC void memcat16 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_ u32 tmp3; u32 tmp4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined 
IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, 0, offset); const u32 div = offset / 4; @@ -140,47 +113,18 @@ DECLSPEC void memcat16_x80 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIV u32 tmp2; u32 tmp3; u32 tmp4; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; u32 in2 = append[2]; u32 in3 = append[3]; u32 in4 = 0x80; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, in2, offset); - tmp3 = hc_bytealign (in2, in3, offset); - tmp4 = hc_bytealign (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - u32 in2 = append[2]; - u32 in3 = append[3]; - u32 in4 = 0x80; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, in2, selector); - tmp3 = hc_byte_perm (in2, in3, selector); - tmp4 = hc_byte_perm (in3, in4, selector); - #endif - + tmp0 = hc_bytealign_S ( 
0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, in2, offset); + tmp3 = hc_bytealign_S (in2, in3, offset); + tmp4 = hc_bytealign_S (in3, in4, offset); + const u32 div = offset / 4; switch (div) @@ -254,35 +198,12 @@ DECLSPEC void memcat8 (PRIVATE_AS u32 *block0, PRIVATE_AS u32 *block1, PRIVATE_A u32 tmp1; u32 tmp2; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC u32 in0 = append[0]; u32 in1 = append[1]; - tmp0 = hc_bytealign ( 0, in0, offset); - tmp1 = hc_bytealign (in0, in1, offset); - tmp2 = hc_bytealign (in1, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - const int offset_mod_4 = offset & 3; - - const int offset_minus_4 = 4 - offset_mod_4; - - #if defined IS_NV - const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (offset_minus_4 * 8)); - #endif - - u32 in0 = append[0]; - u32 in1 = append[1]; - - tmp0 = hc_byte_perm ( 0, in0, selector); - tmp1 = hc_byte_perm (in0, in1, selector); - tmp2 = hc_byte_perm (in1, 0, selector); - #endif + tmp0 = hc_bytealign_S ( 0, in0, offset); + tmp1 = hc_bytealign_S (in0, in1, offset); + tmp2 = hc_bytealign_S (in1, 0, offset); const u32 div = offset / 4; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index 1a3ea413c..a2db229c8 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -45,30 +45,11 @@ DECLSPEC u32 memcat16 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = 
hc_bytealign_be (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); switch (offset / 4) { @@ -172,30 +153,11 @@ DECLSPEC u32 memcat16c (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - 
const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, 0, offset); u32 carry[4] = { 0 }; @@ -336,32 +298,12 @@ DECLSPEC u32 memcat16s (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS cons u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, in4, offset); - const u32 tmp5 = hc_bytealign_be (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); + const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); switch (offset / 4) { @@ -477,32 +419,12 @@ DECLSPEC u32 memcat16sc 
(PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS con u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 tmp0 = hc_bytealign_be ( 0, in0, offset); - const u32 tmp1 = hc_bytealign_be (in0, in1, offset); - const u32 tmp2 = hc_bytealign_be (in1, in2, offset); - const u32 tmp3 = hc_bytealign_be (in2, in3, offset); - const u32 tmp4 = hc_bytealign_be (in3, in4, offset); - const u32 tmp5 = hc_bytealign_be (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif + const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); + const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); + const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); + const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); + const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); + const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); u32 carry[5] = { 0 }; @@ -784,30 +706,11 @@ DECLSPEC u32 memcat20 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in2 = append[2]; u32 in3 = append[3]; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = 
hc_bytealign_be_S (in3, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (0, in3, selector); - #endif switch (offset / 4) { @@ -950,30 +853,11 @@ DECLSPEC u32 memcat20_x80 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c u32 in3 = append[3]; u32 in4 = 0x80000000; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - #endif switch (offset / 4) { @@ -1116,32 +1000,12 @@ DECLSPEC u32 memcat24 (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 in3 = append[3]; u32 in4 = append[4]; - #if ((defined IS_AMD || defined IS_HIP) && 
HAS_VPERM == 0) || defined IS_GENERIC const u32 tmp0 = hc_bytealign_be_S ( 0, in0, offset); const u32 tmp1 = hc_bytealign_be_S (in0, in1, offset); const u32 tmp2 = hc_bytealign_be_S (in1, in2, offset); const u32 tmp3 = hc_bytealign_be_S (in2, in3, offset); const u32 tmp4 = hc_bytealign_be_S (in3, in4, offset); const u32 tmp5 = hc_bytealign_be_S (in4, 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - const u32 tmp0 = hc_byte_perm_S (in0, 0, selector); - const u32 tmp1 = hc_byte_perm_S (in1, in0, selector); - const u32 tmp2 = hc_byte_perm_S (in2, in1, selector); - const u32 tmp3 = hc_byte_perm_S (in3, in2, selector); - const u32 tmp4 = hc_byte_perm_S (in4, in3, selector); - const u32 tmp5 = hc_byte_perm_S (0, in4, selector); - #endif switch (offset / 4) { diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index b665dbbf3..7a958394a 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -234,34 +234,13 @@ DECLSPEC void make_sc (LOCAL_AS u32 *sc, PRIVATE_AS const u32 *pw, const u32 pw_ u32 i; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC for (i = 0; i < pd; i++) sc[idx++] = pw[i]; sc[idx++] = pw[i] - | hc_bytealign_be (bl[0], 0, pm4); - for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be (bl[i], bl[i - 1], pm4); - sc[idx++] = hc_bytealign_be (sc[0], bl[i - 1], pm4); - for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be (sc[i], sc[i - 1], pm4); - sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((pm4 & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined 
IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((pm4 & 3) * 8)); - #endif - - for (i = 0; i < pd; i++) sc[idx++] = pw[i]; - sc[idx++] = pw[i] - | hc_byte_perm ( 0, bl[0], selector); - for (i = 1; i < bd; i++) sc[idx++] = hc_byte_perm (bl[i - 1], bl[i], selector); - sc[idx++] = hc_byte_perm (bl[i - 1], sc[0], selector); - for (i = 1; i < 4; i++) sc[idx++] = hc_byte_perm (sc[i - 1], sc[i], selector); - sc[idx++] = hc_byte_perm (sc[i - 1], 0, selector); - #endif + | hc_bytealign_be_S (bl[0], 0, pm4); + for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be_S (bl[i], bl[i - 1], pm4); + sc[idx++] = hc_bytealign_be_S (sc[0], bl[i - 1], pm4); + for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be_S (sc[i], sc[i - 1], pm4); + sc[idx++] = hc_bytealign_be_S ( 0, sc[i - 1], pm4); } } @@ -272,27 +251,10 @@ DECLSPEC void make_pt_with_offset (PRIVATE_AS u32 *pt, const u32 offset, LOCAL_A const u32 om = m % 4; const u32 od = m / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om); - pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om); - pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om); - pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((om & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((om & 3) * 8)); - #endif - pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector); - pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector); - pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector); - pt[3] = hc_byte_perm (sc[od + 3], sc[od + 4], selector); - #endif + pt[0] = hc_bytealign_be_S (sc[od + 1], sc[od + 0], om); + pt[1] = hc_bytealign_be_S (sc[od + 2], sc[od + 1], om); + pt[2] = hc_bytealign_be_S (sc[od + 3], sc[od + 2], om); + pt[3] 
= hc_bytealign_be_S (sc[od + 4], sc[od + 3], om); } DECLSPEC void make_w_with_offset (PRIVATE_AS ctx_t *ctx, const u32 W_len, const u32 offset, LOCAL_AS const u32 *sc, const u32 pwbl_len, PRIVATE_AS u32 *iv, PRIVATE_AS const u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m11600-pure.cl b/OpenCL/m11600-pure.cl index 85e59a4da..44d6e3ff7 100644 --- a/OpenCL/m11600-pure.cl +++ b/OpenCL/m11600-pure.cl @@ -42,24 +42,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m12500-pure.cl b/OpenCL/m12500-pure.cl index 9ae20dc50..9738ca3bb 100644 --- a/OpenCL/m12500-pure.cl +++ b/OpenCL/m12500-pure.cl @@ -37,24 +37,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || 
defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m13800_a0-optimized.cl b/OpenCL/m13800_a0-optimized.cl index 2562fac9b..440fc322a 100644 --- a/OpenCL/m13800_a0-optimized.cl +++ b/OpenCL/m13800_a0-optimized.cl @@ -51,7 +51,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -69,36 +68,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], 
carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m13800_a1-optimized.cl b/OpenCL/m13800_a1-optimized.cl index 2ea23421b..09b8e9eaa 100644 --- a/OpenCL/m13800_a1-optimized.cl +++ b/OpenCL/m13800_a1-optimized.cl @@ -49,7 +49,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -67,36 +66,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm 
(carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m13800_a3-optimized.cl b/OpenCL/m13800_a3-optimized.cl index d79a9e8f7..a99473ebd 100644 --- a/OpenCL/m13800_a3-optimized.cl +++ b/OpenCL/m13800_a3-optimized.cl @@ -48,7 +48,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS u32x tmp15; u32x tmp16; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC tmp00 = hc_bytealign_be ( 0, carry[ 0], offset); tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset); tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset); @@ -66,36 +65,6 @@ DECLSPEC void memcat64c_be (PRIVATE_AS u32x *block, const u32 offset, PRIVATE_AS tmp14 = hc_bytealign_be (carry[13], carry[14], offset); tmp15 = hc_bytealign_be (carry[14], carry[15], offset); tmp16 = hc_bytealign_be (carry[15], 0, offset); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - tmp00 = hc_byte_perm (carry[ 0], 0, selector); - tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector); - tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector); - tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector); - tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector); - tmp05 = 
hc_byte_perm (carry[ 5], carry[ 4], selector); - tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector); - tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector); - tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector); - tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector); - tmp10 = hc_byte_perm (carry[10], carry[ 9], selector); - tmp11 = hc_byte_perm (carry[11], carry[10], selector); - tmp12 = hc_byte_perm (carry[12], carry[11], selector); - tmp13 = hc_byte_perm (carry[13], carry[12], selector); - tmp14 = hc_byte_perm (carry[14], carry[13], selector); - tmp15 = hc_byte_perm (carry[15], carry[14], selector); - tmp16 = hc_byte_perm ( 0, carry[15], selector); - #endif carry[ 0] = 0; carry[ 1] = 0; diff --git a/OpenCL/m17010-pure.cl b/OpenCL/m17010-pure.cl index 4c4e41571..c3ebe645e 100644 --- a/OpenCL/m17010-pure.cl +++ b/OpenCL/m17010-pure.cl @@ -42,31 +42,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -74,11 +49,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= 
hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -188,11 +163,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17020-pure.cl b/OpenCL/m17020-pure.cl index 29fda3bfa..0ec95fd3f 100644 --- a/OpenCL/m17020-pure.cl +++ b/OpenCL/m17020-pure.cl @@ -47,31 +47,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD 
|| defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -79,11 +54,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -193,11 +168,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S 
(decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17030-pure.cl b/OpenCL/m17030-pure.cl index b31f2eeb2..0b97696e5 100644 --- a/OpenCL/m17030-pure.cl +++ b/OpenCL/m17030-pure.cl @@ -43,31 +43,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S ((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - 
block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -189,11 +164,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); memzero_le_S (decoded_data, sha1_byte_off, 384 * sizeof(u32)); diff --git a/OpenCL/m17040-pure.cl b/OpenCL/m17040-pure.cl index 020ab6b64..dea375187 100644 --- a/OpenCL/m17040-pure.cl +++ b/OpenCL/m17040-pure.cl @@ -43,31 +43,6 @@ typedef struct gpg_tmp } gpg_tmp_t; - -DECLSPEC u32 hc_bytealign_le_S (const u32 a, const u32 b, const int c) -{ - const int c_mod_4 = c & 3; - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - const u32 r = l32_from_64_S 
((v64_from_v32ab_S (b, a) >> (c_mod_4 * 8))); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> (c_mod_4 * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> (c_mod_4 * 8)); - #endif - - const u32 r = hc_byte_perm (b, a, selector); - #endif - - return r; -} - DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS const u32 *append, u32 len) { const u32 start_index = (offset - 1) >> 2; @@ -75,11 +50,11 @@ DECLSPEC void memcat_le_S (PRIVATE_AS u32 *block, const u32 offset, PRIVATE_AS c const int off_mod_4 = offset & 3; const int off_minus_4 = 4 - off_mod_4; - block[start_index] |= hc_bytealign_le_S (append[0], 0, off_minus_4); + block[start_index] |= hc_bytealign_be_S (append[0], 0, off_minus_4); for (u32 idx = 1; idx < count; idx++) { - block[start_index + idx] = hc_bytealign_le_S (append[idx], append[idx - 1], off_minus_4); + block[start_index + idx] = hc_bytealign_be_S (append[idx], append[idx - 1], off_minus_4); } } @@ -168,11 +143,11 @@ DECLSPEC int check_decoded_data (PRIVATE_AS u32 *decoded_data, const u32 decoded u32 expected_sha1[5]; - expected_sha1[0] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); - expected_sha1[1] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); - expected_sha1[2] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); - expected_sha1[3] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); - expected_sha1[4] = hc_bytealign_le_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); + expected_sha1[0] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 1], decoded_data[sha1_u32_off + 0], sha1_byte_off); + expected_sha1[1] = 
hc_bytealign_be_S (decoded_data[sha1_u32_off + 2], decoded_data[sha1_u32_off + 1], sha1_byte_off); + expected_sha1[2] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 3], decoded_data[sha1_u32_off + 2], sha1_byte_off); + expected_sha1[3] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 4], decoded_data[sha1_u32_off + 3], sha1_byte_off); + expected_sha1[4] = hc_bytealign_be_S (decoded_data[sha1_u32_off + 5], decoded_data[sha1_u32_off + 4], sha1_byte_off); diff --git a/OpenCL/m23700-pure.cl b/OpenCL/m23700-pure.cl index 4e520a5cd..1a5fe7504 100644 --- a/OpenCL/m23700-pure.cl +++ b/OpenCL/m23700-pure.cl @@ -145,24 +145,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m23800-pure.cl b/OpenCL/m23800-pure.cl index 71bd9a50f..78f98eed6 100644 --- a/OpenCL/m23800-pure.cl +++ b/OpenCL/m23800-pure.cl @@ -58,24 +58,8 @@ DECLSPEC void memcat8c_be (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u3 u32 tmp0; u32 tmp1; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC - tmp0 = hc_bytealign_be (0, append, func_len); - tmp1 = hc_bytealign_be (append, 0, func_len); - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if 
defined IS_NV - const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S (0x0706050403020100UL >> ((func_len & 3) * 8)); - #endif - - tmp0 = hc_byte_perm (append, 0, selector); - tmp1 = hc_byte_perm (0, append, selector); - #endif + tmp0 = hc_bytealign_be_S (0, append, func_len); + tmp1 = hc_bytealign_be_S (append, 0, func_len); u32 carry = 0; diff --git a/OpenCL/m31400_a0-optimized.cl b/OpenCL/m31400_a0-optimized.cl index 3d386cb7c..2d1155546 100644 --- a/OpenCL/m31400_a0-optimized.cl +++ b/OpenCL/m31400_a0-optimized.cl @@ -29,7 +29,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -67,56 +66,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; 
- - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a0-pure.cl b/OpenCL/m31400_a0-pure.cl index 50a005e33..dfeb887ad 100644 --- a/OpenCL/m31400_a0-pure.cl +++ b/OpenCL/m31400_a0-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS 
u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a1-optimized.cl b/OpenCL/m31400_a1-optimized.cl index aba0ad4fa..02e00eee8 100644 --- a/OpenCL/m31400_a1-optimized.cl +++ b/OpenCL/m31400_a1-optimized.cl @@ -26,7 +26,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -64,56 +63,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 
*s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a1-pure.cl b/OpenCL/m31400_a1-pure.cl index 96774ca7b..a45dbff6f 100644 --- a/OpenCL/m31400_a1-pure.cl +++ b/OpenCL/m31400_a1-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git 
a/OpenCL/m31400_a3-optimized.cl b/OpenCL/m31400_a3-optimized.cl index bc30d42c3..40eb9753e 100644 --- a/OpenCL/m31400_a3-optimized.cl +++ b/OpenCL/m31400_a3-optimized.cl @@ -28,7 +28,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -66,56 +65,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) diff --git a/OpenCL/m31400_a3-pure.cl b/OpenCL/m31400_a3-pure.cl index 09d450df9..861cef146 100644 --- 
a/OpenCL/m31400_a3-pure.cl +++ b/OpenCL/m31400_a3-pure.cl @@ -31,7 +31,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) { const int offset_switch = offset / 4; - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 0) || defined IS_GENERIC switch (offset_switch) { case 0: @@ -69,56 +68,6 @@ DECLSPEC void shift_buffer_by_offset (PRIVATE_AS u32 *w0, const u32 offset) w0[0] = 0; break; } - #endif - - #if ((defined IS_AMD || defined IS_HIP) && HAS_VPERM == 1) || defined IS_NV - - #if defined IS_NV - const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff; - #endif - - #if (defined IS_AMD || defined IS_HIP) - const int selector = l32_from_64_S(0x0706050403020100UL >> ((offset & 3) * 8)); - #endif - - switch (offset_switch) - { - case 0: - w0[3] = hc_byte_perm_S (w0[3], w0[2], selector); - w0[2] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[1] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[0] = hc_byte_perm_S (w0[0], 0, selector); - break; - - case 1: - w0[3] = hc_byte_perm_S (w0[2], w0[1], selector); - w0[2] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[1] = hc_byte_perm_S (w0[0], 0, selector); - w0[0] = 0; - break; - - case 2: - w0[3] = hc_byte_perm_S (w0[1], w0[0], selector); - w0[2] = hc_byte_perm_S (w0[0], 0, selector); - w0[1] = 0; - w0[0] = 0; - break; - - case 3: - w0[3] = hc_byte_perm_S (w0[0], 0, selector); - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - - default: - w0[3] = 0; - w0[2] = 0; - w0[1] = 0; - w0[0] = 0; - break; - } - #endif } DECLSPEC void aes256_scrt_format (PRIVATE_AS u32 *aes_ks, PRIVATE_AS u32 *pw, const u32 pw_len, PRIVATE_AS u32 *hash, PRIVATE_AS u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) From 2532b83c22bfcc0dacb54bbc73e97917d519efbb Mon Sep 17 00:00:00 2001 From: hashcat-bot Date: Thu, 10 Jul 2025 21:21:14 +0200 Subject: [PATCH 37/57] Update license.txt --- docs/license.txt | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/docs/license.txt b/docs/license.txt index 88a5d2361..335df3b5b 100644 --- a/docs/license.txt +++ b/docs/license.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2015-2024 Jens Steube +Copyright (c) 2015-2025 Jens Steube Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From d4cefed0a7462c44d247a1366043dee167978529 Mon Sep 17 00:00:00 2001 From: PenguinKeeper7 Date: Thu, 10 Jul 2025 21:24:44 +0100 Subject: [PATCH 38/57] Search for more Electrum prv key prefixes --- OpenCL/m16600_a0-optimized.cl | 20 ++++++++++++++++++-- OpenCL/m16600_a0-pure.cl | 20 ++++++++++++++++++-- OpenCL/m16600_a1-optimized.cl | 20 ++++++++++++++++++-- OpenCL/m16600_a1-pure.cl | 20 ++++++++++++++++++-- OpenCL/m16600_a3-optimized.cl | 10 +++++++++- OpenCL/m16600_a3-pure.cl | 20 ++++++++++++++++++-- 6 files changed, 99 insertions(+), 11 deletions(-) diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl index 1d3b4871f..7cca797c1 100644 --- a/OpenCL/m16600_a0-optimized.cl +++ b/OpenCL/m16600_a0-optimized.cl @@ -392,7 +392,15 @@ KERNEL_FQ KERNEL_FA void m16600_m04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; @@ -804,7 +812,15 @@ KERNEL_FQ KERNEL_FA void m16600_s04 (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + 
// Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; diff --git a/OpenCL/m16600_a0-pure.cl b/OpenCL/m16600_a0-pure.cl index 40655d3b6..5e11ac656 100644 --- a/OpenCL/m16600_a0-pure.cl +++ b/OpenCL/m16600_a0-pure.cl @@ -206,7 +206,15 @@ KERNEL_FQ KERNEL_FA void m16600_mxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; @@ -424,7 +432,15 @@ KERNEL_FQ KERNEL_FA void m16600_sxx (KERN_ATTR_RULES_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl index 0a51cd748..c8b9ee3de 100644 --- a/OpenCL/m16600_a1-optimized.cl +++ b/OpenCL/m16600_a1-optimized.cl @@ -448,7 +448,15 @@ KERNEL_FQ KERNEL_FA void m16600_m04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // 
https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; @@ -918,7 +926,15 @@ KERNEL_FQ KERNEL_FA void m16600_s04 (KERN_ATTR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; diff --git a/OpenCL/m16600_a1-pure.cl b/OpenCL/m16600_a1-pure.cl index b654820c4..20fe1aebc 100644 --- a/OpenCL/m16600_a1-pure.cl +++ b/OpenCL/m16600_a1-pure.cl @@ -202,7 +202,15 @@ KERNEL_FQ KERNEL_FA void m16600_mxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; @@ -418,7 +426,15 @@ KERNEL_FQ KERNEL_FA void m16600_sxx (KERN_ATTR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + 
version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; diff --git a/OpenCL/m16600_a3-optimized.cl b/OpenCL/m16600_a3-optimized.cl index 45760ab33..447a8a14c 100644 --- a/OpenCL/m16600_a3-optimized.cl +++ b/OpenCL/m16600_a3-optimized.cl @@ -167,7 +167,15 @@ DECLSPEC void m16600 (SHM_TYPE u32a *s_te0, SHM_TYPE u32a *s_te1, SHM_TYPE u32a if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; diff --git a/OpenCL/m16600_a3-pure.cl b/OpenCL/m16600_a3-pure.cl index a611b6759..bf97a1db7 100644 --- a/OpenCL/m16600_a3-pure.cl +++ b/OpenCL/m16600_a3-pure.cl @@ -215,7 +215,15 @@ KERNEL_FQ KERNEL_FA void m16600_mxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if (version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; @@ -444,7 +452,15 @@ KERNEL_FQ KERNEL_FA void m16600_sxx (KERN_ATTR_VECTOR_ESALT (electrum_wallet_t)) if (salt_type == 2) { - if ((u8) (out[0] >> 0) != 'x') continue; + u8 version = (u8) (out[0] >> 0); + + // https://github.com/spesmilo/electrum-docs/blob/master/xpub_version_bytes.rst + // Does not include testnet addresses + if 
(version != 'x' && + version != 'y' && + version != 'Y' && + version != 'z' && + version != 'Z' ) continue; if ((u8) (out[0] >> 8) != 'p') continue; if ((u8) (out[0] >> 16) != 'r') continue; if ((u8) (out[0] >> 24) != 'v') continue; From 278dac2dd359836f79123a008de709fa082e8576 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Thu, 10 Jul 2025 22:41:20 +0200 Subject: [PATCH 39/57] show some warnings only if quiet and machine_readable options are set to false --- src/backend.c | 16 +++++++---- src/bridges/bridge_python_generic_hash_mp.c | 24 ++++++++++++----- src/bridges/bridge_python_generic_hash_sp.c | 30 ++++++++++++++------- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/src/backend.c b/src/backend.c index e9e7a5572..317be0168 100644 --- a/src/backend.c +++ b/src/backend.c @@ -10217,11 +10217,17 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) const u64 device_available_mem_new = device_available_mem_sav - (device_available_mem_sav * 0.34); - event_log_warning (hashcat_ctx, "* Device #%u: This system does not offer any reliable method to query actual free memory. Estimated base: %" PRIu64, device_id + 1, device_available_mem_sav); - event_log_warning (hashcat_ctx, " Assuming normal desktop activity, reducing estimate by 34%%: %" PRIu64, device_available_mem_new); - event_log_warning (hashcat_ctx, " This can hurt performance drastically, especially on memory-heavy algorithms."); - event_log_warning (hashcat_ctx, " You can adjust this percentage using --backend-devices-keepfree"); - event_log_warning (hashcat_ctx, NULL); + if (user_options->quiet == false) + { + if (user_options->machine_readable == false) + { + event_log_warning (hashcat_ctx, "* Device #%u: This system does not offer any reliable method to query actual free memory. 
Estimated base: %" PRIu64, device_id + 1, device_available_mem_sav); + event_log_warning (hashcat_ctx, " Assuming normal desktop activity, reducing estimate by 34%%: %" PRIu64, device_available_mem_new); + event_log_warning (hashcat_ctx, " This can hurt performance drastically, especially on memory-heavy algorithms."); + event_log_warning (hashcat_ctx, " You can adjust this percentage using --backend-devices-keepfree"); + event_log_warning (hashcat_ctx, NULL); + } + } device_param->device_available_mem = device_available_mem_new; } diff --git a/src/bridges/bridge_python_generic_hash_mp.c b/src/bridges/bridge_python_generic_hash_mp.c index 4dcfe0ea5..3b15e59c1 100644 --- a/src/bridges/bridge_python_generic_hash_mp.c +++ b/src/bridges/bridge_python_generic_hash_mp.c @@ -336,7 +336,7 @@ static int resolve_pyenv_libpath (char *out_buf, const size_t out_sz) return -1; } -static bool init_python (hc_python_lib_t *python) +static bool init_python (hc_python_lib_t *python, user_options_t *user_options) { char pythondll_path[PATH_MAX]; @@ -526,7 +526,13 @@ static bool init_python (hc_python_lib_t *python) } else { - printf ("Loaded python library from: %s\n\n", pythondll_path); + if (user_options->quiet == false) + { + if (user_options->machine_readable == false) + { + printf ("Loaded python library from: %s\n\n", pythondll_path); + } + } } #define HC_LOAD_FUNC_PYTHON(ptr,name,pythonname,type,libname,noerr) \ @@ -694,7 +700,7 @@ void *platform_init (user_options_t *user_options) python_interpreter->python = python; - if (init_python (python) == false) return NULL; + if (init_python (python, user_options) == false) return NULL; python->Py_Initialize (); @@ -714,9 +720,15 @@ void *platform_init (user_options_t *user_options) unit_t *unit_buf = &python_interpreter->units_buf[0]; #if defined (_WIN) || defined (__CYGWIN__) || defined (__APPLE__) - fprintf (stderr, "Attention!!! 
Falling back to single-threaded mode.\n"); - fprintf (stderr, " Windows and MacOS ds not support multiprocessing module cleanly!\n"); - fprintf (stderr, " For multithreading on Windows and MacOS, please use -m 72000 instead.\n\n"); + if (user_options->quiet == false) + { + if (user_options->machine_readable == false) + { + fprintf (stderr, "Attention!!! Falling back to single-threaded mode.\n"); + fprintf (stderr, " Windows and MacOS ds not support multiprocessing module cleanly!\n"); + fprintf (stderr, " For multithreading on Windows and MacOS, please use -m 72000 instead.\n\n"); + } + } #endif python_interpreter->source_filename = (user_options->bridge_parameter1 == NULL) ? DEFAULT_SOURCE_FILENAME : user_options->bridge_parameter1; diff --git a/src/bridges/bridge_python_generic_hash_sp.c b/src/bridges/bridge_python_generic_hash_sp.c index 3d2dbb994..1c46b4086 100644 --- a/src/bridges/bridge_python_generic_hash_sp.c +++ b/src/bridges/bridge_python_generic_hash_sp.c @@ -330,7 +330,7 @@ static int resolve_pyenv_libpath (char *out_buf, const size_t out_sz) return -1; } -static bool init_python (hc_python_lib_t *python) +static bool init_python (hc_python_lib_t *python, user_options_t *user_options) { char pythondll_path[PATH_MAX]; @@ -525,18 +525,30 @@ static bool init_python (hc_python_lib_t *python) } else { - printf ("Loaded python library from: %s\n\n", pythondll_path); + if (user_options->quiet == false) + { + if (user_options->machine_readable == false) + { + printf ("Loaded python library from: %s\n\n", pythondll_path); + } + } } #if defined (_WIN) || defined (__CYGWIN__) || defined (__APPLE__) #else - fprintf (stderr, "Attention!!! 
The 'free-threaded' python library has some major downsides.\n"); - fprintf (stderr, " The main purpose of this module is to give Windows and macOS users a multithreading option.\n"); - fprintf (stderr, " It seems to be a lot slower, and relevant modules such as `cffi` are incompatibile.\n"); - fprintf (stderr, " Since your are on Linux we highly recommend to stick to multiprocessing module.\n"); - fprintf (stderr, " Maybe 'free-threaded' mode will become more mature in the future.\n"); - fprintf (stderr, " For now, we high recommend to stick to -m 73000 instead.\n\n"); + if (user_options->quiet == false) + { + if (user_options->machine_readable == false) + { + fprintf (stderr, "Attention!!! The 'free-threaded' python library has some major downsides.\n"); + fprintf (stderr, " The main purpose of this module is to give Windows and macOS users a multithreading option.\n"); + fprintf (stderr, " It seems to be a lot slower, and relevant modules such as `cffi` are incompatibile.\n"); + fprintf (stderr, " Since your are on Linux we highly recommend to stick to multiprocessing module.\n"); + fprintf (stderr, " Maybe 'free-threaded' mode will become more mature in the future.\n"); + fprintf (stderr, " For now, we high recommend to stick to -m 73000 instead.\n\n"); + } + } #endif #define HC_LOAD_FUNC_PYTHON(ptr,name,pythonname,type,libname,noerr) \ @@ -696,7 +708,7 @@ void *platform_init (user_options_t *user_options) python_interpreter->python = python; - if (init_python (python) == false) return NULL; + if (init_python (python, user_options) == false) return NULL; python->Py_Initialize (); From a5fd16d9cd775a95aa331e39554a86af8bb08c4c Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Thu, 10 Jul 2025 23:10:04 +0200 Subject: [PATCH 40/57] fix module_constraints (Optimized-Mode-PW-Constraints) for -m 400 --- tools/test_modules/m00400.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m00400.pm b/tools/test_modules/m00400.pm index 
c6073edd5..3e95c9eed 100644 --- a/tools/test_modules/m00400.pm +++ b/tools/test_modules/m00400.pm @@ -12,7 +12,7 @@ use warnings; use Authen::Passphrase::PHPass; -sub module_constraints { [[0, 256], [8, 8], [0, 55], [8, 8], [-1, -1]] } +sub module_constraints { [[0, 256], [8, 8], [0, 39], [8, 8], [-1, -1]] } sub module_generate_hash { From 6200d3cc591acee674e951d6bcb2a16d352bf5b7 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Thu, 10 Jul 2025 23:25:03 +0200 Subject: [PATCH 41/57] fix min salt (username) len with hash-mode 3100 --- src/modules/module_03100.c | 2 +- tools/test_modules/m03100.pm | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/modules/module_03100.c b/src/modules/module_03100.c index f5b77a395..7cffab6a6 100644 --- a/src/modules/module_03100.c +++ b/src/modules/module_03100.c @@ -84,7 +84,7 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE token.attr[0] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX; - token.len_min[1] = 0; + token.len_min[1] = 1; token.len_max[1] = 30; token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH; diff --git a/tools/test_modules/m03100.pm b/tools/test_modules/m03100.pm index 515103df7..bf87c8fd7 100644 --- a/tools/test_modules/m03100.pm +++ b/tools/test_modules/m03100.pm @@ -10,7 +10,7 @@ use warnings; use Crypt::CBC; -sub module_constraints { [[-1, -1], [-1, -1], [0, 30], [0, 30], [-1, -1]] } +sub module_constraints { [[-1, -1], [-1, -1], [0, 30], [1, 30], [-1, -1]] } sub module_generate_hash { From 1d4d9db45b377feeeb45c610023bc27ea296bf1c Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Fri, 11 Jul 2025 00:53:06 +0200 Subject: [PATCH 42/57] fix bug on Makefile with detection of Apple Silicon systems --- src/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index cf85746aa..575c23589 100644 --- a/src/Makefile +++ b/src/Makefile @@ -85,7 +85,7 @@ AR := /usr/bin/ar SED := /usr/bin/sed SED_IN_PLACE := -i "" 
DARWIN_VERSION := $(shell uname -r | cut -d. -f1) -IS_APPLE_SILICON := $(shell lipo /bin/zsh -verify_arch arm64e && echo 1 || echo 0) +IS_APPLE_SILICON := $(shell [ "$$(sysctl -in hw.optional.arm64 2>/dev/null)" = "1" ] && echo 1 || echo 0) endif ifneq (,$(filter $(UNAME),FreeBSD NetBSD)) @@ -237,8 +237,10 @@ CFLAGS_UNRAR += -Wno-class-memaccess CFLAGS_UNRAR += -Wno-misleading-indentation CFLAGS_UNRAR += -Wno-format-overflow else +ifeq ($(IS_APPLE_SILICON),0) CFLAGS_UNRAR += -Wno-nontrivial-memcall endif +endif CFLAGS_UNRAR += -Wno-missing-braces CFLAGS_UNRAR += -Wno-unused-variable CFLAGS_UNRAR += -Wno-unused-parameter From d9d57fc9a0b4fafcc51ba77ad1ba45ba0df23ad8 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 11 Jul 2025 10:08:08 +0200 Subject: [PATCH 43/57] Fixed autotune edge case and encoder bugs Improved handling of an autotune edge case. In theory, increasing accel early can improve accuracy, and it does, but it also prevents increasing the thread count because it's more likely to run into high runtime limits. OTOH, we want to prioritize threads over accel. This change may slightly reduce performance for algorithms that benefit from high accel and low thread counts (e.g., 7800, 14900), but those can be managed by limiting thread count or, preferably, by setting OPTS_TYPE_NATIVE_THREADS. Added OPTS_TYPE_NATIVE_THREADS to 7800, 7810, and 14900. Also fixed encoder bugs in hash-mode 29920 and 29940, identified using the new test_edge.sh script. The encoders in the modules failed to properly terminate the output string. 
--- src/autotune.c | 9 +++++++++ src/modules/module_07800.c | 1 + src/modules/module_07801.c | 1 + src/modules/module_14900.c | 1 + src/modules/module_29920.c | 2 ++ src/modules/module_29940.c | 2 ++ 6 files changed, 16 insertions(+) diff --git a/src/autotune.c b/src/autotune.c index cf233a88f..4c8566f3b 100644 --- a/src/autotune.c +++ b/src/autotune.c @@ -334,6 +334,14 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param // v7 autotuner is a lot more straight forward // we start with some purely theoretical values as a base, then move on to some meassured tests + /* This causes more problems than it solves. + * In theory, it's fine to boost accel early to improve accuracy, and it does, + * but on the other hand, it prevents increasing the thread count due to high runtime. + * For longer runtimes, we want to prioritize more threads over higher accel. + * This change also has some downsides for algorithms that actually benefit + * from higher accel and fewer threads (e.g., 7800, 14900). But those are easy to manage + * by limiting thread count, or better, by setting them to OPTS_TYPE_NATIVE_THREADS. 
+ if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { if (kernel_accel_min < kernel_accel_max) @@ -348,6 +356,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param } } } + */ if (kernel_threads_min < kernel_threads_max) { diff --git a/src/modules/module_07800.c b/src/modules/module_07800.c index ce94bead4..cd45730f4 100644 --- a/src/modules/module_07800.c +++ b/src/modules/module_07800.c @@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_BE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_ST_ADD80 | OPTS_TYPE_ST_UPPER; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; diff --git a/src/modules/module_07801.c b/src/modules/module_07801.c index 17f8777d4..2696dfbe2 100644 --- a/src/modules/module_07801.c +++ b/src/modules/module_07801.c @@ -24,6 +24,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_BE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_ST_ADD80 | OPTS_TYPE_ST_UPPER; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; diff --git a/src/modules/module_14900.c b/src/modules/module_14900.c index fcc639688..38fee41dd 100644 --- a/src/modules/module_14900.c +++ b/src/modules/module_14900.c @@ -23,6 +23,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_SUGGEST_KG; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *BENCHMARK_MASK = "?b?b?b?b?bxxxxx"; diff --git a/src/modules/module_29920.c b/src/modules/module_29920.c index 6cf4a3611..63d170526 100644 --- a/src/modules/module_29920.c +++ b/src/modules/module_29920.c @@ -245,6 +245,8 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE u32_to_hex (byte_swap_32 
(encdatavault->keychain[i]), (u8 *) tmp_buf + j); } + tmp_buf[32 * 8] = 0; + const int line_len = snprintf (line_buf, line_size, "%s%u$%u$%08x%08x$%08x%08x$32$%08x%08x%08x%08x%08x%08x%08x%08x$%u$%s", SIGNATURE_ENCDATAVAULT, encdatavault->version, diff --git a/src/modules/module_29940.c b/src/modules/module_29940.c index aa2791475..4abcc0a0f 100644 --- a/src/modules/module_29940.c +++ b/src/modules/module_29940.c @@ -207,6 +207,8 @@ int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE u32_to_hex (byte_swap_32 (encdatavault->keychain[i]), (u8 *) tmp_buf + j); } + tmp_buf[32 * 8] = 0; + const int line_len = snprintf (line_buf, line_size, "%s%u$%u$%08x%08x$%08x%08x$%s", SIGNATURE_ENCDATAVAULT, encdatavault->version, From e61066f10b554cafe9f6f5673d15fad1425f04ba Mon Sep 17 00:00:00 2001 From: Ewald Snel Date: Fri, 11 Jul 2025 11:20:59 +0200 Subject: [PATCH 44/57] Added CPU support for argon2 (34000 plugin). - Used blake2b_transform() instead of blake2b_update() to avoid compiler problems on Intel OpenCL and segfaults on POCL (still unsure of exact cause but possibly related to the shuffle functions in combination with these OpenCL drivers). - Remove 'bug' comments (these are resolved now). - Added implementation of 'argon2_hash_block()' for non-warped (CPU) case. - Introduced 'LBLOCKSIZE' for the size of an argon2 block per thread in u64. Most of the code should now be able to support any warp/wavefront size. 
--- OpenCL/inc_hash_argon2.cl | 291 ++++++++++++++++++++++++------------ OpenCL/inc_hash_argon2.h | 4 +- src/modules/argon2_common.c | 7 +- 3 files changed, 206 insertions(+), 96 deletions(-) diff --git a/OpenCL/inc_hash_argon2.cl b/OpenCL/inc_hash_argon2.cl index 190d3b2d2..383d07181 100644 --- a/OpenCL/inc_hash_argon2.cl +++ b/OpenCL/inc_hash_argon2.cl @@ -12,24 +12,26 @@ #include "inc_hash_blake2b.h" #include "inc_hash_argon2.h" +#define LBLOCKSIZE (128 / THREADS_PER_LANE) + DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, const u32 blocknum, const u32 parallelism, GLOBAL_AS argon2_block_t *blocks) { blake2b_ctx_t ctx; blake2b_init (&ctx); - u64 blake_buf[16] = { 0 }; + ctx.m[0] = hl32_to_64 (in[ 0], sizeof(argon2_block_t)); + ctx.m[1] = hl32_to_64 (in[ 2], in[ 1]); + ctx.m[2] = hl32_to_64 (in[ 4], in[ 3]); + ctx.m[3] = hl32_to_64 (in[ 6], in[ 5]); + ctx.m[4] = hl32_to_64 (in[ 8], in[ 7]); + ctx.m[5] = hl32_to_64 (in[10], in[ 9]); + ctx.m[6] = hl32_to_64 (in[12], in[11]); + ctx.m[7] = hl32_to_64 (in[14], in[13]); + ctx.m[8] = hl32_to_64 (blocknum, in[15]); + ctx.m[9] = hl32_to_64 (0, lane); - blake_buf[0] = sizeof(argon2_block_t); - - blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 4); - blake2b_update (&ctx, in, 64); - - blake_buf[0] = hl32_to_64 (lane, blocknum); - - blake2b_update (&ctx, (PRIVATE_AS u32 *) blake_buf, 8); - - blake2b_final (&ctx); + blake2b_transform (ctx.h, ctx.m, 76, (u64) BLAKE2B_FINAL); GLOBAL_AS u64 *out = blocks[(blocknum * parallelism) + lane].values; @@ -38,12 +40,23 @@ DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, co out[2] = ctx.h[2]; out[3] = ctx.h[3]; + ctx.m[8] = 0; + ctx.m[9] = 0; + for (u32 off = 4; off < 124; off += 4) { - for (u32 idx = 0; idx < 8; idx++) blake_buf[idx] = ctx.h[idx]; + for (u32 idx = 0; idx < 8; idx++) ctx.m[idx] = ctx.h[idx]; - blake2b_init (&ctx); - blake2b_transform (ctx.h, blake_buf, 64, (u64) BLAKE2B_FINAL); + ctx.h[0] = BLAKE2B_IV_00 ^ 
0x01010040; // default output length: 0x40 = 64 bytes + ctx.h[1] = BLAKE2B_IV_01; + ctx.h[2] = BLAKE2B_IV_02; + ctx.h[3] = BLAKE2B_IV_03; + ctx.h[4] = BLAKE2B_IV_04; + ctx.h[5] = BLAKE2B_IV_05; + ctx.h[6] = BLAKE2B_IV_06; + ctx.h[7] = BLAKE2B_IV_07; + + blake2b_transform (ctx.h, ctx.m, 64, (u64) BLAKE2B_FINAL); out[off + 0] = ctx.h[0]; out[off + 1] = ctx.h[1]; @@ -57,39 +70,85 @@ DECLSPEC void argon2_initial_block (PRIVATE_AS const u32 *in, const u32 lane, co out[127] = ctx.h[7]; } +DECLSPEC void blake2b_update_8 (PRIVATE_AS blake2b_ctx_t *ctx, const u32 w0, const u32 w1, const int len) +{ + const int pos = ctx->len & 127; + + if (pos == 0) + { + if (ctx->len > 0) // if new block (pos == 0) AND the (old) len is not zero => transform + { + blake2b_transform (ctx->h, ctx->m, ctx->len, BLAKE2B_UPDATE); + } + } + + const u64 m = hl32_to_64 (w1, w0); + const u32 s = (pos & 7) * 8; + const u64 m0 = (m << s); + const u64 m1 = (m >> 8) >> (56 - s); + + const int idx = pos / 8; + + ctx->m[ 0] |= (idx == 0) ? m0 : 0; + ctx->m[ 1] |= (idx == 1) ? m0 : (idx == 0) ? m1 : 0; + ctx->m[ 2] |= (idx == 2) ? m0 : (idx == 1) ? m1 : 0; + ctx->m[ 3] |= (idx == 3) ? m0 : (idx == 2) ? m1 : 0; + ctx->m[ 4] |= (idx == 4) ? m0 : (idx == 3) ? m1 : 0; + ctx->m[ 5] |= (idx == 5) ? m0 : (idx == 4) ? m1 : 0; + ctx->m[ 6] |= (idx == 6) ? m0 : (idx == 5) ? m1 : 0; + ctx->m[ 7] |= (idx == 7) ? m0 : (idx == 6) ? m1 : 0; + ctx->m[ 8] |= (idx == 8) ? m0 : (idx == 7) ? m1 : 0; + ctx->m[ 9] |= (idx == 9) ? m0 : (idx == 8) ? m1 : 0; + ctx->m[10] |= (idx == 10) ? m0 : (idx == 9) ? m1 : 0; + ctx->m[11] |= (idx == 11) ? m0 : (idx == 10) ? m1 : 0; + ctx->m[12] |= (idx == 12) ? m0 : (idx == 11) ? m1 : 0; + ctx->m[13] |= (idx == 13) ? m0 : (idx == 12) ? m1 : 0; + ctx->m[14] |= (idx == 14) ? m0 : (idx == 13) ? m1 : 0; + ctx->m[15] |= (idx == 15) ? m0 : (idx == 14) ? 
m1 : 0; + + if ((pos + len) > 128) + { + const u32 cur_len = ((ctx->len + len) / 128) * 128; + + blake2b_transform (ctx->h, ctx->m, cur_len, (u64) BLAKE2B_UPDATE); + + for (u32 i = 1; i < 16; i++) ctx->m[i] = 0; + + ctx->m[0] = m1; + } + + ctx->len += len; +} + DECLSPEC void argon2_initial_hash (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *salt, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS u64 *blockhash) { blake2b_ctx_t ctx; blake2b_init (&ctx); - u32 option_input[32] = { 0 }; + ctx.m[0] = hl32_to_64 (options->digest_len, options->parallelism); + ctx.m[1] = hl32_to_64 (options->iterations, options->memory_usage_in_kib); + ctx.m[2] = hl32_to_64 (options->type, options->version); + ctx.len = 24; - option_input[0] = options->parallelism; - option_input[1] = options->digest_len; - option_input[2] = options->memory_usage_in_kib; - option_input[3] = options->iterations; - option_input[4] = options->version; - option_input[5] = options->type; + const u32 pw_len = pw->pw_len; - blake2b_update (&ctx, option_input, 24); + blake2b_update_8 (&ctx, pw_len, 0, 4); - u32 len_input[32] = { 0 }; + for (u32 i = 0, idx = 0; i < pw_len; i += 8, idx += 2) + { + blake2b_update_8 (&ctx, pw->i[idx + 0], pw->i[idx + 1], MIN((pw_len - i), 8)); + } - len_input[0] = pw->pw_len; + const u32 salt_len = salt->salt_len; - blake2b_update (&ctx, len_input, 4); - blake2b_update_global (&ctx, pw->i, pw->pw_len); + blake2b_update_8 (&ctx, salt_len, 0, 4); - len_input[0] = salt->salt_len; - - blake2b_update (&ctx, len_input, 4); - blake2b_update_global (&ctx, salt->salt_buf, salt->salt_len); - - len_input[0] = 0; - - blake2b_update (&ctx, len_input, 4); // secret (K) - blake2b_update (&ctx, len_input, 4); // associated data (X) + for (u32 i = 0, idx = 0; i < salt_len; i += 8, idx += 2) + { + blake2b_update_8 (&ctx, salt->salt_buf[idx + 0], salt->salt_buf[idx + 1], MIN((salt_len - i), 8)); + } + blake2b_update_8 (&ctx, 0, 0, 8); // secret (K) and associated data (X) blake2b_final (&ctx); 
for (u32 idx = 0; idx < 8; idx++) blockhash[idx] = ctx.h[idx]; @@ -110,7 +169,6 @@ DECLSPEC void argon2_init (GLOBAL_AS const pw_t *pw, GLOBAL_AS const salt_t *sal } } -// TODO: reconsider 'trunc_mul()' DECLSPEC u64 trunc_mul (u64 x, u64 y) { const u32 xlo = (u32) x; @@ -142,8 +200,6 @@ DECLSPEC inline u32 argon2_ref_address (PRIVATE_AS const argon2_options_t *optio ref_area += (index - 1); } - // if ref_area == 0xFFFFFFFF => bug - const u32 j1 = l32_from_64_S (pseudo_random); ref_index = (ref_area - 1 - hc_umulhi (ref_area, hc_umulhi (j1, j1))); @@ -188,8 +244,36 @@ DECLSPEC int argon2_shift (int idx, int argon2_thread) return (argon2_thread & 0x0e) | (((argon2_thread & 0x11) + delta + 0x0e) & 0x11); } -DECLSPEC void argon2_hash_block (u64 R[4], int argon2_thread, LOCAL_AS u64 *shuffle_buf, int argon2_lsz) +DECLSPEC void argon2_hash_block (u64 R[LBLOCKSIZE], int argon2_thread, LOCAL_AS u64 *shuffle_buf, int argon2_lsz) { +#if THREADS_PER_LANE == 1 + u64 v[16]; + + for (u32 i = 0, offset = 0; i < 8; i++, offset += 16) + { + for (u32 j = 0; j < 16; j++) v[j] = R[offset + j]; + + ARGON2_P(); + + for (u32 j = 0; j < 16; j++) R[offset + j] = v[j]; + } + + for (u32 i = 0, offset = 0; i < 8; i++, offset += 2) + { + for (u32 j = 0, k = offset; j < 16; j += 2, k += 16) { + v[j + 0] = R[k + 0]; + v[j + 1] = R[k + 1]; + } + + ARGON2_P(); + + for (u32 j = 0, k = offset; j < 16; j += 2, k += 16) + { + R[k + 0] = v[j + 0]; + R[k + 1] = v[j + 1]; + } + } +#else for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_thread ^ (idx << 2), argon2_thread, argon2_lsz); transpose_permute_block (R, argon2_thread); @@ -215,49 +299,45 @@ DECLSPEC void argon2_hash_block (u64 R[4], int argon2_thread, LOCAL_AS u64 *shuf ARGON2_G(R[0], R[1], R[2], R[3]); for (u32 idx = 1; idx < 4; idx++) R[idx] = hc__shfl_sync (shuffle_buf, FULL_MASK, R[idx], argon2_shift ((4 - idx), argon2_thread), argon2_thread, argon2_lsz); +#endif } DECLSPEC void 
argon2_next_addresses (PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, PRIVATE_AS u32 *addresses, u32 start_index, u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz) { - u64 Z[4] = { 0 }; + u64 Z[LBLOCKSIZE] = { 0 }; + u64 tmp[LBLOCKSIZE] = { 0 }; - u64 tmp[4] = { 0 }; - - tmp[0] = 0; - tmp[1] = 0; - tmp[2] = 0; - tmp[3] = 0; - - switch (argon2_thread) + for (u32 i = 0, index = argon2_thread; i < (LBLOCKSIZE / 4); i++, index += THREADS_PER_LANE) { - case 0: Z[0] = pos->pass; break; - case 1: Z[0] = pos->lane; break; - case 2: Z[0] = pos->slice; break; - case 3: Z[0] = options->memory_block_count; break; - case 4: Z[0] = options->iterations; break; - case 5: Z[0] = options->type; break; - case 6: Z[0] = (start_index / 128) + 1; break; - default: Z[0] = 0; break; + switch (index) + { + case 0: Z[i] = pos->pass; break; + case 1: Z[i] = pos->lane; break; + case 2: Z[i] = pos->slice; break; + case 3: Z[i] = options->memory_block_count; break; + case 4: Z[i] = options->iterations; break; + case 5: Z[i] = options->type; break; + case 6: Z[i] = (start_index / 128) + 1; break; + default: Z[i] = 0; break; + } + + tmp[i] = Z[i]; } - tmp[0] = Z[0]; + argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz); + + for (u32 idx = 0; idx < (LBLOCKSIZE / 4); idx++) Z[idx] ^= tmp[idx]; + + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) tmp[idx] = Z[idx]; argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz); - Z[0] ^= tmp[0]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) Z[idx] ^= tmp[idx]; - for (u32 idx = 0; idx < 4; idx++) tmp[idx] = Z[idx]; - - argon2_hash_block (Z, argon2_thread, shuffle_buf, argon2_lsz); - - for (u32 idx = 0; idx < 4; idx++) Z[idx] ^= tmp[idx]; - - for (u32 i = 0, index = (start_index + argon2_thread); i < 4; i++, index += THREADS_PER_LANE) + for (u32 i = 0, index = (start_index + argon2_thread); i < LBLOCKSIZE; i++, index += THREADS_PER_LANE) { addresses[i] = argon2_ref_address (options, pos, index, Z[i]); } - 
- // if addresses[0] == 0xFFFFFFFE => bug } DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index) @@ -277,20 +357,20 @@ DECLSPEC u32 index_u32x4 (const u32 array[4], u32 index) return (u32) -1; } -DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[4], u32 argon2_thread) +DECLSPEC GLOBAL_AS argon2_block_t *argon2_get_current_block (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, u32 lane, u32 index_in_lane, u64 R[LBLOCKSIZE], u32 argon2_thread) { // Apply wrap-around to previous block index if the current block is the first block in the lane const u32 prev_in_lane = (index_in_lane == 0) ? (options->lane_length - 1) : (index_in_lane - 1); GLOBAL_AS argon2_block_t *prev_block = &blocks[(prev_in_lane * options->parallelism) + lane]; - for (u32 idx = 0; idx < 4; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) R[idx] = prev_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; return &blocks[(index_in_lane * options->parallelism) + lane]; } -DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, bool indep_addr, const u32 addresses[4], - u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[4], u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz) +DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const argon2_options_t *options, PRIVATE_AS const argon2_pos_t *pos, bool indep_addr, const u32 addresses[LBLOCKSIZE], + u32 start_index, u32 end_index, GLOBAL_AS argon2_block_t *cur_block, u64 R[LBLOCKSIZE], u32 argon2_thread, LOCAL_AS u64 *shuffle_buf, u32 argon2_lsz) { for (u32 index = start_index; index < end_index; index++, cur_block += options->parallelism) { @@ -298,34 +378,40 @@ 
DECLSPEC void argon2_fill_subsegment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_ if (indep_addr) { - ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % ARGON2_SYNC_POINTS); +#if THREADS_PER_LANE == 1 + ref_address = addresses[(index / THREADS_PER_LANE) % LBLOCKSIZE]; +#else + ref_address = index_u32x4 (addresses, (index / THREADS_PER_LANE) % LBLOCKSIZE); ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, index, argon2_thread, argon2_lsz); +#endif } else { ref_address = argon2_ref_address (options, pos, index, R[0]); +#if THREADS_PER_LANE != 1 ref_address = hc__shfl_sync (shuffle_buf, FULL_MASK, ref_address, 0, argon2_thread, argon2_lsz); +#endif } GLOBAL_AS const argon2_block_t *ref_block = &blocks[ref_address]; - u64 tmp[4] = { 0 }; + u64 tmp[LBLOCKSIZE] = { 0 }; // First pass is overwrite, next passes are XOR with previous if ((pos->pass > 0) && (options->version != ARGON2_VERSION_10)) { - for (u32 idx = 0; idx < 4; idx++) tmp[idx] = cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) tmp[idx] = cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; } - for (u32 idx = 0; idx < 4; idx++) R[idx] ^= ref_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) R[idx] ^= ref_block->values[(idx * THREADS_PER_LANE) + argon2_thread]; - for (u32 idx = 0; idx < 4; idx++) tmp[idx] ^= R[idx]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) tmp[idx] ^= R[idx]; argon2_hash_block (R, argon2_thread, shuffle_buf, argon2_lsz); - for (u32 idx = 0; idx < 4; idx++) R[idx] ^= tmp[idx]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) R[idx] ^= tmp[idx]; - for (u32 idx = 0; idx < 4; idx++) cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread] = R[idx]; + for (u32 idx = 0; idx < LBLOCKSIZE; idx++) cur_block->values[(idx * THREADS_PER_LANE) + argon2_thread] = R[idx]; } } @@ -335,7 +421,7 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t 
*blocks, PRIVATE_AS const u32 skip_blocks = (pos->pass == 0) && (pos->slice == 0) ? 2 : 0; const u32 index_in_lane = (pos->slice * options->segment_length) + skip_blocks; - u64 R[4] = { 0 }; + u64 R[LBLOCKSIZE] = { 0 }; GLOBAL_AS argon2_block_t *cur_block = argon2_get_current_block (blocks, options, pos->lane, index_in_lane, R, argon2_thread); @@ -346,7 +432,7 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const u32 start_index = (block_index == 0) ? skip_blocks : block_index; const u32 end_index = MIN(((start_index | 127) + 1), options->segment_length); - u32 addresses[4] = { 0 }; + u32 addresses[LBLOCKSIZE] = { 0 }; argon2_next_addresses (options, pos, addresses, block_index, argon2_thread, shuffle_buf, argon2_lsz); argon2_fill_subsegment (blocks, options, pos, true, addresses, start_index, end_index, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz); @@ -356,7 +442,7 @@ DECLSPEC void argon2_fill_segment (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS } else { - u32 addresses[4] = { 0 }; + u32 addresses[LBLOCKSIZE] = { 0 }; argon2_fill_subsegment (blocks, options, pos, false, addresses, skip_blocks, options->segment_length, cur_block, R, argon2_thread, shuffle_buf, argon2_lsz); } @@ -367,26 +453,43 @@ DECLSPEC void argon2_final (GLOBAL_AS argon2_block_t *blocks, PRIVATE_AS const a const u32 lane_length = options->lane_length; const u32 lanes = options->parallelism; - argon2_block_t final_block = { }; - - for (u32 l = 0; l < lanes; l++) - { - for (u32 idx = 0; idx < 128; idx++) final_block.values[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx]; - } - - u32 output_len[32] = { 0 }; - output_len[0] = options->digest_len; - blake2b_ctx_t ctx; blake2b_init (&ctx); // Override default (0x40) value in BLAKE2b ctx.h[0] ^= 0x40 ^ options->digest_len; - blake2b_update (&ctx, output_len, 4); - blake2b_update (&ctx, (PRIVATE_AS u32 *) final_block.values, sizeof(final_block)); + u32 rem = options->digest_len; - blake2b_final 
(&ctx); + for (u32 offset = 0; offset < 128; offset += 16) + { + for (u32 l = 0; l < lanes; l++) + { + for (u32 idx = 0; idx < 16; idx++) + { + ctx.m[idx] ^= blocks[((lane_length - 1) * lanes) + l].values[idx + offset]; + } + } + + for (u32 idx = 0; idx < 16; idx++) + { + const u64 value = ctx.m[idx]; + + ctx.m[idx] = hl32_to_64 (l32_from_64_S (value), rem); + + rem = h32_from_64_S (value); + } + + ctx.len += 128; + + blake2b_transform (ctx.h, ctx.m, ctx.len, (u64) BLAKE2B_UPDATE); + + for (u32 idx = 0; idx < 16; idx++) ctx.m[idx] = 0; + } + + ctx.m[0] = hl32_to_64 (0, rem); + + blake2b_transform (ctx.h, ctx.m, 1028, (u64) BLAKE2B_FINAL); for (uint i = 0, idx = 0; i < (options->digest_len / 4); i += 2, idx += 1) { diff --git a/OpenCL/inc_hash_argon2.h b/OpenCL/inc_hash_argon2.h index a623f492f..eb6a5e544 100644 --- a/OpenCL/inc_hash_argon2.h +++ b/OpenCL/inc_hash_argon2.h @@ -1,4 +1,3 @@ - /** * Author......: Netherlands Forensic Institute * License.....: MIT @@ -12,7 +11,10 @@ #define ARGON2_VERSION_10 0x10 #define ARGON2_VERSION_13 0x13 +#ifndef THREADS_PER_LANE #define THREADS_PER_LANE 32 +#endif + #define FULL_MASK 0xffffffff #define BLAKE2B_OUTBYTES 64 diff --git a/src/modules/argon2_common.c b/src/modules/argon2_common.c index 52961a2e3..6885bc177 100644 --- a/src/modules/argon2_common.c +++ b/src/modules/argon2_common.c @@ -143,7 +143,12 @@ char *argon2_module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconf char *jit_build_options = NULL; - //hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u -D ARGON2_TMP_ELEM=%u", options[0].parallelism, options[0].memory_block_count); + //hc_asprintf (&jit_build_options, "-D ARGON2_PARALLELISM=%u", options[0].parallelism); + + if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) + { + hc_asprintf (&jit_build_options, "-D THREADS_PER_LANE=1"); + } return jit_build_options; } From fb9ea5d642f6eebe88229cec872a2b70ba0703e8 Mon Sep 17 00:00:00 2001 From: Chick3nman Date: Fri, 11 Jul 2025 05:52:38 
-0500 Subject: [PATCH 45/57] Update error when using --skip/--limit in an invalid attack configuration --- src/hashcat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hashcat.c b/src/hashcat.c index ab4d3139e..1d4f8a054 100644 --- a/src/hashcat.c +++ b/src/hashcat.c @@ -795,7 +795,7 @@ static int outer_loop (hashcat_ctx_t *hashcat_ctx) { if ((mask_ctx->masks_cnt > 1) || (straight_ctx->dicts_cnt > 1)) { - event_log_error (hashcat_ctx, "Use of --skip/--limit is not supported with --increment, mask files, or --stdout."); + event_log_error (hashcat_ctx, "Use of --skip/--limit is not supported with --increment, mask files, multiple dictionaries, or --stdout."); return -1; } From c2c8561702229f0b64677ff53c32192c973a2e2f Mon Sep 17 00:00:00 2001 From: Chick3nman Date: Fri, 11 Jul 2025 06:06:41 -0500 Subject: [PATCH 46/57] Enhancement: Set benchmark to true if --benchmark-all is passed --- src/user_options.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/user_options.c b/src/user_options.c index 403e8074a..0a36bc810 100644 --- a/src/user_options.c +++ b/src/user_options.c @@ -1298,6 +1298,11 @@ int user_options_sanity (hashcat_ctx_t *hashcat_ctx) } } + if (user_options->benchmark_all == true) + { + user_options->benchmark = true; + } + if (user_options->benchmark == true) { // sanity checks based on automatically overwritten configuration variables by From d7fb2ffa064f82213d145a622aeb72bb380be02e Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Fri, 11 Jul 2025 15:02:58 +0200 Subject: [PATCH 47/57] Fixed both a false positive and a false negative in -m 21800. Previously, only the first hash in a multihash list was marked as cracked, regardless of which hash was actually cracked. For example, if the second hash was cracked, it incorrectly marked the first as cracked and left the second uncracked. This issue only affected beta versions and only in multihash cracking mode. 
Added deep-comp kernel support for Kerberos modes 28800 and 28900, enabling multihash cracking for the same user in the same domain, even if the password was changed or the recording was bad. Added a rule ensuring that device buffer sizes for password candidates, hooks, and transport (tmps) must be smaller than 1/4 of the maximum allocatable memory. If not, hashcat now automatically reduces kernel-accel down to 1, then halves the number of threads and restores kernel-accel up to its maximum, repeating until the size requirement is met. Fixed salt length limit verification for -m 20712. Fixed password length limit for -m 14400. Fixed unit test salt generator for -m 21100, which could produce duplicate hashes under certain conditions. Added the OPTS_TYPE_NATIVE_THREADS flag to the following hash modes (after benchmarking): 7700, 7701, 9000, 1375x, 1376x, 14800, 19500, 23900. --- OpenCL/m21800-pure.cl | 12 ++++++------ src/backend.c | 7 ++++--- src/interface.c | 6 ++++++ src/modules/module_07700.c | 1 + src/modules/module_07701.c | 1 + src/modules/module_09000.c | 1 + src/modules/module_13751.c | 1 + src/modules/module_13752.c | 1 + src/modules/module_13753.c | 1 + src/modules/module_13761.c | 1 + src/modules/module_13762.c | 1 + src/modules/module_13763.c | 1 + src/modules/module_14800.c | 1 + src/modules/module_19500.c | 1 + src/modules/module_20712.c | 4 ++-- src/modules/module_23900.c | 1 + src/modules/module_28800.c | 8 +++++++- src/modules/module_28900.c | 8 +++++++- tools/test_modules/m14400.pm | 2 +- tools/test_modules/m21100.pm | 2 +- 20 files changed, 46 insertions(+), 15 deletions(-) diff --git a/OpenCL/m21800-pure.cl b/OpenCL/m21800-pure.cl index a9b2bf635..9c2508d31 100644 --- a/OpenCL/m21800-pure.cl +++ b/OpenCL/m21800-pure.cl @@ -655,12 +655,12 @@ KERNEL_FQ KERNEL_FA void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, elec if ((entropy >= MIN_ENTROPY) && (entropy <= MAX_ENTROPY)) { - if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) == 0) + if 
(hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, digest_cur, gid, 0, 0, 0); } - return; + //return; } } } @@ -676,11 +676,11 @@ KERNEL_FQ KERNEL_FA void m21800_comp (KERN_ATTR_TMPS_ESALT (electrum_tmp_t, elec ((tmp[0] == 0x7b) && (tmp[1] == 0x0d) && (tmp[2] == 0x0a) && (tmp[3] == 0x20) && (tmp[4] == 0x20) && (tmp[5] == 0x20) && (tmp[6] == 0x20) && (tmp[7] == 0x22))) { - if (hc_atomic_inc (&hashes_shown[DIGESTS_OFFSET_HOST]) == 0) + if (hc_atomic_inc (&hashes_shown[digest_cur]) == 0) { - mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, DIGESTS_OFFSET_HOST + 0, gid, 0, 0, 0); + mark_hash (plains_buf, d_return_buf, SALT_POS_HOST, DIGESTS_CNT, 0, digest_cur, gid, 0, 0, 0); } - return; + //return; } } diff --git a/src/backend.c b/src/backend.c index 317be0168..279d2f407 100644 --- a/src/backend.c +++ b/src/backend.c @@ -16329,10 +16329,11 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx) // let's add some extra space just to be sure. // now depends on the kernel-accel value (where scrypt and similar benefits), but also hard minimum 64mb and maximum 1024mb limit // let's see if we still need this now that we have low-level API to report free memory + // we don't want these get too big. if a plugin requires really a lot of memory, the extra buffer should be used instead. 
- if (size_pws > device_param->device_maxmem_alloc) memory_limit_hit = 1; - if (size_tmps > device_param->device_maxmem_alloc) memory_limit_hit = 1; - if (size_hooks > device_param->device_maxmem_alloc) memory_limit_hit = 1; + if (size_pws > device_param->device_maxmem_alloc / 4) memory_limit_hit = 1; + if (size_tmps > device_param->device_maxmem_alloc / 4) memory_limit_hit = 1; + if (size_hooks > device_param->device_maxmem_alloc / 4) memory_limit_hit = 1; // work around, for some reason apple opencl can't have buffers larger 2^31 // typically runs into trap 6 diff --git a/src/interface.c b/src/interface.c index 129c054f8..2ad355174 100644 --- a/src/interface.c +++ b/src/interface.c @@ -273,6 +273,12 @@ int hashconfig_init (hashcat_ctx_t *hashcat_ctx) CHECK_MANDATORY (module_ctx->module_hash_encode); } + // check deep comp kernel requirements + if (hashconfig->opts_type & OPTS_TYPE_DEEP_COMP_KERNEL) + { + CHECK_MANDATORY (module_ctx->module_deep_comp_kernel); + } + #undef CHECK_MANDATORY if (user_options->keyboard_layout_mapping) diff --git a/src/modules/module_07700.c b/src/modules/module_07700.c index 1d783d351..f9afd52e3 100644 --- a/src/modules/module_07700.c +++ b/src/modules/module_07700.c @@ -25,6 +25,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_PT_UPPER | OPTS_TYPE_ST_UPPER; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; diff --git a/src/modules/module_07701.c b/src/modules/module_07701.c index 232d7ca4d..46530492c 100644 --- a/src/modules/module_07701.c +++ b/src/modules/module_07701.c @@ -25,6 +25,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_PT_UPPER | OPTS_TYPE_ST_UPPER; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; diff --git 
a/src/modules/module_09000.c b/src/modules/module_09000.c index 5e0f66941..7b9680213 100644 --- a/src/modules/module_09000.c +++ b/src/modules/module_09000.c @@ -22,6 +22,7 @@ static const u64 KERN_TYPE = 9000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_AUTODETECT_DISABLE | OPTS_TYPE_DYNAMIC_SHARED; diff --git a/src/modules/module_13751.c b/src/modules/module_13751.c index d2d0a50c2..2fbafff6d 100644 --- a/src/modules/module_13751.c +++ b/src/modules/module_13751.c @@ -27,6 +27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_13752.c b/src/modules/module_13752.c index bf51b8d7b..5c16f3736 100644 --- a/src/modules/module_13752.c +++ b/src/modules/module_13752.c @@ -27,6 +27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_13753.c b/src/modules/module_13753.c index fadb4ffed..282c78486 100644 --- a/src/modules/module_13753.c +++ b/src/modules/module_13753.c @@ -27,6 +27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_13761.c b/src/modules/module_13761.c index 5b1b82b27..db4e7e957 100644 --- a/src/modules/module_13761.c +++ b/src/modules/module_13761.c @@ -27,6 
+27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_13762.c b/src/modules/module_13762.c index 6f1a27929..358b83cce 100644 --- a/src/modules/module_13762.c +++ b/src/modules/module_13762.c @@ -27,6 +27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_13763.c b/src/modules/module_13763.c index fb50002a2..9ebd0f7f5 100644 --- a/src/modules/module_13763.c +++ b/src/modules/module_13763.c @@ -27,6 +27,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_REGISTER_LIMIT; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_BINARY_HASHFILE | OPTS_TYPE_LOOP_EXTENDED | OPTS_TYPE_MP_MULTI_DISABLE diff --git a/src/modules/module_14800.c b/src/modules/module_14800.c index 1e4b91a15..c6a6a9afd 100644 --- a/src/modules/module_14800.c +++ b/src/modules/module_14800.c @@ -26,6 +26,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_SLOW_HASH_SIMD_LOOP2; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_ST_HEX | OPTS_TYPE_MP_MULTI_DISABLE | OPTS_TYPE_INIT2 diff --git a/src/modules/module_19500.c b/src/modules/module_19500.c index f7f139bfb..69d5fac2a 100644 --- a/src/modules/module_19500.c +++ b/src/modules/module_19500.c @@ -23,6 +23,7 @@ static const u64 KERN_TYPE = 19500; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_RAW_HASH; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | 
OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_PT_GENERATE_BE; static const u32 SALT_TYPE = SALT_TYPE_GENERIC; static const char *ST_PASS = "hashcat"; diff --git a/src/modules/module_20712.c b/src/modules/module_20712.c index 987d688c4..94fc9a611 100644 --- a/src/modules/module_20712.c +++ b/src/modules/module_20712.c @@ -64,8 +64,8 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE token.attr[0] = TOKEN_ATTR_FIXED_LENGTH | TOKEN_ATTR_VERIFY_HEX; - token.len_min[1] = SALT_MIN; - token.len_max[1] = SALT_MAX; + token.len_min[1] = ((SALT_MIN * 8) / 6) + 0; + token.len_max[1] = ((SALT_MAX * 8) / 6) + 3; token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH | TOKEN_ATTR_VERIFY_BASE64A; diff --git a/src/modules/module_23900.c b/src/modules/module_23900.c index fcee86004..48dbfcc39 100644 --- a/src/modules/module_23900.c +++ b/src/modules/module_23900.c @@ -22,6 +22,7 @@ static const u64 KERN_TYPE = 23900; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS | OPTS_TYPE_ST_HEX; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; diff --git a/src/modules/module_28800.c b/src/modules/module_28800.c index 519299654..772309913 100644 --- a/src/modules/module_28800.c +++ b/src/modules/module_28800.c @@ -23,6 +23,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_DEEP_COMP_KERNEL | OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; @@ -63,6 +64,11 @@ typedef struct krb5db_17_tmp static const char *SIGNATURE_KRB5DB = "$krb5db$17$"; +u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) +{ + return KERN_RUN_3; +} + u64 module_tmp_size (MAYBE_UNUSED const 
hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 tmp_size = (const u64) sizeof (krb5db_17_tmp_t); @@ -254,7 +260,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_bridge_name = MODULE_DEFAULT; module_ctx->module_bridge_type = MODULE_DEFAULT; module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; module_ctx->module_deprecated_notice = MODULE_DEFAULT; module_ctx->module_dgst_pos0 = module_dgst_pos0; module_ctx->module_dgst_pos1 = module_dgst_pos1; diff --git a/src/modules/module_28900.c b/src/modules/module_28900.c index 6f20f3c59..93c3374c4 100644 --- a/src/modules/module_28900.c +++ b/src/modules/module_28900.c @@ -23,6 +23,7 @@ static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE | OPTI_TYPE_NOT_ITERATED | OPTI_TYPE_SLOW_HASH_SIMD_LOOP; static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_DEEP_COMP_KERNEL | OPTS_TYPE_PT_GENERATE_LE; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; @@ -63,6 +64,11 @@ typedef struct krb5db_18_tmp static const char *SIGNATURE_KRB5DB = "$krb5db$18$"; +u32 module_deep_comp_kernel (MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) +{ + return KERN_RUN_3; +} + u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 tmp_size = (const u64) sizeof (krb5db_18_tmp_t); @@ -263,7 +269,7 @@ void module_init (module_ctx_t *module_ctx) module_ctx->module_bridge_name = MODULE_DEFAULT; module_ctx->module_bridge_type = MODULE_DEFAULT; module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + 
module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; module_ctx->module_deprecated_notice = MODULE_DEFAULT; module_ctx->module_dgst_pos0 = module_dgst_pos0; module_ctx->module_dgst_pos1 = module_dgst_pos1; diff --git a/tools/test_modules/m14400.pm b/tools/test_modules/m14400.pm index 095d62e7f..664530bc3 100644 --- a/tools/test_modules/m14400.pm +++ b/tools/test_modules/m14400.pm @@ -10,7 +10,7 @@ use warnings; use Digest::SHA qw (sha1_hex); -sub module_constraints { [[0, 235], [20, 20], [0, 35], [20, 20], [0, 55]] } +sub module_constraints { [[0, 235], [20, 20], [0, 24], [20, 20], [0, 55]] } sub module_generate_hash { diff --git a/tools/test_modules/m21100.pm b/tools/test_modules/m21100.pm index 3bbd15b93..23e01cf7c 100644 --- a/tools/test_modules/m21100.pm +++ b/tools/test_modules/m21100.pm @@ -16,7 +16,7 @@ sub module_constraints { [[0, 256], [0, 256], [0, 55], [0, 55], [0, 55]] } sub module_generate_hash { my $word = shift; - my $salt = random_hex_string (1, 256); + my $salt = shift; my $digest = sha1_hex (md5_hex ($word . $salt)); From 063618f699393c87e1ef4bdb511a1147ead21125 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Fri, 11 Jul 2025 20:41:12 +0200 Subject: [PATCH 48/57] fix edge testing with password type HEX --- tools/test_edge.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/test_edge.sh b/tools/test_edge.sh index a6960e00e..8a92ad41f 100755 --- a/tools/test_edge.sh +++ b/tools/test_edge.sh @@ -315,6 +315,10 @@ for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' 
-f1 | CUR_OPTS_V="${CUR_OPTS} --backend-vector-width ${vector_width}" + if [ $pt_hex -eq 1 ]; then + CUR_OPTS_V="${CUR_OPTS_V} --hex-charset" + fi + # single hash if [ $TARGET_TYPE == all ] || [ $TARGET_TYPE == 0 ]; then From 5f66927222f4edb2566c2be32e043da088289e81 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Fri, 11 Jul 2025 21:12:43 +0200 Subject: [PATCH 49/57] Fixed build failed for aarch64 (es: rpi) --- docs/changes.txt | 1 + src/Makefile | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 1bee8f03e..cd9d7db03 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -106,6 +106,7 @@ - Fixed build failed for 18400 with Apple Metal - Fixed build failed for 18600 with Apple Metal - Fixed build failed for 4410 with vector width > 1 +- Fixed build failed for aarch64 (es: rpi) - Fixed clang identification in src/Makefile - Fixed build failure for almost all hash modes that make use of hc_swap64 and/or hc_swap64_S with Apple Metal / Apple Silicon - Fixed debug mode 5 by adding the missing colon between original-word and finding-rule diff --git a/src/Makefile b/src/Makefile index 575c23589..deefec4dd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -88,6 +88,9 @@ DARWIN_VERSION := $(shell uname -r | cut -d. 
-f1) IS_APPLE_SILICON := $(shell [ "$$(sysctl -in hw.optional.arm64 2>/dev/null)" = "1" ] && echo 1 || echo 0) endif +IS_AARCH64 := $(shell [ "$$(arch 2>/dev/null)" = "aarch64" ] && echo 1 || echo 0) +IS_ARM := $(or $(filter 1,$(IS_APPLE_SILICON)),$(filter 1,$(IS_AARCH64))) + ifneq (,$(filter $(UNAME),FreeBSD NetBSD)) CC := cc CXX := c++ @@ -380,8 +383,6 @@ LFLAGS_NATIVE += -lpthread LFLAGS_NATIVE += -liconv ifeq ($(IS_APPLE_SILICON),1) -CFLAGS_NATIVE += -DSSE2NEON_SUPPRESS_WARNINGS -CFLAGS_NATIVE += -I$(DEPS_SSE2NEON) CFLAGS_NATIVE += -arch arm64 CFLAGS_NATIVE += -arch x86_64 ifeq ($(SHARED),1) @@ -392,6 +393,11 @@ endif endif # Darwin +ifeq ($(IS_ARM),1) +CFLAGS_NATIVE += -DSSE2NEON_SUPPRESS_WARNINGS +CFLAGS_NATIVE += -I$(DEPS_SSE2NEON) +endif + ifeq ($(UNAME),CYGWIN) CFLAGS_NATIVE := $(CFLAGS) CFLAGS_NATIVE += -DWITH_HWMON @@ -838,12 +844,12 @@ CFLAGS_LZMA_WIN += -Wno-misleading-indentation CFLAGS_UNRAR_WIN += -Wno-misleading-indentation CFLAGS_UNRAR_WIN += -Wno-class-memaccess +endif -ifeq ($(IS_APPLE_SILICON),1) +ifeq ($(IS_ARM),1) CFLAGS_CROSS_LINUX += -DSSE2NEON_SUPPRESS_WARNINGS CFLAGS_CROSS_LINUX += -I$(DEPS_SSE2NEON) endif -endif ## ## Targets From 894ded7e146251762d758fb096abed872e901b86 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 00:31:19 +0200 Subject: [PATCH 50/57] Modified 'edge' arguments Improved argument handling Added --backend-devices-keepfree option Supported multiple Attack Type filter (ex: -a 0,3) Supported multiple Device ID filter (ex: -d 1,2) Supported multiple Device-Type ID filter (ex: -D 1,2) Supported multiple verbose levels --- tools/test_edge.sh | 476 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 365 insertions(+), 111 deletions(-) diff --git a/tools/test_edge.sh b/tools/test_edge.sh index 8a92ad41f..28850a04f 100755 --- a/tools/test_edge.sh +++ b/tools/test_edge.sh @@ -5,8 +5,12 @@ ## License.....: MIT ## +VERSION="1.1" + function usage() { + echo "> Edge Testing Suite, version ${VERSION}" 
+ echo "" echo "> Usage: $0 []" echo "" echo ":" @@ -15,24 +19,32 @@ function usage() echo " --hash-type-min : set min hash-type (default: 0)" echo " --hash-type-max : set max hash-type (default: 99999)" echo "" - echo "-a / --attack-type : set Attack Type (default: all. supported: 0 (Straight), 1 (Combination), 3 (Brute-force), 6 (Hybrid Wordlist + Mask), 7 (Hybrid Mask + Wordlist))" - echo "-K / --kernel-type : set Kernel-Type (default: all. supported: 0 (Pure), 1 (Optimized))" + echo "-a / --attack-type : set Attack Type or a list of comma-separated Attack Types" + echo " (default: all. supported: 0 (Straight), 1 (Combination), 3 (Brute-force), 6 (Hybrid Wordlist + Mask), 7 (Hybrid Mask + Wordlist))" + echo "-K / --kernel-type : set Kernel Type (default: all. supported: 0 (Pure), 1 (Optimized))" + echo "" echo "-t / --target-type : set Target Type (default: all. supported: single, multi)" echo "" echo "-V / --vector-width : set Vector Width (default: all. supported: 1, 2, 4, 8, 16)" echo " --vector-width-min : set min vector-width (default: 1)" echo " --vector-width-max : set max vector-width (default: 16)" echo "" - echo "-d : set Device ID" - echo "-D : set Device-Type ID" + echo "-d : set Device ID or a list of comma-separated Device IDs (default: not set)" + echo "" + echo "-D : set Device-Type ID or a list of comma-separated Device-Type IDs (default: not set)" echo "" echo "-r : set max runtime, in seconds, for each kernel execution (default: 270)" + echo "" echo " --metal-compiler-runtime : set max runtime, in seconds, for each kernel build using Apple Metal (default: 120)" echo "" echo " --metal-backend : exclude all hash types that do not work with Metal, exclude vector-width > 4, set --metal-compiler-runtime argument" echo "" + echo " --backend-devices-keepfree : Keep specified percentage of device memory free (default: disabled. 
supported: from 1 to 100)" + echo "" echo "-f / --force : run hashcat using --force" - echo "-v / --verbose : show debug messages" + echo "" + echo "-v / --verbose : show debug messages (supported: -v or -vv)" + echo "" echo "-h / --help : show this help, then exit" echo "" @@ -64,12 +76,14 @@ VECTOR_WIDTH=all VECTOR_WIDTHS="1 2 4 8 16" VECTOR_WIDTH_MIN=1 VECTOR_WIDTH_MAX=16 +DEVICE_TYPE="" FORCE=0 VERBOSE=0 RUNTIME_MAX=270 # 4.5 min METAL_BACKEND=0 METAL_COMPILER_RUNTIME=120 +BACKEND_DEVICES_KEEPFREE=0 OPTS="--quiet --potfile-disable --hwmon-disable --self-test-disable --machine-readable --logfile-disable" @@ -81,121 +95,354 @@ SKIP_OUT_MATCH_HASH_TYPES="14000 14100 18100 22000" SKIP_SAME_SALT_HASH_TYPES="6600 7100 7200 8200 13200 13400 15300 15310 15900 15910 16900 18300 18900 20200 20300 20400 27000 27100 29700 29930 29940" #SKIP_SAME_SALT_HASH_TYPES="400 3200 5800 6400 6500 6600 6700 7100 7200 7401 7900 8200 9100 9200 9400 10500 10901 12001 12200 12300 12400 12500 12700 12800 12900 13000 13200 13400 13600 14700 14800 15100 15200 15300 15310 15400 15600 15900 15910 16200 16300 16700 16900 18300 18400 18800 18900 19000 19100 19600 19700 19800 19900 20011 20012 20013 20200 20300 20400 21501 22100 22400 22600 23100 23300 23500 23600 23700 23900 24100 24200 24410 24420 24500 25300 25400 25500 25600 25800 26100 26500 26600 27000 27100 27400 27500 27600 28100 28400 28600 28800 28900 29600 29700 29910 29920 29930 29940 30600 31200 31900" -while [ $# -gt 0 ]; do - case $1 in - --metal-backend) METAL_BACKEND=1 ;; - --metal-compiler-runtime) METAL_COMPILER_RUNTIME=${2}; shift ;; - -r) RUNTIME_MAX=${2}; shift ;; - -h|--help) usage; break ;; - -v|--verbose) VERBOSE=1 ;; - -f|--force) FORCE=1 ;; - -V|--vector-width) - if [ "${2}" != "all" ]; then - if [[ ${2} =~ ^-?[0-9]+$ ]]; then - if [ "${2}" == "1" ]; then - VECTOR_WIDTH=1 - elif [ "${2}" == "2" ]; then - VECTOR_WIDTH=2 - elif [ "${2}" == "4" ]; then - VECTOR_WIDTH=4 - elif [ "${2}" == "8" ]; then - VECTOR_WIDTH=8 - elif 
[ "${2}" == "16" ]; then - VECTOR_WIDTH=16 - else - usage - fi - else - usage - fi +# Parse long options manually +#while [[ "$1" == --* ]]; do +while [[ $# -gt 0 ]]; do + case "$1" in + --backend-devices-keepfree) + BACKEND_DEVICES_KEEPFREE=$2 + shift 2 + + # Validate: must be numeric and > 0 + if ! [[ "$BACKEND_DEVICES_KEEPFREE" =~ ^[0-9]+$ ]]; then + echo "Error: --backend-devices-keepfree must be a positive integer." + usage + elif (( BACKEND_DEVICES_KEEPFREE < 1 || BACKEND_DEVICES_KEEPFREE > 100 )); then + echo "Error: --backend-devices-keepfree must be between 1 and 100." + usage fi + ;; + --metal-backend) + METAL_BACKEND=1 shift ;; - --vector-width-min) VECTOR_WIDTH_MIN=${2}; shift ;; - --vector-width-max) VECTOR_WIDTH_MAX=${2}; shift ;; - -t|--target-type) - if [ "${2}" != "all" ]; then - if [ "${2}" == "single" ]; then - TARGET_TYPE=0 - elif [ "${2}" == "multi" ]; then - TARGET_TYPE=1 - else - usage - fi - fi - shift - ;; - -m|--hash-type) - if [ "${2}" != "all" ]; then - if [[ ${2} =~ ^-?[0-9]+$ ]]; then - HASH_TYPE=${2} - else - usage - fi - fi - shift - ;; - --hash-type-min) HASH_TYPE_MIN=${2}; shift ;; - --hash-type-max) HASH_TYPE_MAX=${2}; shift ;; - -a|--attack-type) - if [ "${2}" != "all" ]; then - if [[ ${2} =~ ^-?[0-9]+$ ]]; then - if [ "${2}" == "0" ]; then - ATTACK_TYPE=0 - elif [ "${2}" == "1" ]; then - ATTACK_TYPE=1 - elif [ "${2}" == "3" ]; then - ATTACK_TYPE=3 - elif [ "${2}" == "6" ]; then - ATTACK_TYPE=6 - elif [ "${OPTARG}" == "7" ]; then - ATTACK_TYPE=7 - else - usage - fi - else - usage - fi - fi - shift - ;; - -K|--kernel-type) - if [ "${2}" != "all" ]; then - if [[ ${2} =~ ^-?[0-9]+$ ]]; then - if [ "${2}" == "0" ]; then - KERNEL_TYPE=0 #pure - elif [ "${2}" == "1" ]; then - KERNEL_TYPE=1 #optimized - else - usage - fi - else - usage - fi - fi - shift - ;; - -d) OPTS="${OPTS} -d ${2}"; shift ;; - -D) - if [ "${2}" == "1" ]; then - OPTS="${OPTS} -D 1" - DEVICE_TYPE="Cpu" - elif [ "${2}" == "2" ]; then - OPTS="${OPTS} -D 2" - 
DEVICE_TYPE="Gpu" + --metal-compiler-runtime) + if [[ "$2" =~ ^-?[0-9]+$ ]]; then + METAL_COMPILER_RUNTIME=$2 else - OPTS="${OPTS} -D ${2}" - DEVICE_TYPE="Cpu + Gpu" + echo "Error: --metal-compiler-runtime requires a valid argument (integer)" + usage fi + shift 2 + ;; + --vector-width-min) + if [[ "$2" =~ ^(1|2|4|8|16)$ ]]; then + VECTOR_WIDTH_MIN=$2 + else + echo "Error: --vector-width-min requires a valid argument" + usage + fi + shift 2 + ;; + --vector-width-max) + if [[ "$2" =~ ^(1|2|4|8|16)$ ]]; then + VECTOR_WIDTH_MAX=$2 + else + echo "Error: --vector-width-max requires a valid argument" + usage + fi + shift 2 + ;; + --hash-type-min) + if [[ "$2" =~ ^[0-9]+$ ]] && (( $2 >= 0 && $2 <= 99999 )); then + HASH_TYPE_MIN=$2 + else + echo "Error: --hash-type-min requires a valid argument (integer between 0 and 99999)" + usage + fi + shift 2 + ;; + --hash-type-max) + if [[ "$2" =~ ^[0-9]+$ ]] && (( $2 >= 0 && $2 <= 99999 )); then + HASH_TYPE_MAX=$2 + else + echo "Error: --hash-type-max requires a valid argument (integer between 0 and 99999)" + usage + fi + shift 2 + ;; + --help) + usage + ;; + -?*) + optstring="${1:1}" # strip leading '-' + # Parse each char in the cluster + for (( i=0; i<${#optstring}; i++ )); do + opt="${optstring:i:1}" + case "$opt" in + r) + if [[ "$2" =~ ^-?[0-9]+$ ]]; then + RUNTIME_MAX="$2" + else + echo "Error: -r requires a valid argument (integer)" + usage + fi + ;; + v) + (( VERBOSE++ )) + if [ ${VERBOSE} -gt 2 ]; then + echo "Error: too many -v specified (max: 2)" + usage + fi + ;; + f) + FORCE=1 + ;; + h) + usage + ;; + d) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -d requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -d requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ ! 
"$optarg" =~ ^[0-9,]+$ ]]; then + echo "Error: -d argument must be comma-separated numbers" + usage + fi + + OPTS="${OPTS} -d ${optarg}" + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + D) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -D requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -D requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ ! "$optarg" =~ ^[0-9,]+$ ]]; then + echo "Error: -D argument must be comma-separated numbers" + usage + fi + + case "$optarg" in + 1) OPTS="${OPTS} -D 1"; DEVICE_TYPE="Cpu" ;; + 2) OPTS="${OPTS} -D 2"; DEVICE_TYPE="Gpu" ;; + *) OPTS="${OPTS} -D $optarg"; DEVICE_TYPE="Cpu + Gpu" ;; + esac + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + V) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -V requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -V requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ "$optarg" == "all" ]]; then + : + elif [[ "$optarg" =~ ^(1|2|4|8|16)$ ]]; then + VECTOR_WIDTH="$optarg" + else + echo "Invalid vector width: $optarg" + usage + fi + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + t) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -t requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -t requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ "$optarg" == "single" ]]; then + TARGET_TYPE=0 + elif [[ "$optarg" == "multi" ]]; then + TARGET_TYPE=1 + elif [[ "$optarg" == "all" ]]; then + : + else + echo 
"Invalid target type: $optarg" + usage + fi + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + m) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -m requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -m requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ "$optarg" == "all" ]]; then + : + elif [[ "$optarg" =~ ^[0-9]+$ ]]; then + HASH_TYPE="$optarg" + else + echo "Invalid hash type: $optarg" + usage + fi + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + a) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -a requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -a requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ "$optarg" == "all" ]]; then + : + else + ATTACK_TYPES="" + + IFS=',' read -ra INPUT_ATTACK_TYPES <<< "$optarg" + for atk in "${INPUT_ATTACK_TYPES[@]}"; do + if [[ "$atk" =~ ^(0|1|3|6|7)$ ]]; then + ATTACK_TYPES+=" $atk" + else + echo "Invalid attack type: $atk" + usage + fi + done + + ATTACK_TYPES="$(echo "$ATTACK_TYPES" | xargs)" # Trim leading/trailing spaces + fi + + [[ "$shift_inline" -eq 0 ]] && shift + + break + ;; + K) + if (( i + 1 < ${#optstring} )); then + optarg="${optstring:$((i+1))}" + shift_inline=1 + elif [[ -n "$2" && "$2" != -* ]]; then + optarg="$2" + shift_inline=0 + else + echo "Error: -K requires an argument" + usage + fi + + if [[ "$optarg" == -* ]]; then + echo "Error: -K requires a valid argument, not another option (-$optarg)" + usage + fi + + if [[ "$optarg" == "all" ]]; then + : + elif [[ "$optarg" =~ ^(0|1)$ ]]; then + KERNEL_TYPE="$optarg" + else + echo "Invalid kernel type: $optarg" + usage + fi + + [[ "$shift_inline" -eq 0 ]] 
&& shift + + break + ;; + *) + echo "Unknown option: -$opt" + usage + ;; + esac + done + shift + ;; + --*) + echo "Unknown long option: $1" + usage + ;; + *) + echo "empty $1" shift ;; - *) echo "Unknown parameter passed: $1"; usage; break ;; esac - shift done OPTS="${OPTS} --runtime ${RUNTIME_MAX}" +if [[ "$HASH_TYPE" != "all" && ( "$HASH_TYPE_MIN" -ne 0 || "$HASH_TYPE_MAX" -ne 99999 ) ]]; then + echo "Error: cannot set --hash-type and --hash-type-min/--hash-type-max" + usage +fi + +if [[ "$VECTOR_WIDTH" != "all" && ( "$VECTOR_WIDTH_MIN" -ne 1 || "$VECTOR_WIDTH_MAX" -ne 16 ) ]]; then + echo "Error: cannot set --vector-width and --vector-width-min/--vector-width-max" + usage +fi + if [ ${FORCE} -eq 1 ]; then OPTS="${OPTS} --force" fi @@ -212,6 +459,14 @@ if [ $METAL_BACKEND -eq 1 ]; then fi fi +if [ $BACKEND_DEVICES_KEEPFREE -gt 0 ]; then + OPTS="${OPTS} --backend-devices-keepfree ${BACKEND_DEVICES_KEEPFREE}" +fi + +if [ ${VERBOSE} -ge 1 ]; then + echo "Global hashcat options selected: ${OPTS}" +fi + mkdir -p ${OUTD} &> /dev/null for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' -f1 | awk '{print $1+=0}'); do @@ -292,7 +547,7 @@ for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' -f1 | ./tools/test.pl edge ${hash_type} ${attack_type} ${optimized} 2>/dev/null > ${edge_out} - if [ ${VERBOSE} -eq 1 ]; then + if [ ${VERBOSE} -ge 2 ]; then cat ${edge_out} fi @@ -351,7 +606,7 @@ for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' 
-f1 | word=$(eval $x) - if [ ${VERBOSE} -eq 1 ]; then + if [ ${VERBOSE} -ge 1 ]; then echo "[ ${OUTD} ] > Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Salt '${salt}', Hash ${hash}" | tee -a ${OUTD}/test_edge.details.log else echo "[ ${OUTD} ] > Hash-Type ${hash_type}, Attack-Type ${attack_type}, Kernel-Type ${kernel_type}, Test ID ${i}, Word len ${word_len}, Salt len ${salt_len}, Word '${word}', Salt '${salt}', Hash ${hash}" >> ${OUTD}/test_edge.details.log @@ -506,7 +761,6 @@ for hash_type in $(ls tools/test_modules/*.pm | cut -d'm' -f3 | cut -d'.' -f1 | # multi hash if [ $TARGET_TYPE == all ] || [ $TARGET_TYPE == 1 ]; then - cnt_max=-1 tmp_cnt_max=$(./hashcat -m ${hash_type} -HH | grep Hashes\\.Count\\.Max | awk '{print $2}') if [[ $tmp_cnt_max =~ ^-?[0-9]+$ ]]; then From dbd41ca750ff4d4794be9dc8f939569e51bbcc7c Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 01:35:25 +0200 Subject: [PATCH 51/57] Fixed bug in module_constraints and kernel for hash-mode 7800 --- OpenCL/m07800_a3-optimized.cl | 142 +++++++++++++++++++++++++++------- docs/changes.txt | 1 + tools/test_modules/m07800.pm | 2 +- 3 files changed, 114 insertions(+), 31 deletions(-) diff --git a/OpenCL/m07800_a3-optimized.cl b/OpenCL/m07800_a3-optimized.cl index 5b442f6c6..f3b3e0582 100644 --- a/OpenCL/m07800_a3-optimized.cl +++ b/OpenCL/m07800_a3-optimized.cl @@ -62,16 +62,12 @@ DECLSPEC void m07800m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w * salt */ - u32 salt_buf[8]; + u32 salt_buf[4]; salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]); salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]); salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[2]); salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4]); - salt_buf[5] = hc_swap32_S 
(salt_bufs[SALT_POS_HOST].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[7]); const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; @@ -84,10 +80,10 @@ DECLSPEC void m07800m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w s0[1] = salt_buf[1]; s0[2] = salt_buf[2]; s0[3] = salt_buf[3]; - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; s2[0] = 0; s2[1] = 0; s2[2] = 0; @@ -206,8 +202,8 @@ DECLSPEC void m07800m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w final[ 5] = w1[1]; final[ 6] = w1[2]; final[ 7] = w1[3]; - final[ 8] = 0; - final[ 9] = 0; + final[ 8] = w2[0]; + final[ 9] = w2[1]; final[10] = 0; final[11] = 0; final[12] = 0; @@ -279,16 +275,12 @@ DECLSPEC void m07800s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w * salt */ - u32 salt_buf[8]; + u32 salt_buf[4]; salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]); salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]); salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[2]); salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[7]); const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; @@ -301,10 +293,10 @@ DECLSPEC void m07800s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w s0[1] = salt_buf[1]; s0[2] = salt_buf[2]; s0[3] = salt_buf[3]; - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; s2[0] = 0; s2[1] = 0; s2[2] = 0; @@ -435,8 +427,8 @@ DECLSPEC void m07800s (PRIVATE_AS u32 
*w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w final[ 5] = w1[1]; final[ 6] = w1[2]; final[ 7] = w1[3]; - final[ 8] = 0; - final[ 9] = 0; + final[ 8] = w2[0]; + final[ 9] = w2[1]; final[10] = 0; final[11] = 0; final[12] = 0; @@ -514,8 +506,6 @@ KERNEL_FQ KERNEL_FA void m07800_m04 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -569,8 +559,6 @@ KERNEL_FQ KERNEL_FA void m07800_m08 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -610,6 +598,55 @@ KERNEL_FQ KERNEL_FA void m07800_m08 (KERN_ATTR_BASIC ()) KERNEL_FQ KERNEL_FA void m07800_m16 (KERN_ATTR_BASIC ()) { + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m07800m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } KERNEL_FQ KERNEL_FA void m07800_s04 (KERN_ATTR_BASIC ()) @@ -628,8 +665,6 @@ KERNEL_FQ KERNEL_FA void m07800_s04 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; 
w0[0] = pws[gid].i[ 0]; @@ -683,8 +718,6 @@ KERNEL_FQ KERNEL_FA void m07800_s08 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -724,4 +757,53 @@ KERNEL_FQ KERNEL_FA void m07800_s08 (KERN_ATTR_BASIC ()) KERNEL_FQ KERNEL_FA void m07800_s16 (KERN_ATTR_BASIC ()) { + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m07800s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } diff --git a/docs/changes.txt b/docs/changes.txt index cd9d7db03..5c683f006 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -91,6 +91,7 @@ - Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH - Fixed a bug in all SCRYPT-based hash modes with Apple Metal - Fixed buffer overflow on module_26600.c / module_hash_encode() +- Fixed bug in module_constraints and kernel for hash-mode 7800 - Fixed bug in 18400 module_hash_encode - Fixed bug in 23800/unrar with Apple Silicon - Fixed bug in 26900 module_hash_encode diff --git 
a/tools/test_modules/m07800.pm b/tools/test_modules/m07800.pm index bb5532120..32353e830 100644 --- a/tools/test_modules/m07800.pm +++ b/tools/test_modules/m07800.pm @@ -10,7 +10,7 @@ use warnings; use Digest::SHA qw (sha1 sha1_hex); -sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [1, 12], [0, 55]] } +sub module_constraints { [[-1, -1], [-1, -1], [0, 40], [1, 12], [0, 55]] } sub module_generate_hash { From dc3418eaa4dd64c9f54a985063416aaf2e320af9 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 01:49:22 +0200 Subject: [PATCH 52/57] Fixed bug in module_constraints and kernel for hash-mode 7801 --- OpenCL/m07801_a3-optimized.cl | 142 +++++++++++++++++++++++++++------- docs/changes.txt | 1 + tools/test_modules/m07801.pm | 2 +- 3 files changed, 114 insertions(+), 31 deletions(-) diff --git a/OpenCL/m07801_a3-optimized.cl b/OpenCL/m07801_a3-optimized.cl index 340aa6258..476d144d2 100644 --- a/OpenCL/m07801_a3-optimized.cl +++ b/OpenCL/m07801_a3-optimized.cl @@ -62,16 +62,12 @@ DECLSPEC void m07801m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w * salt */ - u32 salt_buf[8]; + u32 salt_buf[4]; salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]); salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]); salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[2]); salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[7]); const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; @@ -84,10 +80,10 @@ DECLSPEC void m07801m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w s0[1] = salt_buf[1]; s0[2] = salt_buf[2]; s0[3] = salt_buf[3]; - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; + 
s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; s2[0] = 0; s2[1] = 0; s2[2] = 0; @@ -206,8 +202,8 @@ DECLSPEC void m07801m (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w final[ 5] = w1[1]; final[ 6] = w1[2]; final[ 7] = w1[3]; - final[ 8] = 0; - final[ 9] = 0; + final[ 8] = w2[0]; + final[ 9] = w2[1]; final[10] = 0; final[11] = 0; final[12] = 0; @@ -279,16 +275,12 @@ DECLSPEC void m07801s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w * salt */ - u32 salt_buf[8]; + u32 salt_buf[4]; salt_buf[0] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[0]); salt_buf[1] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[1]); salt_buf[2] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[2]); salt_buf[3] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[3]); - salt_buf[4] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[4]); - salt_buf[5] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[5]); - salt_buf[6] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[6]); - salt_buf[7] = hc_swap32_S (salt_bufs[SALT_POS_HOST].salt_buf[7]); const u32 salt_len = salt_bufs[SALT_POS_HOST].salt_len; @@ -301,10 +293,10 @@ DECLSPEC void m07801s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w s0[1] = salt_buf[1]; s0[2] = salt_buf[2]; s0[3] = salt_buf[3]; - s1[0] = salt_buf[4]; - s1[1] = salt_buf[5]; - s1[2] = salt_buf[6]; - s1[3] = salt_buf[7]; + s1[0] = 0; + s1[1] = 0; + s1[2] = 0; + s1[3] = 0; s2[0] = 0; s2[1] = 0; s2[2] = 0; @@ -435,8 +427,8 @@ DECLSPEC void m07801s (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w final[ 5] = w1[1]; final[ 6] = w1[2]; final[ 7] = w1[3]; - final[ 8] = 0; - final[ 9] = 0; + final[ 8] = w2[0]; + final[ 9] = w2[1]; final[10] = 0; final[11] = 0; final[12] = 0; @@ -514,8 +506,6 @@ KERNEL_FQ KERNEL_FA void m07801_m04 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -569,8 +559,6 @@ KERNEL_FQ KERNEL_FA void m07801_m08 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 
lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -610,6 +598,55 @@ KERNEL_FQ KERNEL_FA void m07801_m08 (KERN_ATTR_BASIC ()) KERNEL_FQ KERNEL_FA void m07801_m16 (KERN_ATTR_BASIC ()) { + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; + + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m07801m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } KERNEL_FQ KERNEL_FA void m07801_s04 (KERN_ATTR_BASIC ()) @@ -628,8 +665,6 @@ KERNEL_FQ KERNEL_FA void m07801_s04 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -683,8 +718,6 @@ KERNEL_FQ KERNEL_FA void m07801_s08 (KERN_ATTR_BASIC ()) * modifier */ - //const u64 lid = get_local_id (0); - u32 w0[4]; w0[0] = pws[gid].i[ 0]; @@ -724,4 +757,53 @@ KERNEL_FQ KERNEL_FA void m07801_s08 (KERN_ATTR_BASIC ()) KERNEL_FQ KERNEL_FA void m07801_s16 (KERN_ATTR_BASIC ()) { + /** + * base + */ + + const u64 lid = get_local_id (0); + const u64 gid = get_global_id (0); + const u64 lsz = get_local_size (0); + + if (gid >= GID_CNT) return; 
+ + /** + * modifier + */ + + u32 w0[4]; + + w0[0] = pws[gid].i[ 0]; + w0[1] = pws[gid].i[ 1]; + w0[2] = pws[gid].i[ 2]; + w0[3] = pws[gid].i[ 3]; + + u32 w1[4]; + + w1[0] = pws[gid].i[ 4]; + w1[1] = pws[gid].i[ 5]; + w1[2] = pws[gid].i[ 6]; + w1[3] = pws[gid].i[ 7]; + + u32 w2[4]; + + w2[0] = pws[gid].i[ 8]; + w2[1] = pws[gid].i[ 9]; + w2[2] = pws[gid].i[10]; + w2[3] = pws[gid].i[11]; + + u32 w3[4]; + + w3[0] = pws[gid].i[12]; + w3[1] = pws[gid].i[13]; + w3[2] = 0; + w3[3] = 0; + + const u32 pw_len = pws[gid].pw_len & 63; + + /** + * main + */ + + m07801s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, kernel_param, gid, lid, lsz); } diff --git a/docs/changes.txt b/docs/changes.txt index cd9d7db03..1a948ddd8 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -91,6 +91,7 @@ - Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH - Fixed a bug in all SCRYPT-based hash modes with Apple Metal - Fixed buffer overflow on module_26600.c / module_hash_encode() +- Fixed bug in module_constraints and kernel for hash-mode 7801 - Fixed bug in 18400 module_hash_encode - Fixed bug in 23800/unrar with Apple Silicon - Fixed bug in 26900 module_hash_encode diff --git a/tools/test_modules/m07801.pm b/tools/test_modules/m07801.pm index fdd707d74..fe9421d2b 100644 --- a/tools/test_modules/m07801.pm +++ b/tools/test_modules/m07801.pm @@ -10,7 +10,7 @@ use warnings; use Digest::SHA qw (sha1 sha1_hex); -sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [1, 12], [0, 55]] } +sub module_constraints { [[-1, -1], [-1, -1], [0, 40], [1, 12], [0, 55]] } sub module_generate_hash { From 4567acd5f0f6fc198c2651e19a227f7eb9bcec0d Mon Sep 17 00:00:00 2001 From: Gabriele 
Gristina Date: Sat, 12 Jul 2025 01:59:45 +0200 Subject: [PATCH 53/57] Fixed bug in module_constraints for hash-mode 8700 --- tools/test_modules/m08700.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m08700.pm b/tools/test_modules/m08700.pm index a5b770db5..0b4778111 100644 --- a/tools/test_modules/m08700.pm +++ b/tools/test_modules/m08700.pm @@ -8,7 +8,7 @@ use strict; use warnings; -sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [5, 5], [-1, -1]] } +sub module_constraints { [[-1, -1], [-1, -1], [0, 32], [5, 5], [0, 55]] } my $LOTUS_MAGIC_TABLE = [ From 9806b7d6156e28360cec11544a1965699a9e0acd Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 02:07:17 +0200 Subject: [PATCH 54/57] Fixed bug in module_constraints for hash-mode 10100 --- tools/test_modules/m10100.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m10100.pm b/tools/test_modules/m10100.pm index 8c29310ad..303213cbd 100644 --- a/tools/test_modules/m10100.pm +++ b/tools/test_modules/m10100.pm @@ -10,7 +10,7 @@ use warnings; use Digest::SipHash qw (siphash); -sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [32, 32], [-1, -1]] } +sub module_constraints { [[-1, -1], [-1, -1], [0, 55], [32, 32], [0, 55]] } sub module_generate_hash { From 84cc247fc6633e5c8cbe53af47d077c6669d5e87 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 02:20:53 +0200 Subject: [PATCH 55/57] Fixed bug in module_constraints for hash-mode 11100 --- tools/test_modules/m11100.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m11100.pm b/tools/test_modules/m11100.pm index d64e6b0cb..c576641db 100644 --- a/tools/test_modules/m11100.pm +++ b/tools/test_modules/m11100.pm @@ -10,7 +10,7 @@ use warnings; use Digest::MD5 qw (md5_hex); -sub module_constraints { [[0, 256], [8, 8], [0, 55], [8, 8], [-1, -1]] } +sub module_constraints { [[0, 256], [8, 8], [0, 55], [8, 8], [0, 55]] 
} sub module_generate_hash { From ddf99ca69d073c90ffccaa8973dde72e4089af5f Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 02:39:31 +0200 Subject: [PATCH 56/57] Fixed bug in module_constraints for hash-mode 15000 --- tools/test_modules/m15000.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m15000.pm b/tools/test_modules/m15000.pm index 6ee5a7c27..7c92039c1 100644 --- a/tools/test_modules/m15000.pm +++ b/tools/test_modules/m15000.pm @@ -10,7 +10,7 @@ use warnings; use Digest::SHA qw (sha512_hex); -sub module_constraints { [[0, 256], [64, 64], [0, 55], [64, 64], [-1, -1]] } +sub module_constraints { [[0, 256], [64, 64], [0, 47], [64, 64], [0, 55]] } sub module_generate_hash { From f3adb3c318e43d44121f99027b7e39159700b164 Mon Sep 17 00:00:00 2001 From: Gabriele Gristina Date: Sat, 12 Jul 2025 08:54:15 +0200 Subject: [PATCH 57/57] Fixed bug in module_constraints for hash-mode 31000 --- tools/test_modules/m31000.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m31000.pm b/tools/test_modules/m31000.pm index 4c7ef55b2..b799ea0a4 100644 --- a/tools/test_modules/m31000.pm +++ b/tools/test_modules/m31000.pm @@ -10,7 +10,7 @@ use warnings; use Crypt::Digest::BLAKE2s_256 qw (blake2s_256_hex); -sub module_constraints { [[0, 128], [-1, -1], [0, 64], [-1, -1], [-1, -1]] } +sub module_constraints { [[0, 128], [-1, -1], [0, 55], [-1, -1], [-1, -1]] } sub module_generate_hash {