From 0154d636bfdd2583fe49adea4675475377d392e9 Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 25 Jul 2016 21:51:04 +0200 Subject: [PATCH] Slightly increased NVIDIA's rule-processing performance by using generic instructions instead of byte_perm() --- OpenCL/inc_rp.cl | 1597 ---------------------------------------------- docs/changes.txt | 1 + 2 files changed, 1 insertion(+), 1597 deletions(-) diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl index 4d2d22c2d..760eceeb2 100644 --- a/OpenCL/inc_rp.cl +++ b/OpenCL/inc_rp.cl @@ -5,16 +5,6 @@ * License.....: MIT */ -#define DO_NOT_USE_BYTE_PERM - -#ifdef DO_NOT_USE_BYTE_PERM -#ifdef IS_NV -#undef IS_NV -#define IS_GENERIC -#define RESTORE_NV -#endif -#endif - inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len); inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len); inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]); @@ -138,18 +128,6 @@ inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len) inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - #ifdef IS_NV - out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[3] = __byte_perm_S (in1[3], 0, 0x4321); - #endif - - #if defined IS_AMD || defined IS_GENERIC out0[0] = amd_bytealign_S (in0[1], in0[0], 1); out0[1] = amd_bytealign_S (in0[2], in0[1], 1); out0[2] = amd_bytealign_S (in0[3], in0[2], 1); @@ -158,23 +136,10 @@ inline void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 o out1[1] 
= amd_bytealign_S (in1[2], in1[1], 1); out1[2] = amd_bytealign_S (in1[3], in1[2], 1); out1[3] = amd_bytealign_S ( 0, in1[3], 1); - #endif } inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { - #ifdef IS_NV - out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[0] = __byte_perm_S ( 0, in0[0], 0x6543); - #endif - - #if defined IS_AMD || defined IS_GENERIC out1[3] = amd_bytealign_S (in1[3], in1[2], 3); out1[2] = amd_bytealign_S (in1[2], in1[1], 3); out1[1] = amd_bytealign_S (in1[1], in1[0], 3); @@ -183,307 +148,10 @@ inline void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 o out0[2] = amd_bytealign_S (in0[2], in0[1], 3); out0[1] = amd_bytealign_S (in0[1], in0[0], 3); out0[0] = amd_bytealign_S (in0[0], 0, 3); - #endif } inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) { - #ifdef IS_NV - switch (num) - { - case 0: out0[0] = in0[0]; - out0[1] = in0[1]; - out0[2] = in0[2]; - out0[3] = in0[3]; - out1[0] = in1[0]; - out1[1] = in1[1]; - out1[2] = in1[2]; - out1[3] = in1[3]; - break; - case 1: out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[3] = __byte_perm_S (in1[3], 0, 0x4321); - break; - case 2: out0[0] = __byte_perm_S (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm_S (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm_S (in0[2], 
in0[3], 0x5432); - out0[3] = __byte_perm_S (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm_S (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm_S (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm_S (in1[2], in1[3], 0x5432); - out1[3] = __byte_perm_S (in1[3], 0, 0x5432); - break; - case 3: out0[0] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm_S (in0[1], in0[2], 0x6543); - out0[2] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm_S (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[3] = __byte_perm_S (in1[3], 0, 0x6543); - break; - case 4: out0[0] = in0[1]; - out0[1] = in0[2]; - out0[2] = in0[3]; - out0[3] = in1[0]; - out1[0] = in1[1]; - out1[1] = in1[2]; - out1[2] = in1[3]; - out1[3] = 0; - break; - case 5: out0[0] = __byte_perm_S (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm_S (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm_S (in1[3], 0, 0x4321); - out1[3] = 0; - break; - case 6: out0[0] = __byte_perm_S (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm_S (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm_S (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm_S (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm_S (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm_S (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm_S (in1[3], 0, 0x5432); - out1[3] = 0; - break; - case 7: out0[0] = __byte_perm_S (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm_S (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[2] = 
__byte_perm_S (in1[3], 0, 0x6543); - out1[3] = 0; - break; - case 8: out0[0] = in0[2]; - out0[1] = in0[3]; - out0[2] = in1[0]; - out0[3] = in1[1]; - out1[0] = in1[2]; - out1[1] = in1[3]; - out1[2] = 0; - out1[3] = 0; - break; - case 9: out0[0] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[1] = __byte_perm_S (in0[3], in1[0], 0x4321); - out0[2] = __byte_perm_S (in1[0], in1[1], 0x4321); - out0[3] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[0] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[1] = __byte_perm_S (in1[3], 0, 0x4321); - out1[2] = 0; - out1[3] = 0; - break; - case 10: out0[0] = __byte_perm_S (in0[2], in0[3], 0x5432); - out0[1] = __byte_perm_S (in0[3], in1[0], 0x5432); - out0[2] = __byte_perm_S (in1[0], in1[1], 0x5432); - out0[3] = __byte_perm_S (in1[1], in1[2], 0x5432); - out1[0] = __byte_perm_S (in1[2], in1[3], 0x5432); - out1[1] = __byte_perm_S (in1[3], 0, 0x5432); - out1[2] = 0; - out1[3] = 0; - break; - case 11: out0[0] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[1] = __byte_perm_S (in0[3], in1[0], 0x6543); - out0[2] = __byte_perm_S (in1[0], in1[1], 0x6543); - out0[3] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[0] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[1] = __byte_perm_S (in1[3], 0, 0x6543); - out1[2] = 0; - out1[3] = 0; - break; - case 12: out0[0] = in0[3]; - out0[1] = in1[0]; - out0[2] = in1[1]; - out0[3] = in1[2]; - out1[0] = in1[3]; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 13: - out0[0] = __byte_perm_S (in0[3], in1[0], 0x4321); - out0[1] = __byte_perm_S (in1[0], in1[1], 0x4321); - out0[2] = __byte_perm_S (in1[1], in1[2], 0x4321); - out0[3] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[0] = __byte_perm_S (in1[3], 0, 0x4321); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 14: out0[0] = __byte_perm_S (in0[3], in1[0], 0x5432); - out0[1] = __byte_perm_S (in1[0], in1[1], 0x5432); - out0[2] = __byte_perm_S (in1[1], in1[2], 0x5432); - out0[3] = __byte_perm_S (in1[2], in1[3], 0x5432); - out1[0] 
= __byte_perm_S (in1[3], 0, 0x5432); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 15: out0[0] = __byte_perm_S (in0[3], in1[0], 0x6543); - out0[1] = __byte_perm_S (in1[0], in1[1], 0x6543); - out0[2] = __byte_perm_S (in1[1], in1[2], 0x6543); - out0[3] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[0] = __byte_perm_S (in1[3], 0, 0x6543); - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 16: out0[0] = in1[0]; - out0[1] = in1[1]; - out0[2] = in1[2]; - out0[3] = in1[3]; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 17: out0[0] = __byte_perm_S (in1[0], in1[1], 0x4321); - out0[1] = __byte_perm_S (in1[1], in1[2], 0x4321); - out0[2] = __byte_perm_S (in1[2], in1[3], 0x4321); - out0[3] = __byte_perm_S (in1[3], 0, 0x4321); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 18: out0[0] = __byte_perm_S (in1[0], in1[1], 0x5432); - out0[1] = __byte_perm_S (in1[1], in1[2], 0x5432); - out0[2] = __byte_perm_S (in1[2], in1[3], 0x5432); - out0[3] = __byte_perm_S (in1[3], 0, 0x5432); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 19: out0[0] = __byte_perm_S (in1[0], in1[1], 0x6543); - out0[1] = __byte_perm_S (in1[1], in1[2], 0x6543); - out0[2] = __byte_perm_S (in1[2], in1[3], 0x6543); - out0[3] = __byte_perm_S (in1[3], 0, 0x6543); - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 20: out0[0] = in1[1]; - out0[1] = in1[2]; - out0[2] = in1[3]; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 21: out0[0] = __byte_perm_S (in1[1], in1[2], 0x4321); - out0[1] = __byte_perm_S (in1[2], in1[3], 0x4321); - out0[2] = __byte_perm_S (in1[3], 0, 0x4321); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 22: out0[0] = __byte_perm_S (in1[1], in1[2], 0x5432); - out0[1] = __byte_perm_S (in1[2], in1[3], 0x5432); - out0[2] = __byte_perm_S (in1[3], 0, 0x5432); - out0[3] = 0; - out1[0] = 0; - 
out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 23: out0[0] = __byte_perm_S (in1[1], in1[2], 0x6543); - out0[1] = __byte_perm_S (in1[2], in1[3], 0x6543); - out0[2] = __byte_perm_S (in1[3], 0, 0x6543); - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 24: out0[0] = in1[2]; - out0[1] = in1[3]; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 25: out0[0] = __byte_perm_S (in1[2], in1[3], 0x4321); - out0[1] = __byte_perm_S (in1[3], 0, 0x4321); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 26: out0[0] = __byte_perm_S (in1[2], in1[3], 0x5432); - out0[1] = __byte_perm_S (in1[3], 0, 0x5432); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 27: out0[0] = __byte_perm_S (in1[2], in1[3], 0x6543); - out0[1] = __byte_perm_S (in1[3], 0, 0x6543); - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 28: out0[0] = in1[3]; - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 29: out0[0] = __byte_perm_S (in1[3], 0, 0x4321); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 30: out0[0] = __byte_perm_S (in1[3], 0, 0x5432); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - case 31: out0[0] = __byte_perm_S (in1[3], 0, 0x6543); - out0[1] = 0; - out0[2] = 0; - out0[3] = 0; - out1[0] = 0; - out1[1] = 0; - out1[2] = 0; - out1[3] = 0; - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC switch (num) { case 0: out0[0] = in0[0]; @@ -775,306 +443,10 @@ inline void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[3] = 0; break; } - #endif } inline void rshift_block_N (const u32 in0[4], 
const u32 in1[4], u32 out0[4], u32 out1[4], const u32 num) { - #ifdef IS_NV - switch (num) - { - case 0: out1[3] = in1[3]; - out1[2] = in1[2]; - out1[1] = in1[1]; - out1[0] = in1[0]; - out0[3] = in0[3]; - out0[2] = in0[2]; - out0[1] = in0[1]; - out0[0] = in0[0]; - break; - case 1: out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[0] = __byte_perm_S ( 0, in0[0], 0x6543); - break; - case 2: out1[3] = __byte_perm_S (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm_S (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm_S (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm_S (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm_S (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm_S (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm_S (in0[0], in0[1], 0x5432); - out0[0] = __byte_perm_S ( 0, in0[0], 0x5432); - break; - case 3: out1[3] = __byte_perm_S (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm_S (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm_S (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[0] = __byte_perm_S ( 0, in0[0], 0x4321); - break; - case 4: out1[3] = in1[2]; - out1[2] = in1[1]; - out1[1] = in1[0]; - out1[0] = in0[3]; - out0[3] = in0[2]; - out0[2] = in0[1]; - out0[1] = in0[0]; - out0[0] = 0; - break; - case 5: out1[3] = __byte_perm_S (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm_S (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm_S (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm_S (in0[1], in0[2], 0x6543); 
- out0[2] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm_S ( 0, in0[0], 0x6543); - out0[0] = 0; - break; - case 6: out1[3] = __byte_perm_S (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm_S (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm_S (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm_S (in0[2], in0[3], 0x5432); - out0[3] = __byte_perm_S (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm_S (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm_S ( 0, in0[0], 0x5432); - out0[0] = 0; - break; - case 7: out1[3] = __byte_perm_S (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm_S (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm_S (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm_S (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm_S ( 0, in0[0], 0x4321); - out0[0] = 0; - break; - case 8: out1[3] = in1[1]; - out1[2] = in1[0]; - out1[1] = in0[3]; - out1[0] = in0[2]; - out0[3] = in0[1]; - out0[2] = in0[0]; - out0[1] = 0; - out0[0] = 0; - break; - case 9: out1[3] = __byte_perm_S (in1[0], in1[1], 0x6543); - out1[2] = __byte_perm_S (in0[3], in1[0], 0x6543); - out1[1] = __byte_perm_S (in0[2], in0[3], 0x6543); - out1[0] = __byte_perm_S (in0[1], in0[2], 0x6543); - out0[3] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[2] = __byte_perm_S ( 0, in0[0], 0x6543); - out0[1] = 0; - out0[0] = 0; - break; - case 10: out1[3] = __byte_perm_S (in1[0], in1[1], 0x5432); - out1[2] = __byte_perm_S (in0[3], in1[0], 0x5432); - out1[1] = __byte_perm_S (in0[2], in0[3], 0x5432); - out1[0] = __byte_perm_S (in0[1], in0[2], 0x5432); - out0[3] = __byte_perm_S (in0[0], in0[1], 0x5432); - out0[2] = __byte_perm_S ( 0, in0[0], 0x5432); - out0[1] = 0; - out0[0] = 0; - break; - case 11: out1[3] = __byte_perm_S (in1[0], in1[1], 0x4321); - out1[2] = __byte_perm_S (in0[3], in1[0], 0x4321); - out1[1] = __byte_perm_S (in0[2], in0[3], 0x4321); - out1[0] = __byte_perm_S (in0[1], in0[2], 
0x4321); - out0[3] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[2] = __byte_perm_S ( 0, in0[0], 0x4321); - out0[1] = 0; - out0[0] = 0; - break; - case 12: out1[3] = in1[0]; - out1[2] = in0[3]; - out1[1] = in0[2]; - out1[0] = in0[1]; - out0[3] = in0[0]; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 13: out1[3] = __byte_perm_S (in0[3], in1[0], 0x6543); - out1[2] = __byte_perm_S (in0[2], in0[3], 0x6543); - out1[1] = __byte_perm_S (in0[1], in0[2], 0x6543); - out1[0] = __byte_perm_S (in0[0], in0[1], 0x6543); - out0[3] = __byte_perm_S ( 0, in0[0], 0x6543); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 14: out1[3] = __byte_perm_S (in0[3], in1[0], 0x5432); - out1[2] = __byte_perm_S (in0[2], in0[3], 0x5432); - out1[1] = __byte_perm_S (in0[1], in0[2], 0x5432); - out1[0] = __byte_perm_S (in0[0], in0[1], 0x5432); - out0[3] = __byte_perm_S ( 0, in0[0], 0x5432); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 15: out1[3] = __byte_perm_S (in0[3], in1[0], 0x4321); - out1[2] = __byte_perm_S (in0[2], in0[3], 0x4321); - out1[1] = __byte_perm_S (in0[1], in0[2], 0x4321); - out1[0] = __byte_perm_S (in0[0], in0[1], 0x4321); - out0[3] = __byte_perm_S ( 0, in0[0], 0x4321); - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 16: out1[3] = in0[3]; - out1[2] = in0[2]; - out1[1] = in0[1]; - out1[0] = in0[0]; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 17: out1[3] = __byte_perm_S (in0[2], in0[3], 0x6543); - out1[2] = __byte_perm_S (in0[1], in0[2], 0x6543); - out1[1] = __byte_perm_S (in0[0], in0[1], 0x6543); - out1[0] = __byte_perm_S ( 0, in0[0], 0x6543); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 18: out1[3] = __byte_perm_S (in0[2], in0[3], 0x5432); - out1[2] = __byte_perm_S (in0[1], in0[2], 0x5432); - out1[1] = __byte_perm_S (in0[0], in0[1], 0x5432); - out1[0] = __byte_perm_S ( 0, in0[0], 0x5432); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 
19: out1[3] = __byte_perm_S (in0[2], in0[3], 0x4321); - out1[2] = __byte_perm_S (in0[1], in0[2], 0x4321); - out1[1] = __byte_perm_S (in0[0], in0[1], 0x4321); - out1[0] = __byte_perm_S ( 0, in0[0], 0x4321); - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 20: out1[3] = in0[2]; - out1[2] = in0[1]; - out1[1] = in0[0]; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 21: out1[3] = __byte_perm_S (in0[1], in0[2], 0x6543); - out1[2] = __byte_perm_S (in0[0], in0[1], 0x6543); - out1[1] = __byte_perm_S ( 0, in0[0], 0x6543); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 22: out1[3] = __byte_perm_S (in0[1], in0[2], 0x5432); - out1[2] = __byte_perm_S (in0[0], in0[1], 0x5432); - out1[1] = __byte_perm_S ( 0, in0[0], 0x5432); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 23: out1[3] = __byte_perm_S (in0[1], in0[2], 0x4321); - out1[2] = __byte_perm_S (in0[0], in0[1], 0x4321); - out1[1] = __byte_perm_S ( 0, in0[0], 0x4321); - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 24: out1[3] = in0[1]; - out1[2] = in0[0]; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 25: out1[3] = __byte_perm_S (in0[0], in0[1], 0x6543); - out1[2] = __byte_perm_S ( 0, in0[0], 0x6543); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 26: out1[3] = __byte_perm_S (in0[0], in0[1], 0x5432); - out1[2] = __byte_perm_S ( 0, in0[0], 0x5432); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 27: out1[3] = __byte_perm_S (in0[0], in0[1], 0x4321); - out1[2] = __byte_perm_S ( 0, in0[0], 0x4321); - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 28: out1[3] = in0[0]; - out1[2] = 0; - out1[1] = 0; - 
out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 29: out1[3] = __byte_perm_S ( 0, in0[0], 0x6543); - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 30: out1[3] = __byte_perm_S ( 0, in0[0], 0x5432); - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - case 31: out1[3] = __byte_perm_S ( 0, in0[0], 0x4321); - out1[2] = 0; - out1[1] = 0; - out1[0] = 0; - out0[3] = 0; - out0[2] = 0; - out0[1] = 0; - out0[0] = 0; - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC switch (num) { case 0: out1[3] = in1[3]; @@ -1366,7 +738,6 @@ inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[0] = 0; break; } - #endif } inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0) @@ -1419,254 +790,6 @@ inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4]) { -/* - #ifdef IS_NV - switch (offset) - { - case 0: - dst0[0] = src_r0[0]; - dst0[1] = src_r0[1]; - dst0[2] = src_r0[2]; - dst0[3] = src_r0[3]; - dst1[0] = src_r1[0]; - dst1[1] = src_r1[1]; - dst1[2] = src_r1[2]; - dst1[3] = src_r1[3]; - break; - - case 1: - dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x6540); - dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); - dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); - dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); - dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543); - dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x6543); - break; - - case 2: - dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x5410); - dst0[1] = 
__byte_perm_S (src_r0[0], src_r0[1], 0x5432); - dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); - dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); - dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); - dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432); - dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x5432); - break; - - case 3: - dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x4210); - dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); - dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); - dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); - dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321); - dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x4321); - break; - - case 4: - dst0[1] = src_r0[0]; - dst0[2] = src_r0[1]; - dst0[3] = src_r0[2]; - dst1[0] = src_r0[3]; - dst1[1] = src_r1[0]; - dst1[2] = src_r1[1]; - dst1[3] = src_r1[2]; - break; - - case 5: - dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x6540); - dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); - dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); - dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); - dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543); - break; - - case 6: - dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x5410); - dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); - dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); - dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); - dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432); - break; - - case 7: - dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x4210); - 
dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); - dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); - dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); - dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321); - break; - - case 8: - dst0[2] = src_r0[0]; - dst0[3] = src_r0[1]; - dst1[0] = src_r0[2]; - dst1[1] = src_r0[3]; - dst1[2] = src_r1[0]; - dst1[3] = src_r1[1]; - break; - - case 9: - dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x6540); - dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); - dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); - dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); - break; - - case 10: - dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x5410); - dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); - dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); - dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); - break; - - case 11: - dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x4210); - dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); - dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); - dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); - break; - - case 12: - dst0[3] = src_r0[0]; - dst1[0] = src_r0[1]; - dst1[1] = src_r0[2]; - dst1[2] = src_r0[3]; - dst1[3] = src_r1[0]; - break; - - case 13: - dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x6540); - dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 
0x6543); - dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); - break; - - case 14: - dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x5410); - dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); - dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); - break; - - case 15: - dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x4210); - dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); - dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); - break; - - case 16: - dst1[0] = src_r0[0]; - dst1[1] = src_r0[1]; - dst1[2] = src_r0[2]; - dst1[3] = src_r0[3]; - break; - - case 17: - dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x6540); - dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); - break; - - case 18: - dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x5410); - dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); - break; - - case 19: - dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x4210); - dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); - break; - - case 20: - dst1[1] = src_r0[0]; - dst1[2] = src_r0[1]; - dst1[3] = src_r0[2]; - break; - - case 21: - dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x6540); - dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); - break; - - case 22: - dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x5410); - dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - 
dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); - break; - - case 23: - dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x4210); - dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); - break; - - case 24: - dst1[2] = src_r0[0]; - dst1[3] = src_r0[1]; - break; - - case 25: - dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x6540); - dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); - break; - - case 26: - dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x5410); - dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); - break; - - case 27: - dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x4210); - dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); - break; - - case 28: - dst1[3] = src_r0[0]; - break; - - case 29: - dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x6540); - break; - - case 30: - dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x5410); - break; - - case 31: - dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x4210); - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC -*/ switch (offset) { case 31: @@ -1878,7 +1001,6 @@ inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst0[0] = src_r0[0]; break; } -// #endif } inline void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], const u32 len) @@ -2456,80 +1578,6 @@ inline u32 rule_op_mangle_truncate_at (const u32 p0, const u32 p1, u32 buf0[4], inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) { - #ifdef IS_NV - for (u32 i = 0; i < in_len; i++) - { - switch (i) - { - case 0: if ((__byte_perm_S (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7650); - break; - case 1: if ((__byte_perm_S (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7604); - break; - case 2: if ((__byte_perm_S (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7054); - break; - case 3: if 
((__byte_perm_S (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x0654); - break; - case 4: if ((__byte_perm_S (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7650); - break; - case 5: if ((__byte_perm_S (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7604); - break; - case 6: if ((__byte_perm_S (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7054); - break; - case 7: if ((__byte_perm_S (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x0654); - break; - case 8: if ((__byte_perm_S (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7650); - break; - case 9: if ((__byte_perm_S (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7604); - break; - case 10: if ((__byte_perm_S (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7054); - break; - case 11: if ((__byte_perm_S (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x0654); - break; - case 12: if ((__byte_perm_S (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7650); - break; - case 13: if ((__byte_perm_S (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7604); - break; - case 14: if ((__byte_perm_S (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7054); - break; - case 15: if ((__byte_perm_S (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x0654); - break; - case 16: if ((__byte_perm_S (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7650); - break; - case 17: if ((__byte_perm_S (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7604); - break; - case 18: if ((__byte_perm_S (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7054); - break; - case 19: if ((__byte_perm_S (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x0654); - break; - case 20: if ((__byte_perm_S (buf1[1], 0, 0x6540)) == p0) 
buf1[1] = __byte_perm_S (p1, buf1[1], 0x7650); - break; - case 21: if ((__byte_perm_S (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7604); - break; - case 22: if ((__byte_perm_S (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7054); - break; - case 23: if ((__byte_perm_S (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x0654); - break; - case 24: if ((__byte_perm_S (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7650); - break; - case 25: if ((__byte_perm_S (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7604); - break; - case 26: if ((__byte_perm_S (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7054); - break; - case 27: if ((__byte_perm_S (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x0654); - break; - case 28: if ((__byte_perm_S (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7650); - break; - case 29: if ((__byte_perm_S (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7604); - break; - case 30: if ((__byte_perm_S (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7054); - break; - case 31: if ((__byte_perm_S (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x0654); - break; - } - } - #endif - - #if defined IS_AMD || defined IS_GENERIC const uchar4 tmp0 = (uchar4) (p0); const uchar4 tmp1 = (uchar4) (p1); @@ -2543,7 +1591,6 @@ inline u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 tmp = as_uchar4 (buf1[1]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[1] = as_uint (tmp); tmp = as_uchar4 (buf1[2]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[2] = as_uint (tmp); tmp = as_uchar4 (buf1[3]); tmp = select (tmp, tmp1, tmp == tmp0); buf1[3] = as_uint (tmp); - #endif return in_len; } @@ -2571,180 +1618,6 @@ inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4 rshift_block_N (buf0, buf1, buf0, buf1, 
p0); - #ifdef IS_NV - switch (p0) - { - case 1: buf0[0] |= tmp; - break; - case 2: buf0[0] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 3: buf0[0] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 4: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 5: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= tmp; - break; - case 6: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 7: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 8: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 9: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= tmp; - break; - case 10: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 11: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 12: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 13: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= tmp; - break; - case 14: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 15: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 16: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); 
- break; - case 17: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= tmp; - break; - case 18: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 19: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 20: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 21: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= tmp; - break; - case 22: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 23: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 24: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= 
__byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 25: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= tmp; - break; - case 26: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 27: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x4000); - break; - case 28: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); - break; - case 29: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[3] |= tmp; - break; - case 30: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= 
__byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[3] |= __byte_perm_S (tmp, 0, 0x5400); - break; - case 31: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); - buf1[3] |= __byte_perm_S (tmp, 0, 0x4000); - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC switch (p0) { case 1: buf0[0] |= tmp << 0; @@ -2915,7 +1788,6 @@ inline u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4 buf1[3] |= tmp << 0 | tmp << 8 | tmp << 16; break; } - #endif out_len += p0; @@ -2967,27 +1839,6 @@ inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 tib40[4]; u32 tib41[4]; - #ifdef IS_NV - tib40[0] = __byte_perm_S (buf0[0], 0, 0x1100); - tib40[1] = __byte_perm_S (buf0[0], 0, 0x3322); - tib40[2] = __byte_perm_S (buf0[1], 0, 0x1100); - tib40[3] = __byte_perm_S (buf0[1], 0, 0x3322); - tib41[0] = __byte_perm_S (buf0[2], 0, 0x1100); - tib41[1] = __byte_perm_S (buf0[2], 0, 0x3322); - tib41[2] = __byte_perm_S (buf0[3], 0, 0x1100); - tib41[3] = __byte_perm_S (buf0[3], 0, 0x3322); - - buf0[0] = tib40[0]; - buf0[1] = tib40[1]; - buf0[2] = tib40[2]; - buf0[3] = tib40[3]; - buf1[0] = tib41[0]; - buf1[1] = tib41[1]; - buf1[2] = tib41[2]; - buf1[3] = tib41[3]; - #endif - - #if defined IS_AMD || defined IS_GENERIC tib40[0] = ((buf0[0] & 0x000000FF) << 0) | ((buf0[0] & 0x0000FF00) << 8); tib40[1] = ((buf0[0] & 0x00FF0000) >> 16) | ((buf0[0] & 0xFF000000) >> 8); tib40[2] = ((buf0[1] & 0x000000FF) << 0) | ((buf0[1] & 0x0000FF00) << 8); @@ -3005,7 +1856,6 @@ inline u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], buf1[1] = tib41[1] | (tib41[1] << 8); buf1[2] = tib41[2] | (tib41[2] << 8); buf1[3] = tib41[3] | (tib41[3] << 
8); - #endif out_len = out_len + out_len; @@ -3016,13 +1866,7 @@ inline u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], { if (in_len < 2) return (in_len); - #ifdef IS_NV - buf0[0] = __byte_perm_S (buf0[0], 0, 0x3201); - #endif - - #if defined IS_AMD || defined IS_GENERIC buf0[0] = (buf0[0] & 0xFFFF0000) | ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); - #endif return in_len; } @@ -3031,87 +1875,6 @@ inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], { if (in_len < 2) return (in_len); - #ifdef IS_NV - switch (in_len) - { - case 2: buf0[0] = __byte_perm_S (buf0[0], 0, 0x5401); - break; - case 3: buf0[0] = __byte_perm_S (buf0[0], 0, 0x4120); - break; - case 4: buf0[0] = __byte_perm_S (buf0[0], 0, 0x2310); - break; - case 5: buf0[1] = __byte_perm_S (buf0[1], buf0[0], 0x7210); - buf0[0] = __byte_perm_S (buf0[0], buf0[1], 0x4210); - buf0[1] = __byte_perm_S (buf0[1], 0, 0x6543); - break; - case 6: buf0[1] = __byte_perm_S (buf0[1], 0, 0x5401); - break; - case 7: buf0[1] = __byte_perm_S (buf0[1], 0, 0x4120); - break; - case 8: buf0[1] = __byte_perm_S (buf0[1], 0, 0x2310); - break; - case 9: buf0[2] = __byte_perm_S (buf0[2], buf0[1], 0x7210); - buf0[1] = __byte_perm_S (buf0[1], buf0[2], 0x4210); - buf0[2] = __byte_perm_S (buf0[2], 0, 0x6543); - break; - case 10: buf0[2] = __byte_perm_S (buf0[2], 0, 0x5401); - break; - case 11: buf0[2] = __byte_perm_S (buf0[2], 0, 0x4120); - break; - case 12: buf0[2] = __byte_perm_S (buf0[2], 0, 0x2310); - break; - case 13: buf0[3] = __byte_perm_S (buf0[3], buf0[2], 0x7210); - buf0[2] = __byte_perm_S (buf0[2], buf0[3], 0x4210); - buf0[3] = __byte_perm_S (buf0[3], 0, 0x6543); - break; - case 14: buf0[3] = __byte_perm_S (buf0[3], 0, 0x5401); - break; - case 15: buf0[3] = __byte_perm_S (buf0[3], 0, 0x4120); - break; - case 16: buf0[3] = __byte_perm_S (buf0[3], 0, 0x2310); - break; - case 17: buf1[0] = __byte_perm_S (buf1[0], buf0[3], 0x7210); - buf0[3] = __byte_perm_S 
(buf0[3], buf1[0], 0x4210); - buf1[0] = __byte_perm_S (buf1[0], 0, 0x6543); - break; - case 18: buf1[0] = __byte_perm_S (buf1[0], 0, 0x5401); - break; - case 19: buf1[0] = __byte_perm_S (buf1[0], 0, 0x4120); - break; - case 20: buf1[0] = __byte_perm_S (buf1[0], 0, 0x2310); - break; - case 21: buf1[1] = __byte_perm_S (buf1[1], buf1[0], 0x7210); - buf1[0] = __byte_perm_S (buf1[0], buf1[1], 0x4210); - buf1[1] = __byte_perm_S (buf1[1], 0, 0x6543); - break; - case 22: buf1[1] = __byte_perm_S (buf1[1], 0, 0x5401); - break; - case 23: buf1[1] = __byte_perm_S (buf1[1], 0, 0x4120); - break; - case 24: buf1[1] = __byte_perm_S (buf1[1], 0, 0x2310); - break; - case 25: buf1[2] = __byte_perm_S (buf1[2], buf1[1], 0x7210); - buf1[1] = __byte_perm_S (buf1[1], buf1[2], 0x4210); - buf1[2] = __byte_perm_S (buf1[2], 0, 0x6543); - break; - case 26: buf1[2] = __byte_perm_S (buf1[2], 0, 0x5401); - break; - case 27: buf1[2] = __byte_perm_S (buf1[2], 0, 0x4120); - break; - case 28: buf1[2] = __byte_perm_S (buf1[2], 0, 0x2310); - break; - case 29: buf1[3] = __byte_perm_S (buf1[3], buf1[2], 0x7210); - buf1[2] = __byte_perm_S (buf1[2], buf1[3], 0x4210); - buf1[3] = __byte_perm_S (buf1[3], 0, 0x6543); - break; - case 30: buf1[3] = __byte_perm_S (buf1[3], 0, 0x5401); - break; - case 31: buf1[3] = __byte_perm_S (buf1[3], 0, 0x4120); - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC switch (in_len) { case 2: buf0[0] = ((buf0[0] << 8) & 0x0000FF00) | ((buf0[0] >> 8) & 0x000000FF); @@ -3189,7 +1952,6 @@ inline u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], case 31: buf1[3] = (buf1[3] & 0x000000FF) | ((buf1[3] << 8) & 0x00FF0000) | ((buf1[3] >> 8) & 0x0000FF00); break; } - #endif return in_len; } @@ -3202,245 +1964,6 @@ inline u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u3 u32 tmp0 = 0; u32 tmp1 = 0; - #ifdef IS_NV - switch (p0) - { - case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540); - break; - case 1: tmp0 = __byte_perm_S 
(buf0[0], 0, 0x6541); - break; - case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542); - break; - case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543); - break; - case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540); - break; - case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541); - break; - case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542); - break; - case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543); - break; - case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540); - break; - case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541); - break; - case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542); - break; - case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543); - break; - case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540); - break; - case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541); - break; - case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542); - break; - case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543); - break; - case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540); - break; - case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541); - break; - case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542); - break; - case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543); - break; - case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540); - break; - case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541); - break; - case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542); - break; - case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543); - break; - case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540); - break; - case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541); - break; - case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542); - break; - case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543); - break; - case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540); - break; - case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541); - break; - case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542); - break; - case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543); - break; - } - - switch (p1) - { - case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540); - buf0[0] 
= __byte_perm_S (tmp0, buf0[0], 0x7650); - break; - case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541); - buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604); - break; - case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542); - buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054); - break; - case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543); - buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654); - break; - case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540); - buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650); - break; - case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541); - buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604); - break; - case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542); - buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054); - break; - case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543); - buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654); - break; - case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540); - buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650); - break; - case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541); - buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604); - break; - case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542); - buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054); - break; - case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543); - buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654); - break; - case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540); - buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650); - break; - case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541); - buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604); - break; - case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542); - buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054); - break; - case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543); - buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654); - break; - case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540); - buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650); - break; - case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541); - buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604); - break; - case 18: tmp1 = 
__byte_perm_S (buf1[0], 0, 0x6542); - buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054); - break; - case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543); - buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654); - break; - case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540); - buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650); - break; - case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541); - buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604); - break; - case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542); - buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054); - break; - case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543); - buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654); - break; - case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540); - buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650); - break; - case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541); - buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604); - break; - case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542); - buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054); - break; - case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543); - buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654); - break; - case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540); - buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650); - break; - case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541); - buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604); - break; - case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542); - buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054); - break; - case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543); - buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654); - break; - } - - switch (p0) - { - case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650); - break; - case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604); - break; - case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054); - break; - case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654); - break; - case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650); - break; - case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 
0x7604); - break; - case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054); - break; - case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654); - break; - case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650); - break; - case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604); - break; - case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054); - break; - case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654); - break; - case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650); - break; - case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604); - break; - case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054); - break; - case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654); - break; - case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650); - break; - case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604); - break; - case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054); - break; - case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654); - break; - case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650); - break; - case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604); - break; - case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054); - break; - case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654); - break; - case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650); - break; - case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604); - break; - case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054); - break; - case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654); - break; - case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650); - break; - case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604); - break; - case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054); - break; - case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654); - break; - } - #endif - - #if defined IS_AMD || defined IS_GENERIC switch (p0) { case 0: tmp0 = (buf0[0] >> 0) & 0xFF; @@ -3676,7 +2199,6 @@ inline u32 rule_op_mangle_switch_at (const u32 p0, const 
u32 p1, u32 buf0[4], u3 case 31: buf1[3] = (buf1[3] & 0x00ffffff) | tmp1 << 24; break; } - #endif return in_len; } @@ -3907,116 +2429,6 @@ inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 bu buf1[2] |= (generate_cmask (buf1[2])); buf1[3] |= (generate_cmask (buf1[3])); - #ifdef IS_NV - buf0[0] &= ~(0x00000020 & generate_cmask (buf0[0])); - - for (u32 i = 0; i < in_len; i++) - { - u32 tmp0; - u32 tmp1; - - switch (i) - { - case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break; - case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break; - case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break; - case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break; - case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break; - case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break; - case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break; - case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break; - case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break; - case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break; - case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break; - case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break; - case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break; - case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask 
(buf0[3])); break; - case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break; - case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break; - case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break; - case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break; - case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break; - case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break; - case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break; - case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break; - case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break; - case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break; - case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break; - case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break; - case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break; - case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543); - tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break; - case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540); - tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break; - case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541); - tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break; - case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542); - tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break; - } - - if (i < 3) - { - if (tmp0 == ' ') buf0[0] &= tmp1 ; - } - else if (i 
< 7) - { - if (tmp0 == ' ') buf0[1] &= tmp1 ; - } - else if (i < 11) - { - if (tmp0 == ' ') buf0[2] &= tmp1 ; - } - else if (i < 15) - { - if (tmp0 == ' ') buf0[3] &= tmp1 ; - } - else if (i < 19) - { - if (tmp0 == ' ') buf1[0] &= tmp1 ; - } - else if (i < 23) - { - if (tmp0 == ' ') buf1[1] &= tmp1 ; - } - else if (i < 27) - { - if (tmp0 == ' ') buf1[2] &= tmp1 ; - } - else if (i < 31) - { - if (tmp0 == ' ') buf1[3] &= tmp1 ; - } - } - #endif - - #if defined IS_AMD || defined IS_GENERIC u32 tib40[4]; u32 tib41[4]; @@ -4045,7 +2457,6 @@ inline u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 bu buf1[1] &= ~(generate_cmask (buf1[1]) & tib41[1]); buf1[2] &= ~(generate_cmask (buf1[2]) & tib41[2]); buf1[3] &= ~(generate_cmask (buf1[3]) & tib41[3]); - #endif return in_len; } @@ -4365,11 +2776,3 @@ inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const #endif } - -#ifdef DO_NOT_USE_BYTE_PERM -#ifdef RESTORE_NV -#undef RESTORE_NV -#undef IS_GENERIC -#define IS_NV -#endif -#endif diff --git a/docs/changes.txt b/docs/changes.txt index 2426d150f..a1551c736 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -19,6 +19,7 @@ - Workaround for OpenCL runtimes which do accept -I parameter in the OpenCL kernel build options, but do not allow quotes - Output cracked hashes on Windows using \r\n and not \n - Replace RegGetValue() with RegQueryValueEx() to enable Windows XP 32 bit compatibility +- Slightly increased NVidias rule-processing performance by using generic instructions instead of byte_perm() ## ## Bugs