From 7f234bad24e650a9e4373f8a1127637841a683ef Mon Sep 17 00:00:00 2001 From: jsteube Date: Tue, 1 Nov 2016 03:21:45 +0100 Subject: [PATCH] Optimize 0x01, 0x02 and 0x80 handling in inc_common.c even more --- OpenCL/inc_common.cl | 2508 +++++------------------------------------- 1 file changed, 264 insertions(+), 2244 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index ac11e9f0d..8358db65a 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -202,666 +202,66 @@ inline void undo_unicode (const u32x in1[4], const u32x in2[4], u32x out[4]) inline void append_0x01_1x4 (u32x w0[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + const u32 tmp = 0x01 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x01_2x4 (u32x w0[4], u32x w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + const u32 tmp = 0x01 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x01_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + const u32 tmp = 0x01 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x01_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + const u32 tmp = 0x01 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0100; - break; - - case 2: - w0[0] = w0[0] | 0x010000; - break; - - case 3: - w0[0] = w0[0] | 0x01000000; - break; - - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void append_0x01_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) @@ -1384,666 +784,66 @@ inline void append_0x01_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 inline void append_0x02_1x4 (u32x w0[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + const u32 tmp = 0x02 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x02_2x4 (u32x w0[4], u32x w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + const u32 tmp = 0x02 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x02_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + const u32 tmp = 0x02 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x02_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + const u32 tmp = 0x02 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x0200; - break; - - case 2: - w0[0] = w0[0] | 0x020000; - break; - - case 3: - w0[0] = w0[0] | 0x02000000; - break; - - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - - case 48: - w3[0] = 0x02; - break; - - case 49: - w3[0] = w3[0] | 0x0200; - break; - - case 50: - w3[0] = w3[0] | 0x020000; - break; - - case 51: - w3[0] = w3[0] | 0x02000000; - break; - - case 52: - w3[1] = 0x02; - break; - - case 53: - w3[1] = w3[1] | 0x0200; - break; - - case 54: - w3[1] = w3[1] | 0x020000; - break; - - case 55: - w3[1] = w3[1] | 0x02000000; - break; - - case 56: - w3[2] = 0x02; - break; - - case 57: - w3[2] = w3[2] | 0x0200; - break; - - case 58: - w3[2] = w3[2] | 0x020000; - break; - - case 59: - w3[2] = w3[2] | 0x02000000; - break; - - case 60: - w3[3] = 0x02; - break; - - case 61: - w3[3] = w3[3] | 0x0200; - break; - - case 62: - w3[3] = w3[3] | 0x020000; - break; - - case 63: - w3[3] = w3[3] | 0x02000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void append_0x02_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) @@ -2566,666 +1366,66 @@ inline void append_0x02_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32 inline void append_0x80_1x4 (u32x w0[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + const u32 tmp = 0x80 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x80_2x4 (u32x w0[4], u32x w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + const u32 tmp = 0x80 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x80_3x4 (u32x w0[4], u32x w1[4], u32x w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + const u32 tmp = 0x80 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x80_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + const u32 tmp = 0x80 << ((offset & 3) * 8); - case 1: - w0[0] = w0[0] | 0x8000; - break; - - case 2: - w0[0] = w0[0] | 0x800000; - break; - - case 3: - w0[0] = w0[0] | 0x80000000; - break; - - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void append_0x80_8x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset) @@ -6069,374 +4269,194 @@ inline void overwrite_at_be_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], inline void append_0x01_1x4_S (u32 w0[4], const u32 offset) { - u32 w[4]; + const u32 tmp = 0x01 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x01; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - u32 w[8]; + const u32 tmp = 0x01 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - w[4] = w1[0]; - w[5] = w1[1]; - w[6] = w1[2]; - w[7] = w1[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x01; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; - w1[0] = w[4]; - w1[1] = w[5]; - w1[2] = w[6]; - w1[3] = w[7]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x01_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - u32 w[12]; + const u32 tmp = 0x01 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x01; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x01_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { - u32 w[16]; + const u32 tmp = 0x01 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x01; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void append_0x02_1x4_S (u32 w0[4], const u32 offset) { - u32 w[4]; + const u32 tmp = 0x02 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x02; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x02_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - u32 w[8]; + const u32 tmp = 0x02 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - w[4] = w1[0]; - w[5] = w1[1]; - w[6] = w1[2]; - w[7] = w1[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x02; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; - w1[0] = w[4]; - w1[1] = w[5]; - w1[2] = w[6]; - w1[3] = w[7]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x02_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - u32 w[12]; + const u32 tmp = 0x02 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x02; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x02_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { - u32 w[16]; + const u32 tmp = 0x02 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x02; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void append_0x80_1x4_S (u32 w0[4], const u32 offset) { - u32 w[4]; + const u32 tmp = 0x80 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x80; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= (offset >= 12) ? tmp : 0; } inline void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - u32 w[8]; + const u32 tmp = 0x80 << ((offset & 3) * 8); - w[0] = w0[0]; - w[1] = w0[1]; - w[2] = w0[2]; - w[3] = w0[3]; - w[4] = w1[0]; - w[5] = w1[1]; - w[6] = w1[2]; - w[7] = w1[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x80; - - w0[0] = w[0]; - w0[1] = w[1]; - w0[2] = w[2]; - w0[3] = w[3]; - w1[0] = w[4]; - w1[1] = w[5]; - w1[2] = w[6]; - w1[3] = w[7]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= (offset >= 28) ? tmp : 0; } inline void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - u32 w[12]; + const u32 tmp = 0x80 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x80; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= (offset >= 44) ? tmp : 0; } inline void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { - u32 w[16]; + const u32 tmp = 0x80 << ((offset & 3) * 8); - w[ 0] = w0[0]; - w[ 1] = w0[1]; - w[ 2] = w0[2]; - w[ 3] = w0[3]; - w[ 4] = w1[0]; - w[ 5] = w1[1]; - w[ 6] = w1[2]; - w[ 7] = w1[3]; - w[ 8] = w2[0]; - w[ 9] = w2[1]; - w[10] = w2[2]; - w[11] = w2[3]; - w[12] = w3[0]; - w[13] = w3[1]; - w[14] = w3[2]; - w[15] = w3[3]; - - u8 *w_ptr = (u8 *) w; - - w_ptr[offset] = 0x80; - - w0[0] = w[ 0]; - w0[1] = w[ 1]; - w0[2] = w[ 2]; - w0[3] = w[ 3]; - w1[0] = w[ 4]; - w1[1] = w[ 5]; - w1[2] = w[ 6]; - w1[3] = w[ 7]; - w2[0] = w[ 8]; - w2[1] = w[ 9]; - w2[2] = w[10]; - w2[3] = w[11]; - w3[0] = w[12]; - w3[1] = w[13]; - w3[2] = w[14]; - w3[3] = w[15]; + w0[0] |= (offset < 4) ? tmp : 0; + w0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0; + w0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0; + w0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0; + w1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0; + w1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0; + w1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0; + w1[3] |= ((offset >= 28) && (offset < 32)) ? tmp : 0; + w2[0] |= ((offset >= 32) && (offset < 36)) ? tmp : 0; + w2[1] |= ((offset >= 36) && (offset < 40)) ? tmp : 0; + w2[2] |= ((offset >= 40) && (offset < 44)) ? tmp : 0; + w2[3] |= ((offset >= 44) && (offset < 48)) ? tmp : 0; + w3[0] |= ((offset >= 48) && (offset < 52)) ? tmp : 0; + w3[1] |= ((offset >= 52) && (offset < 56)) ? tmp : 0; + w3[2] |= ((offset >= 56) && (offset < 60)) ? tmp : 0; + w3[3] |= (offset >= 60) ? tmp : 0; } inline void truncate_block_S (u32 w[4], const u32 len)