From 492f9f2290e716583082ca68dee79ddc96f92a6f Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 31 Oct 2016 14:22:00 +0100 Subject: [PATCH] For some easy actions, maxwell GPU prefer not to use switch() --- OpenCL/inc_common.cl | 1852 +++++++----------------------------------- 1 file changed, 282 insertions(+), 1570 deletions(-) diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl index 75c4d8bf5..ac11e9f0d 100644 --- a/OpenCL/inc_common.cl +++ b/OpenCL/inc_common.cl @@ -6069,1662 +6069,374 @@ inline void overwrite_at_be_4x4 (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], inline void append_0x01_1x4_S (u32 w0[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + u32 w[4]; - case 1: - w0[0] = w0[0] | 0x0100; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; - case 2: - w0[0] = w0[0] | 0x010000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x01000000; - break; + w_ptr[offset] = 0x01; - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; } inline void append_0x01_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + u32 w[8]; - case 1: - w0[0] = w0[0] | 0x0100; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; + w[4] = w1[0]; + w[5] = w1[1]; + w[6] = w1[2]; + w[7] = w1[3]; - case 2: - w0[0] = w0[0] | 0x010000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x01000000; - break; + w_ptr[offset] = 0x01; - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; + w1[0] = w[4]; + w1[1] = w[5]; + w1[2] = w[6]; + w1[3] = w[7]; } inline void append_0x01_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + u32 w[12]; - case 1: - w0[0] = w0[0] | 0x0100; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; - case 2: - w0[0] = w0[0] | 0x010000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x01000000; - break; + w_ptr[offset] = 0x01; - case 4: - w0[1] = 0x01; - break; - - case 5: - w0[1] = w0[1] | 0x0100; - break; - - case 6: - w0[1] = w0[1] | 0x010000; - break; - - case 7: - w0[1] = w0[1] | 0x01000000; - break; - - case 8: - w0[2] = 0x01; - break; - - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - } + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; } inline void append_0x01_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x01; - break; + u32 w[16]; - case 1: - w0[0] = w0[0] | 0x0100; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; + w[12] = w3[0]; + w[13] = w3[1]; + w[14] = w3[2]; + w[15] = w3[3]; - case 2: - w0[0] = w0[0] | 0x010000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x01000000; - break; + w_ptr[offset] = 0x01; - case 4: - w0[1] = 0x01; - break; + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; +} - case 5: - w0[1] = w0[1] | 0x0100; - break; +inline void append_0x02_1x4_S (u32 w0[4], const u32 offset) +{ + u32 w[4]; - case 6: - w0[1] = w0[1] | 0x010000; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; - case 7: - w0[1] = w0[1] | 0x01000000; - break; + u8 *w_ptr = (u8 *) w; - case 8: - w0[2] = 0x01; - break; + w_ptr[offset] = 0x02; - case 9: - w0[2] = w0[2] | 0x0100; - break; - - case 10: - w0[2] = w0[2] | 0x010000; - break; - - case 11: - w0[2] = w0[2] | 0x01000000; - break; - - case 12: - w0[3] = 0x01; - break; - - case 13: - w0[3] = w0[3] | 0x0100; - break; - - case 14: - w0[3] = w0[3] | 0x010000; - break; - - case 15: - w0[3] = w0[3] | 0x01000000; - break; - - case 16: - w1[0] = 0x01; - break; - - case 17: - w1[0] = w1[0] | 0x0100; - break; - - case 18: - w1[0] = w1[0] | 0x010000; - break; - - case 19: - w1[0] = w1[0] | 0x01000000; - break; - - case 20: - w1[1] = 0x01; - break; - - case 21: - w1[1] = w1[1] | 0x0100; - break; - - case 22: - w1[1] = w1[1] | 0x010000; - break; - - case 23: - w1[1] = w1[1] | 0x01000000; - break; - - case 24: - w1[2] = 0x01; - break; - - case 25: - w1[2] = w1[2] | 0x0100; - break; - - case 26: - w1[2] = w1[2] | 0x010000; - break; - - case 27: - w1[2] = w1[2] | 0x01000000; - break; - - case 28: - w1[3] = 0x01; - break; - - case 29: - w1[3] = w1[3] | 0x0100; - break; - - case 30: - w1[3] = w1[3] | 0x010000; - break; - - case 31: - w1[3] = w1[3] | 0x01000000; - break; - - case 32: - w2[0] = 0x01; - break; - - case 33: - w2[0] = w2[0] | 0x0100; - break; - - case 34: - w2[0] = w2[0] | 0x010000; - break; - - case 35: - w2[0] = w2[0] | 0x01000000; - break; - - case 36: - w2[1] = 0x01; - break; - - case 37: - w2[1] = w2[1] | 0x0100; - break; - - case 38: - w2[1] = w2[1] | 0x010000; - break; - - case 39: - w2[1] = w2[1] | 0x01000000; - break; - - case 40: - w2[2] = 0x01; - break; - - case 41: - w2[2] = w2[2] | 0x0100; - break; - - case 42: - w2[2] = w2[2] | 0x010000; - break; - - case 43: - w2[2] = w2[2] | 0x01000000; - break; - - case 44: - w2[3] = 0x01; - break; - - case 45: - w2[3] = w2[3] | 0x0100; - break; - - case 46: - w2[3] = w2[3] | 0x010000; - break; - - case 47: - w2[3] = w2[3] | 0x01000000; - break; - - case 48: - w3[0] = 0x01; - break; - - case 49: - w3[0] = w3[0] | 0x0100; - break; - - case 50: - w3[0] = w3[0] | 0x010000; - break; - - case 51: - w3[0] = w3[0] | 0x01000000; - break; - - case 52: - w3[1] = 0x01; - break; - - case 53: - w3[1] = w3[1] | 0x0100; - break; - - case 54: - w3[1] = w3[1] | 0x010000; - break; - - case 55: - w3[1] = w3[1] | 0x01000000; - break; - - case 56: - w3[2] = 0x01; - break; - - case 57: - w3[2] = w3[2] | 0x0100; - break; - - case 58: - w3[2] = w3[2] | 0x010000; - break; - - case 59: - w3[2] = w3[2] | 0x01000000; - break; - - case 60: - w3[3] = 0x01; - break; - - case 61: - w3[3] = w3[3] | 0x0100; - break; - - case 62: - w3[3] = w3[3] | 0x010000; - break; - - case 63: - w3[3] = w3[3] | 0x01000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; } inline void append_0x02_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + u32 w[8]; - case 1: - w0[0] = w0[0] | 0x0200; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; + w[4] = w1[0]; + w[5] = w1[1]; + w[6] = w1[2]; + w[7] = w1[3]; - case 2: - w0[0] = w0[0] | 0x020000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x02000000; - break; + w_ptr[offset] = 0x02; - case 4: - w0[1] = 0x02; - break; - - case 5: - w0[1] = w0[1] | 0x0200; - break; - - case 6: - w0[1] = w0[1] | 0x020000; - break; - - case 7: - w0[1] = w0[1] | 0x02000000; - break; - - case 8: - w0[2] = 0x02; - break; - - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; + w1[0] = w[4]; + w1[1] = w[5]; + w1[2] = w[6]; + w1[3] = w[7]; } inline void append_0x02_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x02; - break; + u32 w[12]; - case 1: - w0[0] = w0[0] | 0x0200; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; - case 2: - w0[0] = w0[0] | 0x020000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x02000000; - break; + w_ptr[offset] = 0x02; - case 4: - w0[1] = 0x02; - break; + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; +} - case 5: - w0[1] = w0[1] | 0x0200; - break; +inline void append_0x02_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) +{ + u32 w[16]; - case 6: - w0[1] = w0[1] | 0x020000; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; + w[12] = w3[0]; + w[13] = w3[1]; + w[14] = w3[2]; + w[15] = w3[3]; - case 7: - w0[1] = w0[1] | 0x02000000; - break; + u8 *w_ptr = (u8 *) w; - case 8: - w0[2] = 0x02; - break; + w_ptr[offset] = 0x02; - case 9: - w0[2] = w0[2] | 0x0200; - break; - - case 10: - w0[2] = w0[2] | 0x020000; - break; - - case 11: - w0[2] = w0[2] | 0x02000000; - break; - - case 12: - w0[3] = 0x02; - break; - - case 13: - w0[3] = w0[3] | 0x0200; - break; - - case 14: - w0[3] = w0[3] | 0x020000; - break; - - case 15: - w0[3] = w0[3] | 0x02000000; - break; - - case 16: - w1[0] = 0x02; - break; - - case 17: - w1[0] = w1[0] | 0x0200; - break; - - case 18: - w1[0] = w1[0] | 0x020000; - break; - - case 19: - w1[0] = w1[0] | 0x02000000; - break; - - case 20: - w1[1] = 0x02; - break; - - case 21: - w1[1] = w1[1] | 0x0200; - break; - - case 22: - w1[1] = w1[1] | 0x020000; - break; - - case 23: - w1[1] = w1[1] | 0x02000000; - break; - - case 24: - w1[2] = 0x02; - break; - - case 25: - w1[2] = w1[2] | 0x0200; - break; - - case 26: - w1[2] = w1[2] | 0x020000; - break; - - case 27: - w1[2] = w1[2] | 0x02000000; - break; - - case 28: - w1[3] = 0x02; - break; - - case 29: - w1[3] = w1[3] | 0x0200; - break; - - case 30: - w1[3] = w1[3] | 0x020000; - break; - - case 31: - w1[3] = w1[3] | 0x02000000; - break; - - case 32: - w2[0] = 0x02; - break; - - case 33: - w2[0] = w2[0] | 0x0200; - break; - - case 34: - w2[0] = w2[0] | 0x020000; - break; - - case 35: - w2[0] = w2[0] | 0x02000000; - break; - - case 36: - w2[1] = 0x02; - break; - - case 37: - w2[1] = w2[1] | 0x0200; - break; - - case 38: - w2[1] = w2[1] | 0x020000; - break; - - case 39: - w2[1] = w2[1] | 0x02000000; - break; - - case 40: - w2[2] = 0x02; - break; - - case 41: - w2[2] = w2[2] | 0x0200; - break; - - case 42: - w2[2] = w2[2] | 0x020000; - break; - - case 43: - w2[2] = w2[2] | 0x02000000; - break; - - case 44: - w2[3] = 0x02; - break; - - case 45: - w2[3] = w2[3] | 0x0200; - break; - - case 46: - w2[3] = w2[3] | 0x020000; - break; - - case 47: - w2[3] = w2[3] | 0x02000000; - break; - } + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; } inline void append_0x80_1x4_S (u32 w0[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + u32 w[4]; - case 1: - w0[0] = w0[0] | 0x8000; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; - case 2: - w0[0] = w0[0] | 0x800000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x80000000; - break; + w_ptr[offset] = 0x80; - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; } inline void append_0x80_2x4_S (u32 w0[4], u32 w1[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + u32 w[8]; - case 1: - w0[0] = w0[0] | 0x8000; - break; + w[0] = w0[0]; + w[1] = w0[1]; + w[2] = w0[2]; + w[3] = w0[3]; + w[4] = w1[0]; + w[5] = w1[1]; + w[6] = w1[2]; + w[7] = w1[3]; - case 2: - w0[0] = w0[0] | 0x800000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x80000000; - break; + w_ptr[offset] = 0x80; - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - } + w0[0] = w[0]; + w0[1] = w[1]; + w0[2] = w[2]; + w0[3] = w[3]; + w1[0] = w[4]; + w1[1] = w[5]; + w1[2] = w[6]; + w1[3] = w[7]; } inline void append_0x80_3x4_S (u32 w0[4], u32 w1[4], u32 w2[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + u32 w[12]; - case 1: - w0[0] = w0[0] | 0x8000; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; - case 2: - w0[0] = w0[0] | 0x800000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x80000000; - break; + w_ptr[offset] = 0x80; - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - } + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; } inline void append_0x80_4x4_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset) { - switch (offset) - { - case 0: - w0[0] = 0x80; - break; + u32 w[16]; - case 1: - w0[0] = w0[0] | 0x8000; - break; + w[ 0] = w0[0]; + w[ 1] = w0[1]; + w[ 2] = w0[2]; + w[ 3] = w0[3]; + w[ 4] = w1[0]; + w[ 5] = w1[1]; + w[ 6] = w1[2]; + w[ 7] = w1[3]; + w[ 8] = w2[0]; + w[ 9] = w2[1]; + w[10] = w2[2]; + w[11] = w2[3]; + w[12] = w3[0]; + w[13] = w3[1]; + w[14] = w3[2]; + w[15] = w3[3]; - case 2: - w0[0] = w0[0] | 0x800000; - break; + u8 *w_ptr = (u8 *) w; - case 3: - w0[0] = w0[0] | 0x80000000; - break; + w_ptr[offset] = 0x80; - case 4: - w0[1] = 0x80; - break; - - case 5: - w0[1] = w0[1] | 0x8000; - break; - - case 6: - w0[1] = w0[1] | 0x800000; - break; - - case 7: - w0[1] = w0[1] | 0x80000000; - break; - - case 8: - w0[2] = 0x80; - break; - - case 9: - w0[2] = w0[2] | 0x8000; - break; - - case 10: - w0[2] = w0[2] | 0x800000; - break; - - case 11: - w0[2] = w0[2] | 0x80000000; - break; - - case 12: - w0[3] = 0x80; - break; - - case 13: - w0[3] = w0[3] | 0x8000; - break; - - case 14: - w0[3] = w0[3] | 0x800000; - break; - - case 15: - w0[3] = w0[3] | 0x80000000; - break; - - case 16: - w1[0] = 0x80; - break; - - case 17: - w1[0] = w1[0] | 0x8000; - break; - - case 18: - w1[0] = w1[0] | 0x800000; - break; - - case 19: - w1[0] = w1[0] | 0x80000000; - break; - - case 20: - w1[1] = 0x80; - break; - - case 21: - w1[1] = w1[1] | 0x8000; - break; - - case 22: - w1[1] = w1[1] | 0x800000; - break; - - case 23: - w1[1] = w1[1] | 0x80000000; - break; - - case 24: - w1[2] = 0x80; - break; - - case 25: - w1[2] = w1[2] | 0x8000; - break; - - case 26: - w1[2] = w1[2] | 0x800000; - break; - - case 27: - w1[2] = w1[2] | 0x80000000; - break; - - case 28: - w1[3] = 0x80; - break; - - case 29: - w1[3] = w1[3] | 0x8000; - break; - - case 30: - w1[3] = w1[3] | 0x800000; - break; - - case 31: - w1[3] = w1[3] | 0x80000000; - break; - - case 32: - w2[0] = 0x80; - break; - - case 33: - w2[0] = w2[0] | 0x8000; - break; - - case 34: - w2[0] = w2[0] | 0x800000; - break; - - case 35: - w2[0] = w2[0] | 0x80000000; - break; - - case 36: - w2[1] = 0x80; - break; - - case 37: - w2[1] = w2[1] | 0x8000; - break; - - case 38: - w2[1] = w2[1] | 0x800000; - break; - - case 39: - w2[1] = w2[1] | 0x80000000; - break; - - case 40: - w2[2] = 0x80; - break; - - case 41: - w2[2] = w2[2] | 0x8000; - break; - - case 42: - w2[2] = w2[2] | 0x800000; - break; - - case 43: - w2[2] = w2[2] | 0x80000000; - break; - - case 44: - w2[3] = 0x80; - break; - - case 45: - w2[3] = w2[3] | 0x8000; - break; - - case 46: - w2[3] = w2[3] | 0x800000; - break; - - case 47: - w2[3] = w2[3] | 0x80000000; - break; - - case 48: - w3[0] = 0x80; - break; - - case 49: - w3[0] = w3[0] | 0x8000; - break; - - case 50: - w3[0] = w3[0] | 0x800000; - break; - - case 51: - w3[0] = w3[0] | 0x80000000; - break; - - case 52: - w3[1] = 0x80; - break; - - case 53: - w3[1] = w3[1] | 0x8000; - break; - - case 54: - w3[1] = w3[1] | 0x800000; - break; - - case 55: - w3[1] = w3[1] | 0x80000000; - break; - - case 56: - w3[2] = 0x80; - break; - - case 57: - w3[2] = w3[2] | 0x8000; - break; - - case 58: - w3[2] = w3[2] | 0x800000; - break; - - case 59: - w3[2] = w3[2] | 0x80000000; - break; - - case 60: - w3[3] = 0x80; - break; - - case 61: - w3[3] = w3[3] | 0x8000; - break; - - case 62: - w3[3] = w3[3] | 0x800000; - break; - - case 63: - w3[3] = w3[3] | 0x80000000; - break; - } + w0[0] = w[ 0]; + w0[1] = w[ 1]; + w0[2] = w[ 2]; + w0[3] = w[ 3]; + w1[0] = w[ 4]; + w1[1] = w[ 5]; + w1[2] = w[ 6]; + w1[3] = w[ 7]; + w2[0] = w[ 8]; + w2[1] = w[ 9]; + w2[2] = w[10]; + w2[3] = w[11]; + w3[0] = w[12]; + w3[1] = w[13]; + w3[2] = w[14]; + w3[3] = w[15]; } inline void truncate_block_S (u32 w[4], const u32 len)