From 7cb510f1cecf5e6438a9d4dd70006cab2f9b3fe1 Mon Sep 17 00:00:00 2001 From: jsteube Date: Sat, 16 Mar 2019 21:11:02 +0100 Subject: [PATCH] More manually unrolled cipher code --- OpenCL/inc_cipher_aes.cl | 433 +++++++++++++++++++++++------------ OpenCL/inc_cipher_twofish.cl | 39 ++-- 2 files changed, 305 insertions(+), 167 deletions(-) diff --git a/OpenCL/inc_cipher_aes.cl b/OpenCL/inc_cipher_aes.cl index 6a706cf70..d3d0019bd 100644 --- a/OpenCL/inc_cipher_aes.cl +++ b/OpenCL/inc_cipher_aes.cl @@ -695,28 +695,90 @@ __constant u32a rcon[10] = DECLSPEC void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); DECLSPEC void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { - ks[0] = ukey[0]; - ks[1] = ukey[1]; - ks[2] = ukey[2]; - ks[3] = ukey[3]; - - for (int i = 0, j = 0; i < 10; i += 1, j += 4) - { - u32 temp = ks[j + 3]; - - temp = (s_te2[(temp >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp >> 24) & 0xff] & 0x000000ff); - - ks[j + 4] = ks[j + 0] - ^ temp - ^ rcon[i]; - - ks[j + 5] = ks[j + 1] ^ ks[j + 4]; - ks[j + 6] = ks[j + 2] ^ ks[j + 5]; - ks[j + 7] = ks[j + 3] ^ ks[j + 6]; - } + ks[ 0] = ukey[0]; + ks[ 1] = ukey[1]; + ks[ 2] = ukey[2]; + ks[ 3] = ukey[3]; + ks[ 4] = ks[ 0] ^ 0x01000000 + ^ (s_te2[(ks[ 3] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[ 3] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[ 3] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[ 3] >> 24) & 0xff] & 0x000000ff); + ks[ 5] = ks[ 1] ^ ks[ 4]; + ks[ 6] = ks[ 2] ^ ks[ 5]; + ks[ 7] = ks[ 3] ^ ks[ 6]; + ks[ 8] = ks[ 4] ^ 0x02000000 + ^ (s_te2[(ks[ 7] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[ 7] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[ 7] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[ 7] >> 24) & 0xff] & 0x000000ff); + 
ks[ 9] = ks[ 5] ^ ks[ 8]; + ks[10] = ks[ 6] ^ ks[ 9]; + ks[11] = ks[ 7] ^ ks[10]; + ks[12] = ks[ 8] ^ 0x04000000 + ^ (s_te2[(ks[11] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[11] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[11] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[11] >> 24) & 0xff] & 0x000000ff); + ks[13] = ks[ 9] ^ ks[12]; + ks[14] = ks[10] ^ ks[13]; + ks[15] = ks[11] ^ ks[14]; + ks[16] = ks[12] ^ 0x08000000 + ^ (s_te2[(ks[15] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[15] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[15] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[15] >> 24) & 0xff] & 0x000000ff); + ks[17] = ks[13] ^ ks[16]; + ks[18] = ks[14] ^ ks[17]; + ks[19] = ks[15] ^ ks[18]; + ks[20] = ks[16] ^ 0x10000000 + ^ (s_te2[(ks[19] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[19] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[19] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[19] >> 24) & 0xff] & 0x000000ff); + ks[21] = ks[17] ^ ks[20]; + ks[22] = ks[18] ^ ks[21]; + ks[23] = ks[19] ^ ks[22]; + ks[24] = ks[20] ^ 0x20000000 + ^ (s_te2[(ks[23] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[23] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[23] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[23] >> 24) & 0xff] & 0x000000ff); + ks[25] = ks[21] ^ ks[24]; + ks[26] = ks[22] ^ ks[25]; + ks[27] = ks[23] ^ ks[26]; + ks[28] = ks[24] ^ 0x40000000 + ^ (s_te2[(ks[27] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[27] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[27] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[27] >> 24) & 0xff] & 0x000000ff); + ks[29] = ks[25] ^ ks[28]; + ks[30] = ks[26] ^ ks[29]; + ks[31] = ks[27] ^ ks[30]; + ks[32] = ks[28] ^ 0x80000000 + ^ (s_te2[(ks[31] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[31] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[31] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[31] >> 24) & 0xff] & 0x000000ff); + ks[33] = ks[29] ^ ks[32]; + ks[34] = ks[30] ^ ks[33]; + ks[35] = ks[31] ^ ks[34]; + ks[36] = ks[32] ^ 0x1b000000 + ^ (s_te2[(ks[35] >> 16) & 0xff] & 0xff000000) + 
^ (s_te3[(ks[35] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[35] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[35] >> 24) & 0xff] & 0x000000ff); + ks[37] = ks[33] ^ ks[36]; + ks[38] = ks[34] ^ ks[37]; + ks[39] = ks[35] ^ ks[38]; + ks[40] = ks[36] ^ 0x36000000 + ^ (s_te2[(ks[39] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[39] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[39] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[39] >> 24) & 0xff] & 0x000000ff); + ks[41] = ks[37] ^ ks[40]; + ks[42] = ks[38] ^ ks[41]; + ks[43] = ks[39] ^ ks[42]; } DECLSPEC void aes128_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4); @@ -745,49 +807,42 @@ DECLSPEC void aes128_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te temp = ks[18]; ks[18] = ks[26]; ks[26] = temp; temp = ks[19]; ks[19] = ks[27]; ks[27] = temp; - for (int i = 1, j = 4; i < 10; i += 1, j += 4) - { - const u32 x0s0 = (ks[j + 0] >> 0) & 0xff; - const u32 x0s1 = (ks[j + 0] >> 8) & 0xff; - const u32 x0s2 = (ks[j + 0] >> 16) & 0xff; - const u32 x0s3 = (ks[j + 0] >> 24) & 0xff; - const u32 x1s0 = (ks[j + 1] >> 0) & 0xff; - const u32 x1s1 = (ks[j + 1] >> 8) & 0xff; - const u32 x1s2 = (ks[j + 1] >> 16) & 0xff; - const u32 x1s3 = (ks[j + 1] >> 24) & 0xff; - const u32 x2s0 = (ks[j + 2] >> 0) & 0xff; - const u32 x2s1 = (ks[j + 2] >> 8) & 0xff; - const u32 x2s2 = (ks[j + 2] >> 16) & 0xff; - const u32 x2s3 = (ks[j + 2] >> 24) & 0xff; - const u32 x3s0 = (ks[j + 3] >> 0) & 0xff; - const u32 x3s1 = (ks[j + 3] >> 8) & 0xff; - const u32 x3s2 = (ks[j + 3] >> 16) & 0xff; - const u32 x3s3 = (ks[j + 3] >> 24) & 0xff; - - ks[j + 0] = - s_td0[s_te1[x0s3] & 0xff] ^ - s_td1[s_te1[x0s2] & 0xff] ^ - s_td2[s_te1[x0s1] & 0xff] ^ - s_td3[s_te1[x0s0] & 0xff]; - - ks[j + 1] = - s_td0[s_te1[x1s3] & 0xff] ^ - s_td1[s_te1[x1s2] & 0xff] ^ - s_td2[s_te1[x1s1] & 0xff] ^ - 
s_td3[s_te1[x1s0] & 0xff]; - - ks[j + 2] = - s_td0[s_te1[x2s3] & 0xff] ^ - s_td1[s_te1[x2s2] & 0xff] ^ - s_td2[s_te1[x2s1] & 0xff] ^ - s_td3[s_te1[x2s0] & 0xff]; - - ks[j + 3] = - s_td0[s_te1[x3s3] & 0xff] ^ - s_td1[s_te1[x3s2] & 0xff] ^ - s_td2[s_te1[x3s1] & 0xff] ^ - s_td3[s_te1[x3s0] & 0xff]; - } + ks[ 4] = s_td0[s_te1[(ks[ 4] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 4] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 4] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 4] >> 0) & 0xff] & 0xff]; + ks[ 5] = s_td0[s_te1[(ks[ 5] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 5] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 5] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 5] >> 0) & 0xff] & 0xff]; + ks[ 6] = s_td0[s_te1[(ks[ 6] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 6] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 6] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 6] >> 0) & 0xff] & 0xff]; + ks[ 7] = s_td0[s_te1[(ks[ 7] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 7] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 7] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 7] >> 0) & 0xff] & 0xff]; + ks[ 8] = s_td0[s_te1[(ks[ 8] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 8] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 8] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 8] >> 0) & 0xff] & 0xff]; + ks[ 9] = s_td0[s_te1[(ks[ 9] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 9] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 9] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 9] >> 0) & 0xff] & 0xff]; + ks[10] = s_td0[s_te1[(ks[10] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[10] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[10] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[10] >> 0) & 0xff] & 0xff]; + ks[11] = s_td0[s_te1[(ks[11] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[11] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[11] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[11] >> 0) & 0xff] & 0xff]; + ks[12] = s_td0[s_te1[(ks[12] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[12] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[12] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[12] >> 0) & 0xff] & 0xff]; + ks[13] = s_td0[s_te1[(ks[13] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[13] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[13] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[13] 
>> 0) & 0xff] & 0xff]; + ks[14] = s_td0[s_te1[(ks[14] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[14] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[14] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[14] >> 0) & 0xff] & 0xff]; + ks[15] = s_td0[s_te1[(ks[15] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[15] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[15] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[15] >> 0) & 0xff] & 0xff]; + ks[16] = s_td0[s_te1[(ks[16] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[16] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[16] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[16] >> 0) & 0xff] & 0xff]; + ks[17] = s_td0[s_te1[(ks[17] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[17] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[17] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[17] >> 0) & 0xff] & 0xff]; + ks[18] = s_td0[s_te1[(ks[18] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[18] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[18] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[18] >> 0) & 0xff] & 0xff]; + ks[19] = s_td0[s_te1[(ks[19] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[19] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[19] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[19] >> 0) & 0xff] & 0xff]; + ks[20] = s_td0[s_te1[(ks[20] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[20] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[20] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[20] >> 0) & 0xff] & 0xff]; + ks[21] = s_td0[s_te1[(ks[21] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[21] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[21] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[21] >> 0) & 0xff] & 0xff]; + ks[22] = s_td0[s_te1[(ks[22] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[22] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[22] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[22] >> 0) & 0xff] & 0xff]; + ks[23] = s_td0[s_te1[(ks[23] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[23] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[23] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[23] >> 0) & 0xff] & 0xff]; + ks[24] = s_td0[s_te1[(ks[24] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[24] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[24] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[24] >> 0) & 0xff] & 0xff]; + ks[25] = s_td0[s_te1[(ks[25] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[25] >> 16) & 0xff] & 
0xff] ^ s_td2[s_te1[(ks[25] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[25] >> 0) & 0xff] & 0xff]; + ks[26] = s_td0[s_te1[(ks[26] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[26] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[26] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[26] >> 0) & 0xff] & 0xff]; + ks[27] = s_td0[s_te1[(ks[27] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[27] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[27] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[27] >> 0) & 0xff] & 0xff]; + ks[28] = s_td0[s_te1[(ks[28] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[28] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[28] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[28] >> 0) & 0xff] & 0xff]; + ks[29] = s_td0[s_te1[(ks[29] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[29] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[29] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[29] >> 0) & 0xff] & 0xff]; + ks[30] = s_td0[s_te1[(ks[30] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[30] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[30] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[30] >> 0) & 0xff] & 0xff]; + ks[31] = s_td0[s_te1[(ks[31] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[31] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[31] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[31] >> 0) & 0xff] & 0xff]; + ks[32] = s_td0[s_te1[(ks[32] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[32] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[32] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[32] >> 0) & 0xff] & 0xff]; + ks[33] = s_td0[s_te1[(ks[33] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[33] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[33] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[33] >> 0) & 0xff] & 0xff]; + ks[34] = s_td0[s_te1[(ks[34] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[34] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[34] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[34] >> 0) & 0xff] & 0xff]; + ks[35] = s_td0[s_te1[(ks[35] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[35] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[35] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[35] >> 0) & 0xff] & 0xff]; + ks[36] = s_td0[s_te1[(ks[36] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[36] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[36] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[36] >> 0) & 0xff] & 0xff]; + ks[37] = 
s_td0[s_te1[(ks[37] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[37] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[37] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[37] >> 0) & 0xff] & 0xff]; + ks[38] = s_td0[s_te1[(ks[38] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[38] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[38] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[38] >> 0) & 0xff] & 0xff]; + ks[39] = s_td0[s_te1[(ks[39] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[39] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[39] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[39] >> 0) & 0xff] & 0xff]; } DECLSPEC void aes128_set_encrypt_key (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); @@ -997,47 +1052,118 @@ DECLSPEC void aes128_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u DECLSPEC void aes256_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); DECLSPEC void aes256_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { - ks[0] = ukey[0]; - ks[1] = ukey[1]; - ks[2] = ukey[2]; - ks[3] = ukey[3]; - ks[4] = ukey[4]; - ks[5] = ukey[5]; - ks[6] = ukey[6]; - ks[7] = ukey[7]; - - int i; - int j; - - for (int i = 0, j = 0; i < 7; i += 1, j += 8) - { - const u32 temp1 = ks[j + 7]; - - ks[j + 8] = ks[j + 0] - ^ (s_te2[(temp1 >> 16) & 0xff] & 0xff000000) - ^ (s_te3[(temp1 >> 8) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp1 >> 0) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp1 >> 24) & 0xff] & 0x000000ff) - ^ rcon[i]; - - ks[j + 9] = ks[j + 1] ^ ks[j + 8]; - ks[j + 10] = ks[j + 2] ^ ks[j + 9]; - ks[j + 11] = ks[j + 3] ^ ks[j + 10]; - - if (i == 6) break; - - const u32 temp2 = ks[j + 11]; - - ks[j + 12] = ks[j + 4] - ^ (s_te2[(temp2 >> 24) & 0xff] & 0xff000000) - ^ (s_te3[(temp2 >> 16) & 0xff] & 0x00ff0000) - ^ (s_te0[(temp2 >> 8) & 0xff] & 0x0000ff00) - ^ (s_te1[(temp2 >> 0) & 0xff] & 0x000000ff); - - ks[j + 13] = 
ks[j + 5] ^ ks[j + 12]; - ks[j + 14] = ks[j + 6] ^ ks[j + 13]; - ks[j + 15] = ks[j + 7] ^ ks[j + 14]; - } + ks[ 0] = ukey[0]; + ks[ 1] = ukey[1]; + ks[ 2] = ukey[2]; + ks[ 3] = ukey[3]; + ks[ 4] = ukey[4]; + ks[ 5] = ukey[5]; + ks[ 6] = ukey[6]; + ks[ 7] = ukey[7]; + ks[ 8] = ks[ 0] ^ 0x01000000 + ^ (s_te2[(ks[ 7] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[ 7] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[ 7] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[ 7] >> 24) & 0xff] & 0x000000ff); + ks[ 9] = ks[ 1] ^ ks[ 8]; + ks[10] = ks[ 2] ^ ks[ 9]; + ks[11] = ks[ 3] ^ ks[10]; + ks[12] = ks[ 4] ^ 0 + ^ (s_te2[(ks[11] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[11] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[11] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[11] >> 0) & 0xff] & 0x000000ff); + ks[13] = ks[ 5] ^ ks[12]; + ks[14] = ks[ 6] ^ ks[13]; + ks[15] = ks[ 7] ^ ks[14]; + ks[16] = ks[ 8] ^ 0x02000000 + ^ (s_te2[(ks[15] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[15] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[15] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[15] >> 24) & 0xff] & 0x000000ff); + ks[17] = ks[ 9] ^ ks[16]; + ks[18] = ks[10] ^ ks[17]; + ks[19] = ks[11] ^ ks[18]; + ks[20] = ks[12] ^ 0 + ^ (s_te2[(ks[19] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[19] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[19] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[19] >> 0) & 0xff] & 0x000000ff); + ks[21] = ks[13] ^ ks[20]; + ks[22] = ks[14] ^ ks[21]; + ks[23] = ks[15] ^ ks[22]; + ks[24] = ks[16] ^ 0x04000000 + ^ (s_te2[(ks[23] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[23] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[23] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[23] >> 24) & 0xff] & 0x000000ff); + ks[25] = ks[17] ^ ks[24]; + ks[26] = ks[18] ^ ks[25]; + ks[27] = ks[19] ^ ks[26]; + ks[28] = ks[20] ^ 0 + ^ (s_te2[(ks[27] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[27] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[27] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[27] >> 0) & 0xff] & 0x000000ff); + ks[29] 
= ks[21] ^ ks[28]; + ks[30] = ks[22] ^ ks[29]; + ks[31] = ks[23] ^ ks[30]; + ks[32] = ks[24] ^ 0x08000000 + ^ (s_te2[(ks[31] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[31] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[31] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[31] >> 24) & 0xff] & 0x000000ff); + ks[33] = ks[25] ^ ks[32]; + ks[34] = ks[26] ^ ks[33]; + ks[35] = ks[27] ^ ks[34]; + ks[36] = ks[28] ^ 0 + ^ (s_te2[(ks[35] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[35] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[35] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[35] >> 0) & 0xff] & 0x000000ff); + ks[37] = ks[29] ^ ks[36]; + ks[38] = ks[30] ^ ks[37]; + ks[39] = ks[31] ^ ks[38]; + ks[40] = ks[32] ^ 0x10000000 + ^ (s_te2[(ks[39] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[39] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[39] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[39] >> 24) & 0xff] & 0x000000ff); + ks[41] = ks[33] ^ ks[40]; + ks[42] = ks[34] ^ ks[41]; + ks[43] = ks[35] ^ ks[42]; + ks[44] = ks[36] ^ 0 + ^ (s_te2[(ks[43] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[43] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[43] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[43] >> 0) & 0xff] & 0x000000ff); + ks[45] = ks[37] ^ ks[44]; + ks[46] = ks[38] ^ ks[45]; + ks[47] = ks[39] ^ ks[46]; + ks[48] = ks[40] ^ 0x20000000 + ^ (s_te2[(ks[47] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[47] >> 8) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[47] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[47] >> 24) & 0xff] & 0x000000ff); + ks[49] = ks[41] ^ ks[48]; + ks[50] = ks[42] ^ ks[49]; + ks[51] = ks[43] ^ ks[50]; + ks[52] = ks[44] ^ 0 + ^ (s_te2[(ks[51] >> 24) & 0xff] & 0xff000000) + ^ (s_te3[(ks[51] >> 16) & 0xff] & 0x00ff0000) + ^ (s_te0[(ks[51] >> 8) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[51] >> 0) & 0xff] & 0x000000ff); + ks[53] = ks[45] ^ ks[52]; + ks[54] = ks[46] ^ ks[53]; + ks[55] = ks[47] ^ ks[54]; + ks[56] = ks[48] ^ 0x40000000 + ^ (s_te2[(ks[55] >> 16) & 0xff] & 0xff000000) + ^ (s_te3[(ks[55] >> 8) & 0xff] & 
0x00ff0000) + ^ (s_te0[(ks[55] >> 0) & 0xff] & 0x0000ff00) + ^ (s_te1[(ks[55] >> 24) & 0xff] & 0x000000ff); + ks[57] = ks[49] ^ ks[56]; + ks[58] = ks[50] ^ ks[57]; + ks[59] = ks[51] ^ ks[58]; } DECLSPEC void aes256_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4); @@ -1074,49 +1200,58 @@ DECLSPEC void aes256_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te temp = ks[26]; ks[26] = ks[34]; ks[34] = temp; temp = ks[27]; ks[27] = ks[35]; ks[35] = temp; - for (int i = 1, j = 4; i < 14; i += 1, j += 4) - { - const u32 x0s0 = (ks[j + 0] >> 0) & 0xff; - const u32 x0s1 = (ks[j + 0] >> 8) & 0xff; - const u32 x0s2 = (ks[j + 0] >> 16) & 0xff; - const u32 x0s3 = (ks[j + 0] >> 24) & 0xff; - const u32 x1s0 = (ks[j + 1] >> 0) & 0xff; - const u32 x1s1 = (ks[j + 1] >> 8) & 0xff; - const u32 x1s2 = (ks[j + 1] >> 16) & 0xff; - const u32 x1s3 = (ks[j + 1] >> 24) & 0xff; - const u32 x2s0 = (ks[j + 2] >> 0) & 0xff; - const u32 x2s1 = (ks[j + 2] >> 8) & 0xff; - const u32 x2s2 = (ks[j + 2] >> 16) & 0xff; - const u32 x2s3 = (ks[j + 2] >> 24) & 0xff; - const u32 x3s0 = (ks[j + 3] >> 0) & 0xff; - const u32 x3s1 = (ks[j + 3] >> 8) & 0xff; - const u32 x3s2 = (ks[j + 3] >> 16) & 0xff; - const u32 x3s3 = (ks[j + 3] >> 24) & 0xff; - - ks[j + 0] = - s_td0[s_te1[x0s3] & 0xff] ^ - s_td1[s_te1[x0s2] & 0xff] ^ - s_td2[s_te1[x0s1] & 0xff] ^ - s_td3[s_te1[x0s0] & 0xff]; - - ks[j + 1] = - s_td0[s_te1[x1s3] & 0xff] ^ - s_td1[s_te1[x1s2] & 0xff] ^ - s_td2[s_te1[x1s1] & 0xff] ^ - s_td3[s_te1[x1s0] & 0xff]; - - ks[j + 2] = - s_td0[s_te1[x2s3] & 0xff] ^ - s_td1[s_te1[x2s2] & 0xff] ^ - s_td2[s_te1[x2s1] & 0xff] ^ - s_td3[s_te1[x2s0] & 0xff]; - - ks[j + 3] = - s_td0[s_te1[x3s3] & 0xff] ^ - s_td1[s_te1[x3s2] & 0xff] ^ - s_td2[s_te1[x3s1] & 0xff] ^ - s_td3[s_te1[x3s0] & 0xff]; - } + ks[ 4] = s_td0[s_te1[(ks[ 4] >> 24) & 0xff] & 
0xff] ^ s_td1[s_te1[(ks[ 4] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 4] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 4] >> 0) & 0xff] & 0xff]; + ks[ 5] = s_td0[s_te1[(ks[ 5] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 5] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 5] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 5] >> 0) & 0xff] & 0xff]; + ks[ 6] = s_td0[s_te1[(ks[ 6] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 6] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 6] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 6] >> 0) & 0xff] & 0xff]; + ks[ 7] = s_td0[s_te1[(ks[ 7] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 7] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 7] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 7] >> 0) & 0xff] & 0xff]; + ks[ 8] = s_td0[s_te1[(ks[ 8] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 8] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 8] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 8] >> 0) & 0xff] & 0xff]; + ks[ 9] = s_td0[s_te1[(ks[ 9] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[ 9] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[ 9] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[ 9] >> 0) & 0xff] & 0xff]; + ks[10] = s_td0[s_te1[(ks[10] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[10] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[10] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[10] >> 0) & 0xff] & 0xff]; + ks[11] = s_td0[s_te1[(ks[11] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[11] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[11] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[11] >> 0) & 0xff] & 0xff]; + ks[12] = s_td0[s_te1[(ks[12] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[12] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[12] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[12] >> 0) & 0xff] & 0xff]; + ks[13] = s_td0[s_te1[(ks[13] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[13] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[13] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[13] >> 0) & 0xff] & 0xff]; + ks[14] = s_td0[s_te1[(ks[14] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[14] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[14] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[14] >> 0) & 0xff] & 0xff]; + ks[15] = s_td0[s_te1[(ks[15] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[15] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[15] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[15] >> 
0) & 0xff] & 0xff]; + ks[16] = s_td0[s_te1[(ks[16] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[16] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[16] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[16] >> 0) & 0xff] & 0xff]; + ks[17] = s_td0[s_te1[(ks[17] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[17] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[17] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[17] >> 0) & 0xff] & 0xff]; + ks[18] = s_td0[s_te1[(ks[18] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[18] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[18] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[18] >> 0) & 0xff] & 0xff]; + ks[19] = s_td0[s_te1[(ks[19] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[19] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[19] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[19] >> 0) & 0xff] & 0xff]; + ks[20] = s_td0[s_te1[(ks[20] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[20] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[20] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[20] >> 0) & 0xff] & 0xff]; + ks[21] = s_td0[s_te1[(ks[21] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[21] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[21] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[21] >> 0) & 0xff] & 0xff]; + ks[22] = s_td0[s_te1[(ks[22] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[22] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[22] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[22] >> 0) & 0xff] & 0xff]; + ks[23] = s_td0[s_te1[(ks[23] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[23] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[23] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[23] >> 0) & 0xff] & 0xff]; + ks[24] = s_td0[s_te1[(ks[24] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[24] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[24] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[24] >> 0) & 0xff] & 0xff]; + ks[25] = s_td0[s_te1[(ks[25] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[25] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[25] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[25] >> 0) & 0xff] & 0xff]; + ks[26] = s_td0[s_te1[(ks[26] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[26] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[26] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[26] >> 0) & 0xff] & 0xff]; + ks[27] = s_td0[s_te1[(ks[27] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[27] >> 16) & 0xff] & 
0xff] ^ s_td2[s_te1[(ks[27] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[27] >> 0) & 0xff] & 0xff]; + ks[28] = s_td0[s_te1[(ks[28] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[28] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[28] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[28] >> 0) & 0xff] & 0xff]; + ks[29] = s_td0[s_te1[(ks[29] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[29] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[29] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[29] >> 0) & 0xff] & 0xff]; + ks[30] = s_td0[s_te1[(ks[30] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[30] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[30] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[30] >> 0) & 0xff] & 0xff]; + ks[31] = s_td0[s_te1[(ks[31] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[31] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[31] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[31] >> 0) & 0xff] & 0xff]; + ks[32] = s_td0[s_te1[(ks[32] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[32] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[32] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[32] >> 0) & 0xff] & 0xff]; + ks[33] = s_td0[s_te1[(ks[33] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[33] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[33] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[33] >> 0) & 0xff] & 0xff]; + ks[34] = s_td0[s_te1[(ks[34] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[34] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[34] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[34] >> 0) & 0xff] & 0xff]; + ks[35] = s_td0[s_te1[(ks[35] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[35] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[35] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[35] >> 0) & 0xff] & 0xff]; + ks[36] = s_td0[s_te1[(ks[36] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[36] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[36] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[36] >> 0) & 0xff] & 0xff]; + ks[37] = s_td0[s_te1[(ks[37] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[37] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[37] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[37] >> 0) & 0xff] & 0xff]; + ks[38] = s_td0[s_te1[(ks[38] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[38] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[38] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[38] >> 0) & 0xff] & 0xff]; + ks[39] = 
s_td0[s_te1[(ks[39] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[39] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[39] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[39] >> 0) & 0xff] & 0xff]; + ks[40] = s_td0[s_te1[(ks[40] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[40] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[40] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[40] >> 0) & 0xff] & 0xff]; + ks[41] = s_td0[s_te1[(ks[41] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[41] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[41] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[41] >> 0) & 0xff] & 0xff]; + ks[42] = s_td0[s_te1[(ks[42] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[42] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[42] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[42] >> 0) & 0xff] & 0xff]; + ks[43] = s_td0[s_te1[(ks[43] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[43] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[43] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[43] >> 0) & 0xff] & 0xff]; + ks[44] = s_td0[s_te1[(ks[44] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[44] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[44] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[44] >> 0) & 0xff] & 0xff]; + ks[45] = s_td0[s_te1[(ks[45] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[45] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[45] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[45] >> 0) & 0xff] & 0xff]; + ks[46] = s_td0[s_te1[(ks[46] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[46] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[46] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[46] >> 0) & 0xff] & 0xff]; + ks[47] = s_td0[s_te1[(ks[47] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[47] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[47] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[47] >> 0) & 0xff] & 0xff]; + ks[48] = s_td0[s_te1[(ks[48] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[48] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[48] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[48] >> 0) & 0xff] & 0xff]; + ks[49] = s_td0[s_te1[(ks[49] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[49] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[49] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[49] >> 0) & 0xff] & 0xff]; + ks[50] = s_td0[s_te1[(ks[50] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[50] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[50] >> 8) & 
0xff] & 0xff] ^ s_td3[s_te1[(ks[50] >> 0) & 0xff] & 0xff]; + ks[51] = s_td0[s_te1[(ks[51] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[51] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[51] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[51] >> 0) & 0xff] & 0xff]; + ks[52] = s_td0[s_te1[(ks[52] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[52] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[52] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[52] >> 0) & 0xff] & 0xff]; + ks[53] = s_td0[s_te1[(ks[53] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[53] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[53] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[53] >> 0) & 0xff] & 0xff]; + ks[54] = s_td0[s_te1[(ks[54] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[54] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[54] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[54] >> 0) & 0xff] & 0xff]; + ks[55] = s_td0[s_te1[(ks[55] >> 24) & 0xff] & 0xff] ^ s_td1[s_te1[(ks[55] >> 16) & 0xff] & 0xff] ^ s_td2[s_te1[(ks[55] >> 8) & 0xff] & 0xff] ^ s_td3[s_te1[(ks[55] >> 0) & 0xff] & 0xff]; } DECLSPEC void aes256_set_encrypt_key (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl index 02eae097e..f5ca58b54 100644 --- a/OpenCL/inc_cipher_twofish.cl +++ b/OpenCL/inc_cipher_twofish.cl @@ -271,26 +271,29 @@ DECLSPEC u32 mds_rem (u32 p0, u32 p1) { #define G_MOD 0x14d - for (int i = 0; i < 8; i++) - { - u32 t = p1 >> 24; - - p1 = (p1 << 8) | (p0 >> 24); - - p0 <<= 8; - - u32 u = (t << 1); - - if (t & 0x80) u ^= G_MOD; - - p1 ^= t ^ (u << 16); - - u ^= (t >> 1); + #define MDS_REM_ROUND() \ + { \ + u32 t = p1 >> 24; \ + p1 = (p1 << 8) | (p0 >> 24); \ + p0 <<= 8; \ + u32 u = (t << 1); \ + if (t & 0x80) u ^= G_MOD; \ + p1 ^= t ^ (u << 16); \ + u ^= (t >> 1); \ + if (t & 0x01) u ^= G_MOD >> 1; \ + p1 ^= (u << 24) | (u << 8); \ + } - if (t & 0x01) u ^= G_MOD >> 1; + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); + MDS_REM_ROUND(); - 
p1 ^= (u << 24) | (u << 8); - } + #undef MDS_REM_ROUND return p1; }