diff --git a/OpenCL/inc_cipher_camellia.cl b/OpenCL/inc_cipher_camellia.cl index 03bbdd99c..8d1d9990f 100644 --- a/OpenCL/inc_cipher_camellia.cl +++ b/OpenCL/inc_cipher_camellia.cl @@ -20,6 +20,15 @@ #include "inc_common.h" #include "inc_cipher_camellia.h" +#define c_sbox1(n) c_sbox[(n)] +#define c_sbox2(n) (((c_sbox[(n)] >> 7) ^ (c_sbox[(n)] << 1)) & 0xff) +#define c_sbox3(n) (((c_sbox[(n)] >> 1) ^ (c_sbox[(n)] << 7)) & 0xff) +#define c_sbox4(n) c_sbox[(((n) << 1) ^ ((n) >> 7)) & 0xff] + +#define cam_rotate(a,b,n) hc_swap32_S ((u[(a)] << (n)) ^ (u[(b)] >> (32 - (n)))) + +#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) + DECLSPEC void cam_feistel (const u32 *x, const u32 *k, u32 *y) { u32 b[8]; @@ -323,3 +332,12 @@ DECLSPEC void camellia256_decrypt (const u32 *ks, const u32 *in, u32 *out) out[2] = tmp[0] ^ ks[2]; out[3] = tmp[1] ^ ks[3]; } + +#undef c_sbox1 +#undef c_sbox2 +#undef c_sbox3 +#undef c_sbox4 + +#undef cam_rotate + +#undef extract_byte diff --git a/OpenCL/inc_cipher_camellia.h b/OpenCL/inc_cipher_camellia.h index 79b930a51..7077dc24f 100644 --- a/OpenCL/inc_cipher_camellia.h +++ b/OpenCL/inc_cipher_camellia.h @@ -42,15 +42,6 @@ CONSTANT_AS const u32a c_sbox[256] = 0x15, 0xe3, 0xad, 0xf4, 0x77, 0xc7, 0x80, 0x9e }; -#define c_sbox1(n) c_sbox[(n)] -#define c_sbox2(n) (((c_sbox[(n)] >> 7) ^ (c_sbox[(n)] << 1)) & 0xff) -#define c_sbox3(n) (((c_sbox[(n)] >> 1) ^ (c_sbox[(n)] << 7)) & 0xff) -#define c_sbox4(n) c_sbox[(((n) << 1) ^ ((n) >> 7)) & 0xff] - -#define cam_rotate(a,b,n) hc_swap32_S ((u[(a)] << (n)) ^ (u[(b)] >> (32 - (n)))) - -#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) - DECLSPEC void cam_feistel (const u32 *x, const u32 *k, u32 *y); DECLSPEC void cam_fl (u32 *x, const u32 *kl, const u32 *kr); DECLSPEC void camellia256_set_key (u32 *ks, const u32 *ukey); diff --git a/OpenCL/inc_cipher_des.cl b/OpenCL/inc_cipher_des.cl index d342ff0d9..67edecd5e 100644 --- a/OpenCL/inc_cipher_des.cl +++ b/OpenCL/inc_cipher_des.cl @@ -8,6 +8,58 @@ #include "inc_common.h" #include "inc_cipher_des.h" +#define PERM_OP(a,b,n,m) \ +{ \ + u32x t; \ + t = a >> n; \ + t = t ^ b; \ + t = t & m; \ + b = b ^ t; \ + t = t << n; \ + a = a ^ t; \ +} + +#define HPERM_OP(a,n,m) \ +{ \ + u32x t; \ + t = a << (16 + n); \ + t = t ^ a; \ + t = t & m; \ + a = a ^ t; \ + t = t >> (16 + n); \ + a = a ^ t; \ +} + +#define DES_IP(l,r) \ +{ \ + PERM_OP (r, l, 4, 0x0f0f0f0f); \ + PERM_OP (l, r, 16, 0x0000ffff); \ + PERM_OP (r, l, 2, 0x33333333); \ + PERM_OP (l, r, 8, 0x00ff00ff); \ + PERM_OP (r, l, 1, 0x55555555); \ +} + +#define DES_FP(l,r) \ +{ \ + PERM_OP (l, r, 1, 0x55555555); \ + PERM_OP (r, l, 8, 0x00ff00ff); \ + PERM_OP (l, r, 2, 0x33333333); \ + PERM_OP (r, l, 16, 0x0000ffff); \ + PERM_OP (l, r, 4, 0x0f0f0f0f); \ +} + +#if VECT_SIZE == 1 +#define DES_BOX(i,n,S) (S)[(n)][(i)] +#elif VECT_SIZE == 2 +#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) +#elif VECT_SIZE == 4 +#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) +#elif VECT_SIZE == 8 +#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7]) +#elif VECT_SIZE == 16 +#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) +#endif + DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]) { u32x r = data[0]; @@ -172,3 +224,10 @@ DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE Kd[i] = hc_rotl32 (Kd[i], 2u); } } + +#undef PERM_OP +#undef HPERM_OP +#undef DES_IP +#undef DES_FP + +#undef DES_BOX diff --git a/OpenCL/inc_cipher_des.h b/OpenCL/inc_cipher_des.h index da12f1df3..b72c1b906 100644 --- a/OpenCL/inc_cipher_des.h +++ b/OpenCL/inc_cipher_des.h @@ -310,58 +310,6 @@ CONSTANT_AS u32a c_skb[8][64] = } }; -#define PERM_OP(a,b,n,m) \ -{ \ - u32x t; \ - t = a >> n; \ - t = t ^ b; \ - t = t & m; \ - b = b ^ t; \ - t = t << n; \ - a = a ^ t; \ -} - -#define HPERM_OP(a,n,m) \ -{ \ - u32x t; \ - t = a << (16 + n); \ - t = t ^ a; \ - t = t & m; \ - a = a ^ t; \ - t = t >> (16 + n); \ - a = a ^ t; \ -} - -#define DES_IP(l,r) \ -{ \ - PERM_OP (r, l, 4, 0x0f0f0f0f); \ - PERM_OP (l, r, 16, 0x0000ffff); \ - PERM_OP (r, l, 2, 0x33333333); \ - PERM_OP (l, r, 8, 0x00ff00ff); \ - PERM_OP (r, l, 1, 0x55555555); \ -} - -#define DES_FP(l,r) \ -{ \ - PERM_OP (l, r, 1, 0x55555555); \ - PERM_OP (r, l, 8, 0x00ff00ff); \ - PERM_OP (l, r, 2, 0x33333333); \ - PERM_OP (r, l, 16, 0x0000ffff); \ - PERM_OP (l, r, 4, 0x0f0f0f0f); \ -} - -#if VECT_SIZE == 1 -#define DES_BOX(i,n,S) (S)[(n)][(i)] -#elif VECT_SIZE == 2 -#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1]) -#elif VECT_SIZE == 4 -#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3]) -#elif VECT_SIZE == 8 -#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7]) -#elif VECT_SIZE == 16 -#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf]) -#endif - DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]); DECLSPEC void _des_crypt_decrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]); DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]); diff --git a/OpenCL/inc_cipher_kuznyechik.cl b/OpenCL/inc_cipher_kuznyechik.cl index 56a8ebed5..c1bdf77ca 100644 --- a/OpenCL/inc_cipher_kuznyechik.cl +++ b/OpenCL/inc_cipher_kuznyechik.cl @@ -17,6 +17,23 @@ #include "inc_common.h" #include "inc_cipher_kuznyechik.h" +#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) + +#define k_lookup(w,sbox) \ + for (int i = 0; i < 4; i++) \ + w[i] = sbox[extract_byte (w[i], 0)] << 0 \ + | sbox[extract_byte (w[i], 1)] << 8 \ + | sbox[extract_byte (w[i], 2)] << 16 \ + | sbox[extract_byte (w[i], 3)] << 24 + +#define k_xor(n) \ + for (int i = (n); i > 0; i /= 2) \ + { \ + z ^= x * (i % 2); \ + x = (x << 1) ^ ((x >> 7) * 0xc3); \ + x &= 0xff; \ + } + DECLSPEC void kuznyechik_linear (u32 *w) { // used in k_xor macro @@ -213,3 +230,7 @@ DECLSPEC void kuznyechik_decrypt (const u32 *ks, const u32 *in, u32 *out) out[3] ^= ks[4 * i + 3]; } } + +#undef k_xor +#undef k_lookup +#undef extract_byte diff --git a/OpenCL/inc_cipher_kuznyechik.h b/OpenCL/inc_cipher_kuznyechik.h index fea3b325d..07c738c65 100644 --- a/OpenCL/inc_cipher_kuznyechik.h +++ b/OpenCL/inc_cipher_kuznyechik.h @@ -6,23 +6,6 @@ #ifndef _INC_CIPHER_KUZNYECHIK_H #define _INC_CIPHER_KUZNYECHIK_H -#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) - -#define k_lookup(w,sbox) \ - for (int i = 0; i < 4; i++) \ - w[i] = sbox[extract_byte (w[i], 0)] << 0 \ - | sbox[extract_byte (w[i], 1)] << 8 \ - | sbox[extract_byte (w[i], 2)] << 16 \ - | sbox[extract_byte (w[i], 3)] << 24 - -#define k_xor(n) \ - for (int i = (n); i > 0; i /= 2) \ - { \ - z ^= x * (i % 2); \ - x = (x << 1) ^ ((x >> 7) * 0xc3); \ - x &= 0xff; \ - } - CONSTANT_AS const u32a k_sbox[256] = { 0xfc, 0xee, 0xdd, 0x11, 0xcf, 0x6e, 0x31, 0x16, diff --git a/OpenCL/inc_cipher_serpent.cl b/OpenCL/inc_cipher_serpent.cl index 303011a8e..5bdb3c3d4 100644 --- a/OpenCL/inc_cipher_serpent.cl +++ b/OpenCL/inc_cipher_serpent.cl @@ -21,6 +21,391 @@ #include "inc_common.h" #include "inc_cipher_serpent.h" +/* 15 terms */ + +#define sb0(a,b,c,d,e,f,g,h) \ + t1 = a ^ d; \ + t2 = a & d; \ + t3 = c ^ t1; \ + t6 = b & t1; \ + t4 = b ^ t3; \ + t10 = ~t3; \ + h = t2 ^ t4; \ + t7 = a ^ t6; \ + t14 = ~t7; \ + t8 = c | t7; \ + t11 = t3 ^ t7; \ + g = t4 ^ t8; \ + t12 = h & t11; \ + f = t10 ^ t12; \ + e = t12 ^ t14 + +/* 15 terms */ + +#define ib0(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = a ^ b; \ + t3 = t1 | t2; \ + t4 = d ^ t3; \ + t7 = d & t2; \ + t5 = c ^ t4; \ + t8 = t1 ^ t7; \ + g = t2 ^ t5; \ + t11 = a & t4; \ + t9 = g & t8; \ + t14 = t5 ^ t8; \ + f = t4 ^ t9; \ + t12 = t5 | f; \ + h = t11 ^ t12; \ + e = h ^ t14 + +/* 14 terms! */ + +#define sb1(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = b ^ t1; \ + t3 = a | t2; \ + t4 = d | t2; \ + t5 = c ^ t3; \ + g = d ^ t5; \ + t7 = b ^ t4; \ + t8 = t2 ^ g; \ + t9 = t5 & t7; \ + h = t8 ^ t9; \ + t11 = t5 ^ t7; \ + f = h ^ t11; \ + t13 = t8 & t11; \ + e = t5 ^ t13 + +/* 17 terms */ + +#define ib1(a,b,c,d,e,f,g,h) \ + t1 = a ^ d; \ + t2 = a & b; \ + t3 = b ^ c; \ + t4 = a ^ t3; \ + t5 = b | d; \ + t7 = c | t1; \ + h = t4 ^ t5; \ + t8 = b ^ t7; \ + t11 = ~t2; \ + t9 = t4 & t8; \ + f = t1 ^ t9; \ + t13 = t9 ^ t11; \ + t12 = h & f; \ + g = t12 ^ t13; \ + t15 = a & d; \ + t16 = c ^ t13; \ + e = t15 ^ t16 + +/* 16 terms */ + +#define sb2(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = b ^ d; \ + t3 = c & t1; \ + t13 = d | t1; \ + e = t2 ^ t3; \ + t5 = c ^ t1; \ + t6 = c ^ e; \ + t7 = b & t6; \ + t10 = e | t5; \ + h = t5 ^ t7; \ + t9 = d | t7; \ + t11 = t9 & t10; \ + t14 = t2 ^ h; \ + g = a ^ t11; \ + t15 = g ^ t13; \ + f = t14 ^ t15 + +/* 16 terms */ + +#define ib2(a,b,c,d,e,f,g,h) \ + t1 = b ^ d; \ + t2 = ~t1; \ + t3 = a ^ c; \ + t4 = c ^ t1; \ + t7 = a | t2; \ + t5 = b & t4; \ + t8 = d ^ t7; \ + t11 = ~t4; \ + e = t3 ^ t5; \ + t9 = t3 | t8; \ + t14 = d & t11; \ + h = t1 ^ t9; \ + t12 = e | h; \ + f = t11 ^ t12; \ + t15 = t3 ^ t12; \ + g = t14 ^ t15 + +/* 17 terms */ + +#define sb3(a,b,c,d,e,f,g,h) \ + t1 = a ^ c; \ + t2 = d ^ t1; \ + t3 = a & t2; \ + t4 = d ^ t3; \ + t5 = b & t4; \ + g = t2 ^ t5; \ + t7 = a | g; \ + t8 = b | d; \ + t11 = a | d; \ + t9 = t4 & t7; \ + f = t8 ^ t9; \ + t12 = b ^ t11; \ + t13 = g ^ t9; \ + t15 = t3 ^ t8; \ + h = t12 ^ t13; \ + t16 = c & t15; \ + e = t12 ^ t16 + +/* 16 term solution that performs less well than 17 term one + in my environment (PPro/PII) + +#define sb3(a,b,c,d,e,f,g,h) \ + t1 = a ^ b; \ + t2 = a & c; \ + t3 = a | d; \ + t4 = c ^ d; \ + t5 = t1 & t3; \ + t6 = t2 | t5; \ + g = t4 ^ t6; \ + t8 = b ^ t3; \ + t9 = t6 ^ t8; \ + t10 = t4 & t9; \ + e = t1 ^ t10; \ + t12 = g & e; \ + f = t9 ^ t12; \ + t14 = b | d; \ + t15 = t4 ^ t12; \ + h = t14 ^ t15 +*/ + +/* 17 terms */ + +#define ib3(a,b,c,d,e,f,g,h) \ + t1 = b ^ c; \ + t2 = b | c; \ + t3 = a ^ c; \ + t7 = a ^ d; \ + t4 = t2 ^ t3; \ + t5 = d | t4; \ + t9 = t2 ^ t7; \ + e = t1 ^ t5; \ + t8 = t1 | t5; \ + t11 = a & t4; \ + g = t8 ^ t9; \ + t12 = e | t9; \ + f = t11 ^ t12; \ + t14 = a & g; \ + t15 = t2 ^ t14; \ + t16 = e & t15; \ + h = t4 ^ t16 + +/* 15 terms */ + +#define sb4(a,b,c,d,e,f,g,h) \ + t1 = a ^ d; \ + t2 = d & t1; \ + t3 = c ^ t2; \ + t4 = b | t3; \ + h = t1 ^ t4; \ + t6 = ~b; \ + t7 = t1 | t6; \ + e = t3 ^ t7; \ + t9 = a & e; \ + t10 = t1 ^ t6; \ + t11 = t4 & t10; \ + g = t9 ^ t11; \ + t13 = a ^ t3; \ + t14 = t10 & g; \ + f = t13 ^ t14 + +/* 17 terms */ + +#define ib4(a,b,c,d,e,f,g,h) \ + t1 = c ^ d; \ + t2 = c | d; \ + t3 = b ^ t2; \ + t4 = a & t3; \ + f = t1 ^ t4; \ + t6 = a ^ d; \ + t7 = b | d; \ + t8 = t6 & t7; \ + h = t3 ^ t8; \ + t10 = ~a; \ + t11 = c ^ h; \ + t12 = t10 | t11;\ + e = t3 ^ t12; \ + t14 = c | t4; \ + t15 = t7 ^ t14; \ + t16 = h | t10; \ + g = t15 ^ t16 + +/* 16 terms */ + +#define sb5(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = a ^ b; \ + t3 = a ^ d; \ + t4 = c ^ t1; \ + t5 = t2 | t3; \ + e = t4 ^ t5; \ + t7 = d & e; \ + t8 = t2 ^ e; \ + t10 = t1 | e; \ + f = t7 ^ t8; \ + t11 = t2 | t7; \ + t12 = t3 ^ t10; \ + t14 = b ^ t7; \ + g = t11 ^ t12; \ + t15 = f & t12; \ + h = t14 ^ t15 + +/* 16 terms */ + +#define ib5(a,b,c,d,e,f,g,h) \ + t1 = ~c; \ + t2 = b & t1; \ + t3 = d ^ t2; \ + t4 = a & t3; \ + t5 = b ^ t1; \ + h = t4 ^ t5; \ + t7 = b | h; \ + t8 = a & t7; \ + f = t3 ^ t8; \ + t10 = a | d; \ + t11 = t1 ^ t7; \ + e = t10 ^ t11; \ + t13 = a ^ c; \ + t14 = b & t10; \ + t15 = t4 | t13; \ + g = t14 ^ t15 + +/* 15 terms */ + +#define sb6(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = a ^ d; \ + t3 = b ^ t2; \ + t4 = t1 | t2; \ + t5 = c ^ t4; \ + f = b ^ t5; \ + t13 = ~t5; \ + t7 = t2 | f; \ + t8 = d ^ t7; \ + t9 = t5 & t8; \ + g = t3 ^ t9; \ + t11 = t5 ^ t8; \ + e = g ^ t11; \ + t14 = t3 & t11; \ + h = t13 ^ t14 + +/* 15 terms */ + +#define ib6(a,b,c,d,e,f,g,h) \ + t1 = ~a; \ + t2 = a ^ b; \ + t3 = c ^ t2; \ + t4 = c | t1; \ + t5 = d ^ t4; \ + t13 = d & t1; \ + f = t3 ^ t5; \ + t7 = t3 & t5; \ + t8 = t2 ^ t7; \ + t9 = b | t8; \ + h = t5 ^ t9; \ + t11 = b | h; \ + e = t8 ^ t11; \ + t14 = t3 ^ t11; \ + g = t13 ^ t14 + +/* 17 terms */ + +#define sb7(a,b,c,d,e,f,g,h) \ + t1 = ~c; \ + t2 = b ^ c; \ + t3 = b | t1; \ + t4 = d ^ t3; \ + t5 = a & t4; \ + t7 = a ^ d; \ + h = t2 ^ t5; \ + t8 = b ^ t5; \ + t9 = t2 | t8; \ + t11 = d & t3; \ + f = t7 ^ t9; \ + t12 = t5 ^ f; \ + t15 = t1 | t4; \ + t13 = h & t12; \ + g = t11 ^ t13; \ + t16 = t12 ^ g; \ + e = t15 ^ t16 + +/* 17 terms */ + +#define ib7(a,b,c,d,e,f,g,h) \ + t1 = a & b; \ + t2 = a | b; \ + t3 = c | t1; \ + t4 = d & t2; \ + h = t3 ^ t4; \ + t6 = ~d; \ + t7 = b ^ t4; \ + t8 = h ^ t6; \ + t11 = c ^ t7; \ + t9 = t7 | t8; \ + f = a ^ t9; \ + t12 = d | f; \ + e = t11 ^ t12; \ + t14 = a & h; \ + t15 = t3 ^ f; \ + t16 = e ^ t14; \ + g = t15 ^ t16 + +#define k_xor(r,a,b,c,d) \ + a ^= ks[4 * r + 8]; \ + b ^= ks[4 * r + 9]; \ + c ^= ks[4 * r + 10]; \ + d ^= ks[4 * r + 11] + +#define k_set(r,a,b,c,d) \ + a = ks[4 * r + 8]; \ + b = ks[4 * r + 9]; \ + c = ks[4 * r + 10]; \ + d = ks[4 * r + 11] + +#define k_get(r,a,b,c,d) \ + ks[4 * r + 8] = a; \ + ks[4 * r + 9] = b; \ + ks[4 * r + 10] = c; \ + ks[4 * r + 11] = d + +/* the linear transformation and its inverse */ + +#define rot(a,b,c,d) \ + a = hc_rotl32_S(a, 13); \ + c = hc_rotl32_S(c, 3); \ + d ^= c ^ (a << 3); \ + b ^= a ^ c; \ + d = hc_rotl32_S(d, 7); \ + b = hc_rotl32_S(b, 1); \ + a ^= b ^ d; \ + c ^= d ^ (b << 7); \ + a = hc_rotl32_S(a, 5); \ + c = hc_rotl32_S(c, 22) + +#define irot(a,b,c,d) \ + c = hc_rotr32_S(c, 22); \ + a = hc_rotr32_S(a, 5); \ + c ^= d ^ (b << 7); \ + a ^= b ^ d; \ + d = hc_rotr32_S(d, 7); \ + b = hc_rotr32_S(b, 1); \ + d ^= c ^ (a << 3); \ + b ^= a ^ c; \ + c = hc_rotr32_S(c, 3); \ + a = hc_rotr32_S(a, 13) + // 128 bit key DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey) @@ -586,3 +971,25 @@ DECLSPEC void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out) out[2] = c; out[3] = d; } + +#undef sb0 +#undef ib0 +#undef sb1 +#undef ib1 +#undef sb2 +#undef ib2 +#undef sb3 +#undef ib3 +#undef sb4 +#undef ib4 +#undef sb5 +#undef ib5 +#undef sb6 +#undef ib6 +#undef sb7 +#undef ib7 +#undef k_xor +#undef k_set +#undef k_get +#undef rot +#undef irot diff --git a/OpenCL/inc_cipher_serpent.h b/OpenCL/inc_cipher_serpent.h index 95e063a1d..7481f0d10 100644 --- a/OpenCL/inc_cipher_serpent.h +++ b/OpenCL/inc_cipher_serpent.h @@ -6,391 +6,6 @@ #ifndef _INC_CIPHER_SERPENT_H #define _INC_CIPHER_SERPENT_H -/* 15 terms */ - -#define sb0(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = a & d; \ - t3 = c ^ t1; \ - t6 = b & t1; \ - t4 = b ^ t3; \ - t10 = ~t3; \ - h = t2 ^ t4; \ - t7 = a ^ t6; \ - t14 = ~t7; \ - t8 = c | t7; \ - t11 = t3 ^ t7; \ - g = t4 ^ t8; \ - t12 = h & t11; \ - f = t10 ^ t12; \ - e = t12 ^ t14 - -/* 15 terms */ - -#define ib0(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = t1 | t2; \ - t4 = d ^ t3; \ - t7 = d & t2; \ - t5 = c ^ t4; \ - t8 = t1 ^ t7; \ - g = t2 ^ t5; \ - t11 = a & t4; \ - t9 = g & t8; \ - t14 = t5 ^ t8; \ - f = t4 ^ t9; \ - t12 = t5 | f; \ - h = t11 ^ t12; \ - e = h ^ t14 - -/* 14 terms! */ - -#define sb1(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = b ^ t1; \ - t3 = a | t2; \ - t4 = d | t2; \ - t5 = c ^ t3; \ - g = d ^ t5; \ - t7 = b ^ t4; \ - t8 = t2 ^ g; \ - t9 = t5 & t7; \ - h = t8 ^ t9; \ - t11 = t5 ^ t7; \ - f = h ^ t11; \ - t13 = t8 & t11; \ - e = t5 ^ t13 - -/* 17 terms */ - -#define ib1(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = a & b; \ - t3 = b ^ c; \ - t4 = a ^ t3; \ - t5 = b | d; \ - t7 = c | t1; \ - h = t4 ^ t5; \ - t8 = b ^ t7; \ - t11 = ~t2; \ - t9 = t4 & t8; \ - f = t1 ^ t9; \ - t13 = t9 ^ t11; \ - t12 = h & f; \ - g = t12 ^ t13; \ - t15 = a & d; \ - t16 = c ^ t13; \ - e = t15 ^ t16 - -/* 16 terms */ - -#define sb2(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = b ^ d; \ - t3 = c & t1; \ - t13 = d | t1; \ - e = t2 ^ t3; \ - t5 = c ^ t1; \ - t6 = c ^ e; \ - t7 = b & t6; \ - t10 = e | t5; \ - h = t5 ^ t7; \ - t9 = d | t7; \ - t11 = t9 & t10; \ - t14 = t2 ^ h; \ - g = a ^ t11; \ - t15 = g ^ t13; \ - f = t14 ^ t15 - -/* 16 terms */ - -#define ib2(a,b,c,d,e,f,g,h) \ - t1 = b ^ d; \ - t2 = ~t1; \ - t3 = a ^ c; \ - t4 = c ^ t1; \ - t7 = a | t2; \ - t5 = b & t4; \ - t8 = d ^ t7; \ - t11 = ~t4; \ - e = t3 ^ t5; \ - t9 = t3 | t8; \ - t14 = d & t11; \ - h = t1 ^ t9; \ - t12 = e | h; \ - f = t11 ^ t12; \ - t15 = t3 ^ t12; \ - g = t14 ^ t15 - -/* 17 terms */ - -#define sb3(a,b,c,d,e,f,g,h) \ - t1 = a ^ c; \ - t2 = d ^ t1; \ - t3 = a & t2; \ - t4 = d ^ t3; \ - t5 = b & t4; \ - g = t2 ^ t5; \ - t7 = a | g; \ - t8 = b | d; \ - t11 = a | d; \ - t9 = t4 & t7; \ - f = t8 ^ t9; \ - t12 = b ^ t11; \ - t13 = g ^ t9; \ - t15 = t3 ^ t8; \ - h = t12 ^ t13; \ - t16 = c & t15; \ - e = t12 ^ t16 - -/* 16 term solution that performs less well than 17 term one - in my environment (PPro/PII) - -#define sb3(a,b,c,d,e,f,g,h) \ - t1 = a ^ b; \ - t2 = a & c; \ - t3 = a | d; \ - t4 = c ^ d; \ - t5 = t1 & t3; \ - t6 = t2 | t5; \ - g = t4 ^ t6; \ - t8 = b ^ t3; \ - t9 = t6 ^ t8; \ - t10 = t4 & t9; \ - e = t1 ^ t10; \ - t12 = g & e; \ - f = t9 ^ t12; \ - t14 = b | d; \ - t15 = t4 ^ t12; \ - h = t14 ^ t15 -*/ - -/* 17 terms */ - -#define ib3(a,b,c,d,e,f,g,h) \ - t1 = b ^ c; \ - t2 = b | c; \ - t3 = a ^ c; \ - t7 = a ^ d; \ - t4 = t2 ^ t3; \ - t5 = d | t4; \ - t9 = t2 ^ t7; \ - e = t1 ^ t5; \ - t8 = t1 | t5; \ - t11 = a & t4; \ - g = t8 ^ t9; \ - t12 = e | t9; \ - f = t11 ^ t12; \ - t14 = a & g; \ - t15 = t2 ^ t14; \ - t16 = e & t15; \ - h = t4 ^ t16 - -/* 15 terms */ - -#define sb4(a,b,c,d,e,f,g,h) \ - t1 = a ^ d; \ - t2 = d & t1; \ - t3 = c ^ t2; \ - t4 = b | t3; \ - h = t1 ^ t4; \ - t6 = ~b; \ - t7 = t1 | t6; \ - e = t3 ^ t7; \ - t9 = a & e; \ - t10 = t1 ^ t6; \ - t11 = t4 & t10; \ - g = t9 ^ t11; \ - t13 = a ^ t3; \ - t14 = t10 & g; \ - f = t13 ^ t14 - -/* 17 terms */ - -#define ib4(a,b,c,d,e,f,g,h) \ - t1 = c ^ d; \ - t2 = c | d; \ - t3 = b ^ t2; \ - t4 = a & t3; \ - f = t1 ^ t4; \ - t6 = a ^ d; \ - t7 = b | d; \ - t8 = t6 & t7; \ - h = t3 ^ t8; \ - t10 = ~a; \ - t11 = c ^ h; \ - t12 = t10 | t11;\ - e = t3 ^ t12; \ - t14 = c | t4; \ - t15 = t7 ^ t14; \ - t16 = h | t10; \ - g = t15 ^ t16 - -/* 16 terms */ - -#define sb5(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = a ^ d; \ - t4 = c ^ t1; \ - t5 = t2 | t3; \ - e = t4 ^ t5; \ - t7 = d & e; \ - t8 = t2 ^ e; \ - t10 = t1 | e; \ - f = t7 ^ t8; \ - t11 = t2 | t7; \ - t12 = t3 ^ t10; \ - t14 = b ^ t7; \ - g = t11 ^ t12; \ - t15 = f & t12; \ - h = t14 ^ t15 - -/* 16 terms */ - -#define ib5(a,b,c,d,e,f,g,h) \ - t1 = ~c; \ - t2 = b & t1; \ - t3 = d ^ t2; \ - t4 = a & t3; \ - t5 = b ^ t1; \ - h = t4 ^ t5; \ - t7 = b | h; \ - t8 = a & t7; \ - f = t3 ^ t8; \ - t10 = a | d; \ - t11 = t1 ^ t7; \ - e = t10 ^ t11; \ - t13 = a ^ c; \ - t14 = b & t10; \ - t15 = t4 | t13; \ - g = t14 ^ t15 - -/* 15 terms */ - -#define sb6(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ d; \ - t3 = b ^ t2; \ - t4 = t1 | t2; \ - t5 = c ^ t4; \ - f = b ^ t5; \ - t13 = ~t5; \ - t7 = t2 | f; \ - t8 = d ^ t7; \ - t9 = t5 & t8; \ - g = t3 ^ t9; \ - t11 = t5 ^ t8; \ - e = g ^ t11; \ - t14 = t3 & t11; \ - h = t13 ^ t14 - -/* 15 terms */ - -#define ib6(a,b,c,d,e,f,g,h) \ - t1 = ~a; \ - t2 = a ^ b; \ - t3 = c ^ t2; \ - t4 = c | t1; \ - t5 = d ^ t4; \ - t13 = d & t1; \ - f = t3 ^ t5; \ - t7 = t3 & t5; \ - t8 = t2 ^ t7; \ - t9 = b | t8; \ - h = t5 ^ t9; \ - t11 = b | h; \ - e = t8 ^ t11; \ - t14 = t3 ^ t11; \ - g = t13 ^ t14 - -/* 17 terms */ - -#define sb7(a,b,c,d,e,f,g,h) \ - t1 = ~c; \ - t2 = b ^ c; \ - t3 = b | t1; \ - t4 = d ^ t3; \ - t5 = a & t4; \ - t7 = a ^ d; \ - h = t2 ^ t5; \ - t8 = b ^ t5; \ - t9 = t2 | t8; \ - t11 = d & t3; \ - f = t7 ^ t9; \ - t12 = t5 ^ f; \ - t15 = t1 | t4; \ - t13 = h & t12; \ - g = t11 ^ t13; \ - t16 = t12 ^ g; \ - e = t15 ^ t16 - -/* 17 terms */ - -#define ib7(a,b,c,d,e,f,g,h) \ - t1 = a & b; \ - t2 = a | b; \ - t3 = c | t1; \ - t4 = d & t2; \ - h = t3 ^ t4; \ - t6 = ~d; \ - t7 = b ^ t4; \ - t8 = h ^ t6; \ - t11 = c ^ t7; \ - t9 = t7 | t8; \ - f = a ^ t9; \ - t12 = d | f; \ - e = t11 ^ t12; \ - t14 = a & h; \ - t15 = t3 ^ f; \ - t16 = e ^ t14; \ - g = t15 ^ t16 - -#define k_xor(r,a,b,c,d) \ - a ^= ks[4 * r + 8]; \ - b ^= ks[4 * r + 9]; \ - c ^= ks[4 * r + 10]; \ - d ^= ks[4 * r + 11] - -#define k_set(r,a,b,c,d) \ - a = ks[4 * r + 8]; \ - b = ks[4 * r + 9]; \ - c = ks[4 * r + 10]; \ - d = ks[4 * r + 11] - -#define k_get(r,a,b,c,d) \ - ks[4 * r + 8] = a; \ - ks[4 * r + 9] = b; \ - ks[4 * r + 10] = c; \ - ks[4 * r + 11] = d - -/* the linear transformation and its inverse */ - -#define rot(a,b,c,d) \ - a = hc_rotl32_S(a, 13); \ - c = hc_rotl32_S(c, 3); \ - d ^= c ^ (a << 3); \ - b ^= a ^ c; \ - d = hc_rotl32_S(d, 7); \ - b = hc_rotl32_S(b, 1); \ - a ^= b ^ d; \ - c ^= d ^ (b << 7); \ - a = hc_rotl32_S(a, 5); \ - c = hc_rotl32_S(c, 22) - -#define irot(a,b,c,d) \ - c = hc_rotr32_S(c, 22); \ - a = hc_rotr32_S(a, 5); \ - c ^= d ^ (b << 7); \ - a ^= b ^ d; \ - d = hc_rotr32_S(d, 7); \ - b = hc_rotr32_S(b, 1); \ - d ^= c ^ (a << 3); \ - b ^= a ^ c; \ - c = hc_rotr32_S(c, 3); \ - a = hc_rotr32_S(a, 13) - DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey); DECLSPEC void serpent128_encrypt (const u32 *ks, const u32 *in, u32 *out); DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out); diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl index dc63fd040..8de65d21f 100644 --- a/OpenCL/inc_cipher_twofish.cl +++ b/OpenCL/inc_cipher_twofish.cl @@ -24,6 +24,82 @@ #include "inc_common.h" #include "inc_cipher_twofish.h" +#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) + +#define g1_fun128(x) \ + (mds (0, q20 (extract_byte (x, 3), sk)) ^ \ + mds (1, q21 (extract_byte (x, 0), sk)) ^ \ + mds (2, q22 (extract_byte (x, 1), sk)) ^ \ + mds (3, q23 (extract_byte (x, 2), sk))) + +#define g0_fun128(x) \ + (mds (0, q20 (extract_byte (x, 0), sk)) ^ \ + mds (1, q21 (extract_byte (x, 1), sk)) ^ \ + mds (2, q22 (extract_byte (x, 2), sk)) ^ \ + mds (3, q23 (extract_byte (x, 3), sk))) + +#define f_rnd128(i) \ +{ \ + u32 t0 = g0_fun128 (data[0]); \ + u32 t1 = g1_fun128 (data[1]); \ + data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ + data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ + u32 t2 = g0_fun128 (data[2]); \ + u32 t3 = g1_fun128 (data[3]); \ + data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ + data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ +} + +#define i_rnd128(i) \ +{ \ + u32 t0 = g0_fun128 (data[0]); \ + u32 t1 = g1_fun128 (data[1]); \ + data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ + data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ + u32 t2 = g0_fun128 (data[2]); \ + u32 t3 = g1_fun128 (data[3]); \ + data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ + data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ +} + +#define f_rnd256(i) \ +{ \ + u32 t0 = g0_fun256 (data[0]); \ + u32 t1 = g1_fun256 (data[1]); \ + data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ + data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ + u32 t2 = g0_fun256 (data[2]); \ + u32 t3 = g1_fun256 (data[3]); \ + data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ + data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ +} + +#define i_rnd256(i) \ +{ \ + u32 t0 = g0_fun256 (data[0]); \ + u32 t1 = g1_fun256 (data[1]); \ + data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ + data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ + u32 t2 = g0_fun256 (data[2]); \ + u32 t3 = g1_fun256 (data[3]); \ + data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ + data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ +} + +#define q(n,x) q_tab[n][x] + +#define mds(n,x) m_tab[n][x] + +#define q20(x,k) q (0, q (0, x) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0) +#define q21(x,k) q (0, q (1, x) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1) +#define q22(x,k) q (1, q (0, x) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2) +#define q23(x,k) q (1, q (1, x) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3) + +#define q40(x,k) q (0, q (0, q (1, q (1, x) ^ extract_byte (k[3], 0)) ^ extract_byte (k[2], 0)) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0) +#define q41(x,k) q (0, q (1, q (1, q (0, x) ^ extract_byte (k[3], 1)) ^ extract_byte (k[2], 1)) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1) +#define q42(x,k) q (1, q (0, q (0, q (0, x) ^ extract_byte (k[3], 2)) ^ extract_byte (k[2], 2)) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2) +#define q43(x,k) q (1, q (1, q (0, q (1, x) ^ extract_byte (k[3], 3)) ^ extract_byte (k[2], 3)) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3) + DECLSPEC u32 mds_rem (u32 p0, u32 p1) { #define G_MOD 0x14d @@ -274,3 +350,23 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u out[2] = data[0] ^ lk[2]; out[3] = data[1] ^ lk[3]; } + +#define g1_fun128 +#define g0_fun128 +#define f_rnd128 +#define i_rnd128 +#define f_rnd256 +#define i_rnd256 + +#define q + +#define mds + +#define q20 +#define q21 +#define q22 +#define q23 +#define q40 +#define q41 +#define q42 +#define q43 diff --git a/OpenCL/inc_cipher_twofish.h b/OpenCL/inc_cipher_twofish.h index 3f5e6b682..5a61e3ddf 100644 --- a/OpenCL/inc_cipher_twofish.h +++ b/OpenCL/inc_cipher_twofish.h @@ -6,70 +6,6 @@ #ifndef _INC_CIPHER_TWOFISH_H #define _INC_CIPHER_TWOFISH_H -#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff) - -// 128 bit key - -#define g1_fun128(x) \ - (mds (0, q20 (extract_byte (x, 3), sk)) ^ \ - mds (1, q21 (extract_byte (x, 0), sk)) ^ \ - mds (2, q22 (extract_byte (x, 1), sk)) ^ \ - mds (3, q23 (extract_byte (x, 2), sk))) - -#define g0_fun128(x) \ - (mds (0, q20 (extract_byte (x, 0), sk)) ^ \ - mds (1, q21 (extract_byte (x, 1), sk)) ^ \ - mds (2, q22 (extract_byte (x, 2), sk)) ^ \ - mds (3, q23 (extract_byte (x, 3), sk))) - -#define f_rnd128(i) \ -{ \ - u32 t0 = g0_fun128 (data[0]); \ - u32 t1 = g1_fun128 (data[1]); \ - data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ - data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ - u32 t2 = g0_fun128 (data[2]); \ - u32 t3 = g1_fun128 (data[3]); \ - data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ - data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ -} - -#define i_rnd128(i) \ -{ \ - u32 t0 = g0_fun128 (data[0]); \ - u32 t1 = g1_fun128 (data[1]); \ - data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ - data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ - u32 t2 = g0_fun128 (data[2]); \ - u32 t3 = g1_fun128 (data[3]); \ - data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ - data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ -} - -#define f_rnd256(i) \ -{ \ - u32 t0 = g0_fun256 (data[0]); \ - u32 t1 = g1_fun256 (data[1]); \ - data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \ - data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \ - u32 t2 = g0_fun256 (data[2]); \ - u32 t3 = g1_fun256 (data[3]); \ - data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \ - data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \ -} - -#define i_rnd256(i) \ -{ \ - u32 t0 = g0_fun256 (data[0]); \ - u32 t1 = g1_fun256 (data[1]); \ - data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \ - data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \ - u32 t2 = g0_fun256 (data[2]); \ - u32 t3 = g1_fun256 (data[3]); \ - data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \ - data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \ -} - CONSTANT_AS u32a q_tab[2][256] = { { @@ -122,8 +58,6 @@ CONSTANT_AS u32a q_tab[2][256] = } }; -#define q(n,x) q_tab[n][x] - CONSTANT_AS u32a m_tab[4][256] = { { 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B, @@ -303,18 +237,6 @@ CONSTANT_AS u32a m_tab[4][256] = 0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 } }; -#define mds(n,x) m_tab[n][x] - -#define q20(x,k) q (0, q (0, x) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0) -#define q21(x,k) q (0, q (1, x) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1) -#define q22(x,k) q (1, q (0, x) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2) -#define q23(x,k) q (1, q (1, x) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3) - -#define q40(x,k) q (0, q (0, q (1, q (1, x) ^ extract_byte (k[3], 0)) ^ extract_byte (k[2], 0)) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0) -#define q41(x,k) q (0, q (1, q (1, q (0, x) ^ extract_byte (k[3], 1)) ^ extract_byte (k[2], 1)) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1) -#define q42(x,k) q (1, q (0, q (0, q (0, x) ^ extract_byte (k[3], 2)) ^ extract_byte (k[2], 2)) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2) -#define q43(x,k) q (1, q (1, q (0, q (1, x) ^ extract_byte (k[3], 3)) ^ extract_byte (k[2], 3)) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3) - DECLSPEC u32 mds_rem (u32 p0, u32 p1); DECLSPEC u32 h_fun128 (u32 *sk, u32 *lk, const u32 x, const u32 *key); DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey); diff --git a/OpenCL/inc_rp.cl b/OpenCL/inc_rp.cl index 7bf9ca910..9ac992abe 100644 --- a/OpenCL/inc_rp.cl +++ b/OpenCL/inc_rp.cl @@ -8,6 +8,21 @@ #include "inc_common.h" #include "inc_rp.h" +#ifdef REAL_SHM +#define COPY_PW(x) \ + LOCAL_AS pw_t s_pws[64]; \ + s_pws[get_local_id (0)] = (x); +#else +#define COPY_PW(x) \ + pw_t pw = (x); +#endif + +#ifdef REAL_SHM +#define PASTE_PW s_pws[get_local_id(0)]; +#else +#define PASTE_PW pw; +#endif + DECLSPEC u32 generate_cmask (const u32 value) { const u32 rmask = ((value & 0x40404040u) >> 1u) @@ -750,3 +765,6 @@ DECLSPEC int apply_rules (CONSTANT_AS const u32 *cmds, u32 *buf, const int in_le return out_len; } + +#undef COPY_PW +#undef PASTE_PW diff --git a/OpenCL/inc_rp.h b/OpenCL/inc_rp.h index da40bd05b..b486444a1 100644 --- a/OpenCL/inc_rp.h +++ b/OpenCL/inc_rp.h @@ -10,21 +10,6 @@ #define MAYBE_UNUSED #endif -#ifdef REAL_SHM -#define COPY_PW(x) \ - LOCAL_AS pw_t s_pws[64]; \ - s_pws[get_local_id (0)] = (x); -#else -#define COPY_PW(x) \ - pw_t pw = (x); -#endif - -#ifdef REAL_SHM -#define PASTE_PW s_pws[get_local_id(0)]; -#else -#define PASTE_PW pw; -#endif - #define RULE_OP_MANGLE_NOOP ':' #define RULE_OP_MANGLE_LREST 'l' #define RULE_OP_MANGLE_UREST 'u' diff --git a/OpenCL/inc_veracrypt_xts.cl b/OpenCL/inc_veracrypt_xts.cl index 1a916e582..461e50a2d 100644 --- a/OpenCL/inc_veracrypt_xts.cl +++ b/OpenCL/inc_veracrypt_xts.cl @@ -11,7 +11,7 @@ #include "inc_cipher_twofish.h" #include "inc_cipher_camellia.h" #include "inc_cipher_kuznyechik.h" -#include "inc_hash_veracrypt_xts.h" +#include "inc_veracrypt_xts.h" DECLSPEC void camellia256_decrypt_xts_first (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out, u32 *S, u32 *T, u32 *ks) {