Move some macros from .h to .cl sources

pull/1971/head
jsteube 5 years ago
parent e80b1838e8
commit 977199698f

@ -20,6 +20,15 @@
#include "inc_common.h"
#include "inc_cipher_camellia.h"
#define c_sbox1(n) c_sbox[(n)]
#define c_sbox2(n) (((c_sbox[(n)] >> 7) ^ (c_sbox[(n)] << 1)) & 0xff)
#define c_sbox3(n) (((c_sbox[(n)] >> 1) ^ (c_sbox[(n)] << 7)) & 0xff)
#define c_sbox4(n) c_sbox[(((n) << 1) ^ ((n) >> 7)) & 0xff]
#define cam_rotate(a,b,n) hc_swap32_S ((u[(a)] << (n)) ^ (u[(b)] >> (32 - (n))))
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
DECLSPEC void cam_feistel (const u32 *x, const u32 *k, u32 *y)
{
u32 b[8];
@ -323,3 +332,12 @@ DECLSPEC void camellia256_decrypt (const u32 *ks, const u32 *in, u32 *out)
out[2] = tmp[0] ^ ks[2];
out[3] = tmp[1] ^ ks[3];
}
#undef c_sbox1
#undef c_sbox2
#undef c_sbox3
#undef c_sbox4
#undef cam_rotate
#undef extract_byte

@ -42,15 +42,6 @@ CONSTANT_AS const u32a c_sbox[256] =
0x15, 0xe3, 0xad, 0xf4, 0x77, 0xc7, 0x80, 0x9e
};
#define c_sbox1(n) c_sbox[(n)]
#define c_sbox2(n) (((c_sbox[(n)] >> 7) ^ (c_sbox[(n)] << 1)) & 0xff)
#define c_sbox3(n) (((c_sbox[(n)] >> 1) ^ (c_sbox[(n)] << 7)) & 0xff)
#define c_sbox4(n) c_sbox[(((n) << 1) ^ ((n) >> 7)) & 0xff]
#define cam_rotate(a,b,n) hc_swap32_S ((u[(a)] << (n)) ^ (u[(b)] >> (32 - (n))))
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
DECLSPEC void cam_feistel (const u32 *x, const u32 *k, u32 *y);
DECLSPEC void cam_fl (u32 *x, const u32 *kl, const u32 *kr);
DECLSPEC void camellia256_set_key (u32 *ks, const u32 *ukey);

@ -8,6 +8,58 @@
#include "inc_common.h"
#include "inc_cipher_des.h"
#define PERM_OP(a,b,n,m) \
{ \
u32x t; \
t = a >> n; \
t = t ^ b; \
t = t & m; \
b = b ^ t; \
t = t << n; \
a = a ^ t; \
}
#define HPERM_OP(a,n,m) \
{ \
u32x t; \
t = a << (16 + n); \
t = t ^ a; \
t = t & m; \
a = a ^ t; \
t = t >> (16 + n); \
a = a ^ t; \
}
#define DES_IP(l,r) \
{ \
PERM_OP (r, l, 4, 0x0f0f0f0f); \
PERM_OP (l, r, 16, 0x0000ffff); \
PERM_OP (r, l, 2, 0x33333333); \
PERM_OP (l, r, 8, 0x00ff00ff); \
PERM_OP (r, l, 1, 0x55555555); \
}
#define DES_FP(l,r) \
{ \
PERM_OP (l, r, 1, 0x55555555); \
PERM_OP (r, l, 8, 0x00ff00ff); \
PERM_OP (l, r, 2, 0x33333333); \
PERM_OP (r, l, 16, 0x0000ffff); \
PERM_OP (l, r, 4, 0x0f0f0f0f); \
}
#if VECT_SIZE == 1
#define DES_BOX(i,n,S) (S)[(n)][(i)]
#elif VECT_SIZE == 2
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#elif VECT_SIZE == 4
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#elif VECT_SIZE == 8
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#elif VECT_SIZE == 16
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64])
{
u32x r = data[0];
@ -172,3 +224,10 @@ DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE
Kd[i] = hc_rotl32 (Kd[i], 2u);
}
}
#undef PERM_OP
#undef HPERM_OP
#undef DES_IP
#undef DES_FP
#undef DES_BOX

@ -310,58 +310,6 @@ CONSTANT_AS u32a c_skb[8][64] =
}
};
#define PERM_OP(a,b,n,m) \
{ \
u32x t; \
t = a >> n; \
t = t ^ b; \
t = t & m; \
b = b ^ t; \
t = t << n; \
a = a ^ t; \
}
#define HPERM_OP(a,n,m) \
{ \
u32x t; \
t = a << (16 + n); \
t = t ^ a; \
t = t & m; \
a = a ^ t; \
t = t >> (16 + n); \
a = a ^ t; \
}
#define DES_IP(l,r) \
{ \
PERM_OP (r, l, 4, 0x0f0f0f0f); \
PERM_OP (l, r, 16, 0x0000ffff); \
PERM_OP (r, l, 2, 0x33333333); \
PERM_OP (l, r, 8, 0x00ff00ff); \
PERM_OP (r, l, 1, 0x55555555); \
}
#define DES_FP(l,r) \
{ \
PERM_OP (l, r, 1, 0x55555555); \
PERM_OP (r, l, 8, 0x00ff00ff); \
PERM_OP (l, r, 2, 0x33333333); \
PERM_OP (r, l, 16, 0x0000ffff); \
PERM_OP (l, r, 4, 0x0f0f0f0f); \
}
#if VECT_SIZE == 1
#define DES_BOX(i,n,S) (S)[(n)][(i)]
#elif VECT_SIZE == 2
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1])
#elif VECT_SIZE == 4
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3])
#elif VECT_SIZE == 8
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7])
#elif VECT_SIZE == 16
#define DES_BOX(i,n,S) (u32x) ((S)[(n)][(i).s0], (S)[(n)][(i).s1], (S)[(n)][(i).s2], (S)[(n)][(i).s3], (S)[(n)][(i).s4], (S)[(n)][(i).s5], (S)[(n)][(i).s6], (S)[(n)][(i).s7], (S)[(n)][(i).s8], (S)[(n)][(i).s9], (S)[(n)][(i).sa], (S)[(n)][(i).sb], (S)[(n)][(i).sc], (S)[(n)][(i).sd], (S)[(n)][(i).se], (S)[(n)][(i).sf])
#endif
DECLSPEC void _des_crypt_encrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]);
DECLSPEC void _des_crypt_decrypt (u32x *iv, u32x *data, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_SPtrans)[64]);
DECLSPEC void _des_crypt_keysetup (u32x c, u32x d, u32x *Kc, u32x *Kd, SHM_TYPE u32 (*s_skb)[64]);

@ -17,6 +17,23 @@
#include "inc_common.h"
#include "inc_cipher_kuznyechik.h"
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
#define k_lookup(w,sbox) \
for (int i = 0; i < 4; i++) \
w[i] = sbox[extract_byte (w[i], 0)] << 0 \
| sbox[extract_byte (w[i], 1)] << 8 \
| sbox[extract_byte (w[i], 2)] << 16 \
| sbox[extract_byte (w[i], 3)] << 24
#define k_xor(n) \
for (int i = (n); i > 0; i /= 2) \
{ \
z ^= x * (i % 2); \
x = (x << 1) ^ ((x >> 7) * 0xc3); \
x &= 0xff; \
}
DECLSPEC void kuznyechik_linear (u32 *w)
{
// used in k_xor macro
@ -213,3 +230,7 @@ DECLSPEC void kuznyechik_decrypt (const u32 *ks, const u32 *in, u32 *out)
out[3] ^= ks[4 * i + 3];
}
}
#undef k_xor
#undef k_lookup
#undef extract_byte

@ -6,23 +6,6 @@
#ifndef _INC_CIPHER_KUZNYECHIK_H
#define _INC_CIPHER_KUZNYECHIK_H
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
#define k_lookup(w,sbox) \
for (int i = 0; i < 4; i++) \
w[i] = sbox[extract_byte (w[i], 0)] << 0 \
| sbox[extract_byte (w[i], 1)] << 8 \
| sbox[extract_byte (w[i], 2)] << 16 \
| sbox[extract_byte (w[i], 3)] << 24
#define k_xor(n) \
for (int i = (n); i > 0; i /= 2) \
{ \
z ^= x * (i % 2); \
x = (x << 1) ^ ((x >> 7) * 0xc3); \
x &= 0xff; \
}
CONSTANT_AS const u32a k_sbox[256] =
{
0xfc, 0xee, 0xdd, 0x11, 0xcf, 0x6e, 0x31, 0x16,

@ -21,6 +21,391 @@
#include "inc_common.h"
#include "inc_cipher_serpent.h"
/* 15 terms */
#define sb0(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = a & d; \
t3 = c ^ t1; \
t6 = b & t1; \
t4 = b ^ t3; \
t10 = ~t3; \
h = t2 ^ t4; \
t7 = a ^ t6; \
t14 = ~t7; \
t8 = c | t7; \
t11 = t3 ^ t7; \
g = t4 ^ t8; \
t12 = h & t11; \
f = t10 ^ t12; \
e = t12 ^ t14
/* 15 terms */
#define ib0(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = t1 | t2; \
t4 = d ^ t3; \
t7 = d & t2; \
t5 = c ^ t4; \
t8 = t1 ^ t7; \
g = t2 ^ t5; \
t11 = a & t4; \
t9 = g & t8; \
t14 = t5 ^ t8; \
f = t4 ^ t9; \
t12 = t5 | f; \
h = t11 ^ t12; \
e = h ^ t14
/* 14 terms! */
#define sb1(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = b ^ t1; \
t3 = a | t2; \
t4 = d | t2; \
t5 = c ^ t3; \
g = d ^ t5; \
t7 = b ^ t4; \
t8 = t2 ^ g; \
t9 = t5 & t7; \
h = t8 ^ t9; \
t11 = t5 ^ t7; \
f = h ^ t11; \
t13 = t8 & t11; \
e = t5 ^ t13
/* 17 terms */
#define ib1(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = a & b; \
t3 = b ^ c; \
t4 = a ^ t3; \
t5 = b | d; \
t7 = c | t1; \
h = t4 ^ t5; \
t8 = b ^ t7; \
t11 = ~t2; \
t9 = t4 & t8; \
f = t1 ^ t9; \
t13 = t9 ^ t11; \
t12 = h & f; \
g = t12 ^ t13; \
t15 = a & d; \
t16 = c ^ t13; \
e = t15 ^ t16
/* 16 terms */
#define sb2(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = b ^ d; \
t3 = c & t1; \
t13 = d | t1; \
e = t2 ^ t3; \
t5 = c ^ t1; \
t6 = c ^ e; \
t7 = b & t6; \
t10 = e | t5; \
h = t5 ^ t7; \
t9 = d | t7; \
t11 = t9 & t10; \
t14 = t2 ^ h; \
g = a ^ t11; \
t15 = g ^ t13; \
f = t14 ^ t15
/* 16 terms */
#define ib2(a,b,c,d,e,f,g,h) \
t1 = b ^ d; \
t2 = ~t1; \
t3 = a ^ c; \
t4 = c ^ t1; \
t7 = a | t2; \
t5 = b & t4; \
t8 = d ^ t7; \
t11 = ~t4; \
e = t3 ^ t5; \
t9 = t3 | t8; \
t14 = d & t11; \
h = t1 ^ t9; \
t12 = e | h; \
f = t11 ^ t12; \
t15 = t3 ^ t12; \
g = t14 ^ t15
/* 17 terms */
#define sb3(a,b,c,d,e,f,g,h) \
t1 = a ^ c; \
t2 = d ^ t1; \
t3 = a & t2; \
t4 = d ^ t3; \
t5 = b & t4; \
g = t2 ^ t5; \
t7 = a | g; \
t8 = b | d; \
t11 = a | d; \
t9 = t4 & t7; \
f = t8 ^ t9; \
t12 = b ^ t11; \
t13 = g ^ t9; \
t15 = t3 ^ t8; \
h = t12 ^ t13; \
t16 = c & t15; \
e = t12 ^ t16
/* 16 term solution that performs less well than 17 term one
in my environment (PPro/PII)
#define sb3(a,b,c,d,e,f,g,h) \
t1 = a ^ b; \
t2 = a & c; \
t3 = a | d; \
t4 = c ^ d; \
t5 = t1 & t3; \
t6 = t2 | t5; \
g = t4 ^ t6; \
t8 = b ^ t3; \
t9 = t6 ^ t8; \
t10 = t4 & t9; \
e = t1 ^ t10; \
t12 = g & e; \
f = t9 ^ t12; \
t14 = b | d; \
t15 = t4 ^ t12; \
h = t14 ^ t15
*/
/* 17 terms */
#define ib3(a,b,c,d,e,f,g,h) \
t1 = b ^ c; \
t2 = b | c; \
t3 = a ^ c; \
t7 = a ^ d; \
t4 = t2 ^ t3; \
t5 = d | t4; \
t9 = t2 ^ t7; \
e = t1 ^ t5; \
t8 = t1 | t5; \
t11 = a & t4; \
g = t8 ^ t9; \
t12 = e | t9; \
f = t11 ^ t12; \
t14 = a & g; \
t15 = t2 ^ t14; \
t16 = e & t15; \
h = t4 ^ t16
/* 15 terms */
#define sb4(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = d & t1; \
t3 = c ^ t2; \
t4 = b | t3; \
h = t1 ^ t4; \
t6 = ~b; \
t7 = t1 | t6; \
e = t3 ^ t7; \
t9 = a & e; \
t10 = t1 ^ t6; \
t11 = t4 & t10; \
g = t9 ^ t11; \
t13 = a ^ t3; \
t14 = t10 & g; \
f = t13 ^ t14
/* 17 terms */
#define ib4(a,b,c,d,e,f,g,h) \
t1 = c ^ d; \
t2 = c | d; \
t3 = b ^ t2; \
t4 = a & t3; \
f = t1 ^ t4; \
t6 = a ^ d; \
t7 = b | d; \
t8 = t6 & t7; \
h = t3 ^ t8; \
t10 = ~a; \
t11 = c ^ h; \
t12 = t10 | t11;\
e = t3 ^ t12; \
t14 = c | t4; \
t15 = t7 ^ t14; \
t16 = h | t10; \
g = t15 ^ t16
/* 16 terms */
#define sb5(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = a ^ d; \
t4 = c ^ t1; \
t5 = t2 | t3; \
e = t4 ^ t5; \
t7 = d & e; \
t8 = t2 ^ e; \
t10 = t1 | e; \
f = t7 ^ t8; \
t11 = t2 | t7; \
t12 = t3 ^ t10; \
t14 = b ^ t7; \
g = t11 ^ t12; \
t15 = f & t12; \
h = t14 ^ t15
/* 16 terms */
#define ib5(a,b,c,d,e,f,g,h) \
t1 = ~c; \
t2 = b & t1; \
t3 = d ^ t2; \
t4 = a & t3; \
t5 = b ^ t1; \
h = t4 ^ t5; \
t7 = b | h; \
t8 = a & t7; \
f = t3 ^ t8; \
t10 = a | d; \
t11 = t1 ^ t7; \
e = t10 ^ t11; \
t13 = a ^ c; \
t14 = b & t10; \
t15 = t4 | t13; \
g = t14 ^ t15
/* 15 terms */
#define sb6(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ d; \
t3 = b ^ t2; \
t4 = t1 | t2; \
t5 = c ^ t4; \
f = b ^ t5; \
t13 = ~t5; \
t7 = t2 | f; \
t8 = d ^ t7; \
t9 = t5 & t8; \
g = t3 ^ t9; \
t11 = t5 ^ t8; \
e = g ^ t11; \
t14 = t3 & t11; \
h = t13 ^ t14
/* 15 terms */
#define ib6(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = c ^ t2; \
t4 = c | t1; \
t5 = d ^ t4; \
t13 = d & t1; \
f = t3 ^ t5; \
t7 = t3 & t5; \
t8 = t2 ^ t7; \
t9 = b | t8; \
h = t5 ^ t9; \
t11 = b | h; \
e = t8 ^ t11; \
t14 = t3 ^ t11; \
g = t13 ^ t14
/* 17 terms */
#define sb7(a,b,c,d,e,f,g,h) \
t1 = ~c; \
t2 = b ^ c; \
t3 = b | t1; \
t4 = d ^ t3; \
t5 = a & t4; \
t7 = a ^ d; \
h = t2 ^ t5; \
t8 = b ^ t5; \
t9 = t2 | t8; \
t11 = d & t3; \
f = t7 ^ t9; \
t12 = t5 ^ f; \
t15 = t1 | t4; \
t13 = h & t12; \
g = t11 ^ t13; \
t16 = t12 ^ g; \
e = t15 ^ t16
/* 17 terms */
#define ib7(a,b,c,d,e,f,g,h) \
t1 = a & b; \
t2 = a | b; \
t3 = c | t1; \
t4 = d & t2; \
h = t3 ^ t4; \
t6 = ~d; \
t7 = b ^ t4; \
t8 = h ^ t6; \
t11 = c ^ t7; \
t9 = t7 | t8; \
f = a ^ t9; \
t12 = d | f; \
e = t11 ^ t12; \
t14 = a & h; \
t15 = t3 ^ f; \
t16 = e ^ t14; \
g = t15 ^ t16
#define k_xor(r,a,b,c,d) \
a ^= ks[4 * r + 8]; \
b ^= ks[4 * r + 9]; \
c ^= ks[4 * r + 10]; \
d ^= ks[4 * r + 11]
#define k_set(r,a,b,c,d) \
a = ks[4 * r + 8]; \
b = ks[4 * r + 9]; \
c = ks[4 * r + 10]; \
d = ks[4 * r + 11]
#define k_get(r,a,b,c,d) \
ks[4 * r + 8] = a; \
ks[4 * r + 9] = b; \
ks[4 * r + 10] = c; \
ks[4 * r + 11] = d
/* the linear transformation and its inverse */
#define rot(a,b,c,d) \
a = hc_rotl32_S(a, 13); \
c = hc_rotl32_S(c, 3); \
d ^= c ^ (a << 3); \
b ^= a ^ c; \
d = hc_rotl32_S(d, 7); \
b = hc_rotl32_S(b, 1); \
a ^= b ^ d; \
c ^= d ^ (b << 7); \
a = hc_rotl32_S(a, 5); \
c = hc_rotl32_S(c, 22)
#define irot(a,b,c,d) \
c = hc_rotr32_S(c, 22); \
a = hc_rotr32_S(a, 5); \
c ^= d ^ (b << 7); \
a ^= b ^ d; \
d = hc_rotr32_S(d, 7); \
b = hc_rotr32_S(b, 1); \
d ^= c ^ (a << 3); \
b ^= a ^ c; \
c = hc_rotr32_S(c, 3); \
a = hc_rotr32_S(a, 13)
// 128 bit key
DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey)
@ -586,3 +971,25 @@ DECLSPEC void serpent256_decrypt (const u32 *ks, const u32 *in, u32 *out)
out[2] = c;
out[3] = d;
}
#undef sb0
#undef ib0
#undef sb1
#undef ib1
#undef sb2
#undef ib2
#undef sb3
#undef ib3
#undef sb4
#undef ib4
#undef sb5
#undef ib5
#undef sb6
#undef ib6
#undef sb7
#undef ib7
#undef k_xor
#undef k_set
#undef k_get
#undef rot
#undef irot

@ -6,391 +6,6 @@
#ifndef _INC_CIPHER_SERPENT_H
#define _INC_CIPHER_SERPENT_H
/* 15 terms */
#define sb0(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = a & d; \
t3 = c ^ t1; \
t6 = b & t1; \
t4 = b ^ t3; \
t10 = ~t3; \
h = t2 ^ t4; \
t7 = a ^ t6; \
t14 = ~t7; \
t8 = c | t7; \
t11 = t3 ^ t7; \
g = t4 ^ t8; \
t12 = h & t11; \
f = t10 ^ t12; \
e = t12 ^ t14
/* 15 terms */
#define ib0(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = t1 | t2; \
t4 = d ^ t3; \
t7 = d & t2; \
t5 = c ^ t4; \
t8 = t1 ^ t7; \
g = t2 ^ t5; \
t11 = a & t4; \
t9 = g & t8; \
t14 = t5 ^ t8; \
f = t4 ^ t9; \
t12 = t5 | f; \
h = t11 ^ t12; \
e = h ^ t14
/* 14 terms! */
#define sb1(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = b ^ t1; \
t3 = a | t2; \
t4 = d | t2; \
t5 = c ^ t3; \
g = d ^ t5; \
t7 = b ^ t4; \
t8 = t2 ^ g; \
t9 = t5 & t7; \
h = t8 ^ t9; \
t11 = t5 ^ t7; \
f = h ^ t11; \
t13 = t8 & t11; \
e = t5 ^ t13
/* 17 terms */
#define ib1(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = a & b; \
t3 = b ^ c; \
t4 = a ^ t3; \
t5 = b | d; \
t7 = c | t1; \
h = t4 ^ t5; \
t8 = b ^ t7; \
t11 = ~t2; \
t9 = t4 & t8; \
f = t1 ^ t9; \
t13 = t9 ^ t11; \
t12 = h & f; \
g = t12 ^ t13; \
t15 = a & d; \
t16 = c ^ t13; \
e = t15 ^ t16
/* 16 terms */
#define sb2(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = b ^ d; \
t3 = c & t1; \
t13 = d | t1; \
e = t2 ^ t3; \
t5 = c ^ t1; \
t6 = c ^ e; \
t7 = b & t6; \
t10 = e | t5; \
h = t5 ^ t7; \
t9 = d | t7; \
t11 = t9 & t10; \
t14 = t2 ^ h; \
g = a ^ t11; \
t15 = g ^ t13; \
f = t14 ^ t15
/* 16 terms */
#define ib2(a,b,c,d,e,f,g,h) \
t1 = b ^ d; \
t2 = ~t1; \
t3 = a ^ c; \
t4 = c ^ t1; \
t7 = a | t2; \
t5 = b & t4; \
t8 = d ^ t7; \
t11 = ~t4; \
e = t3 ^ t5; \
t9 = t3 | t8; \
t14 = d & t11; \
h = t1 ^ t9; \
t12 = e | h; \
f = t11 ^ t12; \
t15 = t3 ^ t12; \
g = t14 ^ t15
/* 17 terms */
#define sb3(a,b,c,d,e,f,g,h) \
t1 = a ^ c; \
t2 = d ^ t1; \
t3 = a & t2; \
t4 = d ^ t3; \
t5 = b & t4; \
g = t2 ^ t5; \
t7 = a | g; \
t8 = b | d; \
t11 = a | d; \
t9 = t4 & t7; \
f = t8 ^ t9; \
t12 = b ^ t11; \
t13 = g ^ t9; \
t15 = t3 ^ t8; \
h = t12 ^ t13; \
t16 = c & t15; \
e = t12 ^ t16
/* 16 term solution that performs less well than 17 term one
in my environment (PPro/PII)
#define sb3(a,b,c,d,e,f,g,h) \
t1 = a ^ b; \
t2 = a & c; \
t3 = a | d; \
t4 = c ^ d; \
t5 = t1 & t3; \
t6 = t2 | t5; \
g = t4 ^ t6; \
t8 = b ^ t3; \
t9 = t6 ^ t8; \
t10 = t4 & t9; \
e = t1 ^ t10; \
t12 = g & e; \
f = t9 ^ t12; \
t14 = b | d; \
t15 = t4 ^ t12; \
h = t14 ^ t15
*/
/* 17 terms */
#define ib3(a,b,c,d,e,f,g,h) \
t1 = b ^ c; \
t2 = b | c; \
t3 = a ^ c; \
t7 = a ^ d; \
t4 = t2 ^ t3; \
t5 = d | t4; \
t9 = t2 ^ t7; \
e = t1 ^ t5; \
t8 = t1 | t5; \
t11 = a & t4; \
g = t8 ^ t9; \
t12 = e | t9; \
f = t11 ^ t12; \
t14 = a & g; \
t15 = t2 ^ t14; \
t16 = e & t15; \
h = t4 ^ t16
/* 15 terms */
#define sb4(a,b,c,d,e,f,g,h) \
t1 = a ^ d; \
t2 = d & t1; \
t3 = c ^ t2; \
t4 = b | t3; \
h = t1 ^ t4; \
t6 = ~b; \
t7 = t1 | t6; \
e = t3 ^ t7; \
t9 = a & e; \
t10 = t1 ^ t6; \
t11 = t4 & t10; \
g = t9 ^ t11; \
t13 = a ^ t3; \
t14 = t10 & g; \
f = t13 ^ t14
/* 17 terms */
#define ib4(a,b,c,d,e,f,g,h) \
t1 = c ^ d; \
t2 = c | d; \
t3 = b ^ t2; \
t4 = a & t3; \
f = t1 ^ t4; \
t6 = a ^ d; \
t7 = b | d; \
t8 = t6 & t7; \
h = t3 ^ t8; \
t10 = ~a; \
t11 = c ^ h; \
t12 = t10 | t11;\
e = t3 ^ t12; \
t14 = c | t4; \
t15 = t7 ^ t14; \
t16 = h | t10; \
g = t15 ^ t16
/* 16 terms */
#define sb5(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = a ^ d; \
t4 = c ^ t1; \
t5 = t2 | t3; \
e = t4 ^ t5; \
t7 = d & e; \
t8 = t2 ^ e; \
t10 = t1 | e; \
f = t7 ^ t8; \
t11 = t2 | t7; \
t12 = t3 ^ t10; \
t14 = b ^ t7; \
g = t11 ^ t12; \
t15 = f & t12; \
h = t14 ^ t15
/* 16 terms */
#define ib5(a,b,c,d,e,f,g,h) \
t1 = ~c; \
t2 = b & t1; \
t3 = d ^ t2; \
t4 = a & t3; \
t5 = b ^ t1; \
h = t4 ^ t5; \
t7 = b | h; \
t8 = a & t7; \
f = t3 ^ t8; \
t10 = a | d; \
t11 = t1 ^ t7; \
e = t10 ^ t11; \
t13 = a ^ c; \
t14 = b & t10; \
t15 = t4 | t13; \
g = t14 ^ t15
/* 15 terms */
#define sb6(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ d; \
t3 = b ^ t2; \
t4 = t1 | t2; \
t5 = c ^ t4; \
f = b ^ t5; \
t13 = ~t5; \
t7 = t2 | f; \
t8 = d ^ t7; \
t9 = t5 & t8; \
g = t3 ^ t9; \
t11 = t5 ^ t8; \
e = g ^ t11; \
t14 = t3 & t11; \
h = t13 ^ t14
/* 15 terms */
#define ib6(a,b,c,d,e,f,g,h) \
t1 = ~a; \
t2 = a ^ b; \
t3 = c ^ t2; \
t4 = c | t1; \
t5 = d ^ t4; \
t13 = d & t1; \
f = t3 ^ t5; \
t7 = t3 & t5; \
t8 = t2 ^ t7; \
t9 = b | t8; \
h = t5 ^ t9; \
t11 = b | h; \
e = t8 ^ t11; \
t14 = t3 ^ t11; \
g = t13 ^ t14
/* 17 terms */
#define sb7(a,b,c,d,e,f,g,h) \
t1 = ~c; \
t2 = b ^ c; \
t3 = b | t1; \
t4 = d ^ t3; \
t5 = a & t4; \
t7 = a ^ d; \
h = t2 ^ t5; \
t8 = b ^ t5; \
t9 = t2 | t8; \
t11 = d & t3; \
f = t7 ^ t9; \
t12 = t5 ^ f; \
t15 = t1 | t4; \
t13 = h & t12; \
g = t11 ^ t13; \
t16 = t12 ^ g; \
e = t15 ^ t16
/* 17 terms */
#define ib7(a,b,c,d,e,f,g,h) \
t1 = a & b; \
t2 = a | b; \
t3 = c | t1; \
t4 = d & t2; \
h = t3 ^ t4; \
t6 = ~d; \
t7 = b ^ t4; \
t8 = h ^ t6; \
t11 = c ^ t7; \
t9 = t7 | t8; \
f = a ^ t9; \
t12 = d | f; \
e = t11 ^ t12; \
t14 = a & h; \
t15 = t3 ^ f; \
t16 = e ^ t14; \
g = t15 ^ t16
#define k_xor(r,a,b,c,d) \
a ^= ks[4 * r + 8]; \
b ^= ks[4 * r + 9]; \
c ^= ks[4 * r + 10]; \
d ^= ks[4 * r + 11]
#define k_set(r,a,b,c,d) \
a = ks[4 * r + 8]; \
b = ks[4 * r + 9]; \
c = ks[4 * r + 10]; \
d = ks[4 * r + 11]
#define k_get(r,a,b,c,d) \
ks[4 * r + 8] = a; \
ks[4 * r + 9] = b; \
ks[4 * r + 10] = c; \
ks[4 * r + 11] = d
/* the linear transformation and its inverse */
#define rot(a,b,c,d) \
a = hc_rotl32_S(a, 13); \
c = hc_rotl32_S(c, 3); \
d ^= c ^ (a << 3); \
b ^= a ^ c; \
d = hc_rotl32_S(d, 7); \
b = hc_rotl32_S(b, 1); \
a ^= b ^ d; \
c ^= d ^ (b << 7); \
a = hc_rotl32_S(a, 5); \
c = hc_rotl32_S(c, 22)
#define irot(a,b,c,d) \
c = hc_rotr32_S(c, 22); \
a = hc_rotr32_S(a, 5); \
c ^= d ^ (b << 7); \
a ^= b ^ d; \
d = hc_rotr32_S(d, 7); \
b = hc_rotr32_S(b, 1); \
d ^= c ^ (a << 3); \
b ^= a ^ c; \
c = hc_rotr32_S(c, 3); \
a = hc_rotr32_S(a, 13)
DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey);
DECLSPEC void serpent128_encrypt (const u32 *ks, const u32 *in, u32 *out);
DECLSPEC void serpent128_decrypt (const u32 *ks, const u32 *in, u32 *out);

@ -24,6 +24,82 @@
#include "inc_common.h"
#include "inc_cipher_twofish.h"
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
#define g1_fun128(x) \
(mds (0, q20 (extract_byte (x, 3), sk)) ^ \
mds (1, q21 (extract_byte (x, 0), sk)) ^ \
mds (2, q22 (extract_byte (x, 1), sk)) ^ \
mds (3, q23 (extract_byte (x, 2), sk)))
#define g0_fun128(x) \
(mds (0, q20 (extract_byte (x, 0), sk)) ^ \
mds (1, q21 (extract_byte (x, 1), sk)) ^ \
mds (2, q22 (extract_byte (x, 2), sk)) ^ \
mds (3, q23 (extract_byte (x, 3), sk)))
#define f_rnd128(i) \
{ \
u32 t0 = g0_fun128 (data[0]); \
u32 t1 = g1_fun128 (data[1]); \
data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \
data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \
u32 t2 = g0_fun128 (data[2]); \
u32 t3 = g1_fun128 (data[3]); \
data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \
data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \
}
#define i_rnd128(i) \
{ \
u32 t0 = g0_fun128 (data[0]); \
u32 t1 = g1_fun128 (data[1]); \
data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \
data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \
u32 t2 = g0_fun128 (data[2]); \
u32 t3 = g1_fun128 (data[3]); \
data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \
data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \
}
#define f_rnd256(i) \
{ \
u32 t0 = g0_fun256 (data[0]); \
u32 t1 = g1_fun256 (data[1]); \
data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \
data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \
u32 t2 = g0_fun256 (data[2]); \
u32 t3 = g1_fun256 (data[3]); \
data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \
data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \
}
#define i_rnd256(i) \
{ \
u32 t0 = g0_fun256 (data[0]); \
u32 t1 = g1_fun256 (data[1]); \
data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \
data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \
u32 t2 = g0_fun256 (data[2]); \
u32 t3 = g1_fun256 (data[3]); \
data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \
data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \
}
#define q(n,x) q_tab[n][x]
#define mds(n,x) m_tab[n][x]
#define q20(x,k) q (0, q (0, x) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0)
#define q21(x,k) q (0, q (1, x) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1)
#define q22(x,k) q (1, q (0, x) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2)
#define q23(x,k) q (1, q (1, x) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3)
#define q40(x,k) q (0, q (0, q (1, q (1, x) ^ extract_byte (k[3], 0)) ^ extract_byte (k[2], 0)) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0)
#define q41(x,k) q (0, q (1, q (1, q (0, x) ^ extract_byte (k[3], 1)) ^ extract_byte (k[2], 1)) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1)
#define q42(x,k) q (1, q (0, q (0, q (0, x) ^ extract_byte (k[3], 2)) ^ extract_byte (k[2], 2)) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2)
#define q43(x,k) q (1, q (1, q (0, q (1, x) ^ extract_byte (k[3], 3)) ^ extract_byte (k[2], 3)) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3)
DECLSPEC u32 mds_rem (u32 p0, u32 p1)
{
#define G_MOD 0x14d
@ -274,3 +350,23 @@ DECLSPEC void twofish256_decrypt (const u32 *sk, const u32 *lk, const u32 *in, u
out[2] = data[0] ^ lk[2];
out[3] = data[1] ^ lk[3];
}
#define g1_fun128
#define g0_fun128
#define f_rnd128
#define i_rnd128
#define f_rnd256
#define i_rnd256
#define q
#define mds
#define q20
#define q21
#define q22
#define q23
#define q40
#define q41
#define q42
#define q43

@ -6,70 +6,6 @@
#ifndef _INC_CIPHER_TWOFISH_H
#define _INC_CIPHER_TWOFISH_H
#define extract_byte(x,n) (((x) >> (8 * (n))) & 0xff)
// 128 bit key
#define g1_fun128(x) \
(mds (0, q20 (extract_byte (x, 3), sk)) ^ \
mds (1, q21 (extract_byte (x, 0), sk)) ^ \
mds (2, q22 (extract_byte (x, 1), sk)) ^ \
mds (3, q23 (extract_byte (x, 2), sk)))
#define g0_fun128(x) \
(mds (0, q20 (extract_byte (x, 0), sk)) ^ \
mds (1, q21 (extract_byte (x, 1), sk)) ^ \
mds (2, q22 (extract_byte (x, 2), sk)) ^ \
mds (3, q23 (extract_byte (x, 3), sk)))
#define f_rnd128(i) \
{ \
u32 t0 = g0_fun128 (data[0]); \
u32 t1 = g1_fun128 (data[1]); \
data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \
data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \
u32 t2 = g0_fun128 (data[2]); \
u32 t3 = g1_fun128 (data[3]); \
data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \
data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \
}
#define i_rnd128(i) \
{ \
u32 t0 = g0_fun128 (data[0]); \
u32 t1 = g1_fun128 (data[1]); \
data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \
data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \
u32 t2 = g0_fun128 (data[2]); \
u32 t3 = g1_fun128 (data[3]); \
data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \
data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \
}
#define f_rnd256(i) \
{ \
u32 t0 = g0_fun256 (data[0]); \
u32 t1 = g1_fun256 (data[1]); \
data[2] = hc_rotr32_S (data[2] ^ (t0 + t1 + lk[4 * (i) + 8]), 1); \
data[3] = hc_rotl32_S (data[3], 1) ^ (t0 + 2 * t1 + lk[4 * (i) + 9]); \
u32 t2 = g0_fun256 (data[2]); \
u32 t3 = g1_fun256 (data[3]); \
data[0] = hc_rotr32_S (data[0] ^ (t2 + t3 + lk[4 * (i) + 10]), 1); \
data[1] = hc_rotl32_S (data[1], 1) ^ (t2 + 2 * t3 + lk[4 * (i) + 11]); \
}
#define i_rnd256(i) \
{ \
u32 t0 = g0_fun256 (data[0]); \
u32 t1 = g1_fun256 (data[1]); \
data[2] = hc_rotl32_S (data[2], 1) ^ (t0 + t1 + lk[4 * (i) + 10]); \
data[3] = hc_rotr32_S (data[3] ^ (t0 + 2 * t1 + lk[4 * (i) + 11]), 1); \
u32 t2 = g0_fun256 (data[2]); \
u32 t3 = g1_fun256 (data[3]); \
data[0] = hc_rotl32_S (data[0], 1) ^ (t2 + t3 + lk[4 * (i) + 8]); \
data[1] = hc_rotr32_S (data[1] ^ (t2 + 2 * t3 + lk[4 * (i) + 9]), 1); \
}
CONSTANT_AS u32a q_tab[2][256] =
{
{
@ -122,8 +58,6 @@ CONSTANT_AS u32a q_tab[2][256] =
}
};
#define q(n,x) q_tab[n][x]
CONSTANT_AS u32a m_tab[4][256] =
{
{ 0xBCBC3275, 0xECEC21F3, 0x202043C6, 0xB3B3C9F4, 0xDADA03DB, 0x02028B7B,
@ -303,18 +237,6 @@ CONSTANT_AS u32a m_tab[4][256] =
0xECC94AEC, 0xFDD25EFD, 0xAB7FC1AB, 0xD8A8E0D8 }
};
#define mds(n,x) m_tab[n][x]
#define q20(x,k) q (0, q (0, x) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0)
#define q21(x,k) q (0, q (1, x) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1)
#define q22(x,k) q (1, q (0, x) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2)
#define q23(x,k) q (1, q (1, x) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3)
#define q40(x,k) q (0, q (0, q (1, q (1, x) ^ extract_byte (k[3], 0)) ^ extract_byte (k[2], 0)) ^ extract_byte (k[1], 0)) ^ extract_byte (k[0], 0)
#define q41(x,k) q (0, q (1, q (1, q (0, x) ^ extract_byte (k[3], 1)) ^ extract_byte (k[2], 1)) ^ extract_byte (k[1], 1)) ^ extract_byte (k[0], 1)
#define q42(x,k) q (1, q (0, q (0, q (0, x) ^ extract_byte (k[3], 2)) ^ extract_byte (k[2], 2)) ^ extract_byte (k[1], 2)) ^ extract_byte (k[0], 2)
#define q43(x,k) q (1, q (1, q (0, q (1, x) ^ extract_byte (k[3], 3)) ^ extract_byte (k[2], 3)) ^ extract_byte (k[1], 3)) ^ extract_byte (k[0], 3)
DECLSPEC u32 mds_rem (u32 p0, u32 p1);
DECLSPEC u32 h_fun128 (u32 *sk, u32 *lk, const u32 x, const u32 *key);
DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey);

@ -8,6 +8,21 @@
#include "inc_common.h"
#include "inc_rp.h"
#ifdef REAL_SHM
#define COPY_PW(x) \
LOCAL_AS pw_t s_pws[64]; \
s_pws[get_local_id (0)] = (x);
#else
#define COPY_PW(x) \
pw_t pw = (x);
#endif
#ifdef REAL_SHM
#define PASTE_PW s_pws[get_local_id(0)];
#else
#define PASTE_PW pw;
#endif
DECLSPEC u32 generate_cmask (const u32 value)
{
const u32 rmask = ((value & 0x40404040u) >> 1u)
@ -750,3 +765,6 @@ DECLSPEC int apply_rules (CONSTANT_AS const u32 *cmds, u32 *buf, const int in_le
return out_len;
}
#undef COPY_PW
#undef PASTE_PW

@ -10,21 +10,6 @@
#define MAYBE_UNUSED
#endif
#ifdef REAL_SHM
#define COPY_PW(x) \
LOCAL_AS pw_t s_pws[64]; \
s_pws[get_local_id (0)] = (x);
#else
#define COPY_PW(x) \
pw_t pw = (x);
#endif
#ifdef REAL_SHM
#define PASTE_PW s_pws[get_local_id(0)];
#else
#define PASTE_PW pw;
#endif
#define RULE_OP_MANGLE_NOOP ':'
#define RULE_OP_MANGLE_LREST 'l'
#define RULE_OP_MANGLE_UREST 'u'

@ -11,7 +11,7 @@
#include "inc_cipher_twofish.h"
#include "inc_cipher_camellia.h"
#include "inc_cipher_kuznyechik.h"
#include "inc_hash_veracrypt_xts.h"
#include "inc_veracrypt_xts.h"
DECLSPEC void camellia256_decrypt_xts_first (const u32 *ukey1, const u32 *ukey2, const u32 *in, u32 *out, u32 *S, u32 *T, u32 *ks)
{

Loading…
Cancel
Save