|
|
|
@ -700,9 +700,6 @@ DECLSPEC void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S
|
|
|
|
|
ks[2] = ukey[2];
|
|
|
|
|
ks[3] = ukey[3];
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 0, j = 0; i < 10; i += 1, j += 4)
|
|
|
|
|
{
|
|
|
|
|
u32 temp = ks[j + 3];
|
|
|
|
@ -748,9 +745,6 @@ DECLSPEC void aes128_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te
|
|
|
|
|
temp = ks[18]; ks[18] = ks[26]; ks[26] = temp;
|
|
|
|
|
temp = ks[19]; ks[19] = ks[27]; ks[27] = temp;
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 1, j = 4; i < 10; i += 1, j += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (ks[j + 0] >> 0) & 0xff;
|
|
|
|
@ -827,83 +821,82 @@ DECLSPEC void aes128_set_decrypt_key (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_
|
|
|
|
|
/*
 * Forward declaration of aes128_encrypt; the definition follows directly below.
 *
 * In the visible body, in[] and out[] are each accessed as four u32 words
 * (indices 0..3), ks[] is read at indices up to 43, and the s_te0..s_te4
 * tables are indexed with single-byte values extracted from the state words.
 *
 * NOTE(review): presumably a one-block AES-128 encryption over an already
 * expanded key schedule -- confirm against the complete definition, which is
 * only partially visible here.
 */
DECLSPEC void aes128_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4);
|
|
|
|
|
DECLSPEC void aes128_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
|
|
|
|
{
|
|
|
|
|
const u32 in_s0 = swap32_S (in[0]);
|
|
|
|
|
const u32 in_s1 = swap32_S (in[1]);
|
|
|
|
|
const u32 in_s2 = swap32_S (in[2]);
|
|
|
|
|
const u32 in_s3 = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
u32 t0 = in_s0 ^ ks[0];
|
|
|
|
|
u32 t1 = in_s1 ^ ks[1];
|
|
|
|
|
u32 t2 = in_s2 ^ ks[2];
|
|
|
|
|
u32 t3 = in_s3 ^ ks[3];
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 4; i < 40; i += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
t0 = s_te0[x0s3] ^ s_te1[x1s2] ^ s_te2[x2s1] ^ s_te3[x3s0] ^ ks[i + 0];
|
|
|
|
|
t1 = s_te0[x1s3] ^ s_te1[x2s2] ^ s_te2[x3s1] ^ s_te3[x0s0] ^ ks[i + 1];
|
|
|
|
|
t2 = s_te0[x2s3] ^ s_te1[x3s2] ^ s_te2[x0s1] ^ s_te3[x1s0] ^ ks[i + 2];
|
|
|
|
|
t3 = s_te0[x3s3] ^ s_te1[x0s2] ^ s_te2[x1s1] ^ s_te3[x2s0] ^ ks[i + 3];
|
|
|
|
|
}
|
|
|
|
|
u32 in_s[4];
|
|
|
|
|
|
|
|
|
|
in_s[0] = swap32_S (in[0]);
|
|
|
|
|
in_s[1] = swap32_S (in[1]);
|
|
|
|
|
in_s[2] = swap32_S (in[2]);
|
|
|
|
|
in_s[3] = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
out[0] = (s_te4[x0s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x1s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x2s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x3s0] & 0x000000ff)
|
|
|
|
|
u32 s0 = in_s[0] ^ ks[0];
|
|
|
|
|
u32 s1 = in_s[1] ^ ks[1];
|
|
|
|
|
u32 s2 = in_s[2] ^ ks[2];
|
|
|
|
|
u32 s3 = in_s[3] ^ ks[3];
|
|
|
|
|
|
|
|
|
|
u32 t0;
|
|
|
|
|
u32 t1;
|
|
|
|
|
u32 t2;
|
|
|
|
|
u32 t3;
|
|
|
|
|
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[ 4];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[ 5];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[ 6];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[ 7];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[ 8];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[ 9];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[10];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[11];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[12];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[13];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[14];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[15];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[16];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[17];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[18];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[19];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[20];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[21];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[22];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[23];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[24];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[25];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[26];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[27];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[28];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[29];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[30];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[31];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[32];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[33];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[34];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[35];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[36];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[37];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[38];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[39];
|
|
|
|
|
|
|
|
|
|
out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[40];
|
|
|
|
|
|
|
|
|
|
out[1] = (s_te4[x1s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x2s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x3s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x0s0] & 0x000000ff)
|
|
|
|
|
out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[41];
|
|
|
|
|
|
|
|
|
|
out[2] = (s_te4[x2s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x3s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x0s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x1s0] & 0x000000ff)
|
|
|
|
|
out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[42];
|
|
|
|
|
|
|
|
|
|
out[3] = (s_te4[x3s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x0s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x1s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x2s0] & 0x000000ff)
|
|
|
|
|
out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[43];
|
|
|
|
|
|
|
|
|
|
out[0] = swap32_S (out[0]);
|
|
|
|
@ -915,83 +908,82 @@ DECLSPEC void aes128_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u
|
|
|
|
|
/*
 * Forward declaration of aes128_decrypt; the definition follows directly below.
 *
 * In the visible body, in[] and out[] are each accessed as four u32 words
 * (indices 0..3), ks[] is read at indices up to 43, and the s_td0..s_td4
 * tables are indexed with single-byte values extracted from the state words.
 *
 * NOTE(review): presumably a one-block AES-128 decryption that expects ks to
 * hold a decryption key schedule -- confirm against the complete definition,
 * which is only partially visible here.
 */
DECLSPEC void aes128_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4);
|
|
|
|
|
DECLSPEC void aes128_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4)
|
|
|
|
|
{
|
|
|
|
|
const u32 in_s0 = swap32_S (in[0]);
|
|
|
|
|
const u32 in_s1 = swap32_S (in[1]);
|
|
|
|
|
const u32 in_s2 = swap32_S (in[2]);
|
|
|
|
|
const u32 in_s3 = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
u32 t0 = in_s0 ^ ks[0];
|
|
|
|
|
u32 t1 = in_s1 ^ ks[1];
|
|
|
|
|
u32 t2 = in_s2 ^ ks[2];
|
|
|
|
|
u32 t3 = in_s3 ^ ks[3];
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 4; i < 40; i += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
t0 = s_td0[x0s3] ^ s_td1[x3s2] ^ s_td2[x2s1] ^ s_td3[x1s0] ^ ks[i + 0];
|
|
|
|
|
t1 = s_td0[x1s3] ^ s_td1[x0s2] ^ s_td2[x3s1] ^ s_td3[x2s0] ^ ks[i + 1];
|
|
|
|
|
t2 = s_td0[x2s3] ^ s_td1[x1s2] ^ s_td2[x0s1] ^ s_td3[x3s0] ^ ks[i + 2];
|
|
|
|
|
t3 = s_td0[x3s3] ^ s_td1[x2s2] ^ s_td2[x1s1] ^ s_td3[x0s0] ^ ks[i + 3];
|
|
|
|
|
}
|
|
|
|
|
u32 in_s[4];
|
|
|
|
|
|
|
|
|
|
in_s[0] = swap32_S (in[0]);
|
|
|
|
|
in_s[1] = swap32_S (in[1]);
|
|
|
|
|
in_s[2] = swap32_S (in[2]);
|
|
|
|
|
in_s[3] = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
out[0] = (s_td4[x0s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x3s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x2s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x1s0] & 0x000000ff)
|
|
|
|
|
u32 s0 = in_s[0] ^ ks[0];
|
|
|
|
|
u32 s1 = in_s[1] ^ ks[1];
|
|
|
|
|
u32 s2 = in_s[2] ^ ks[2];
|
|
|
|
|
u32 s3 = in_s[3] ^ ks[3];
|
|
|
|
|
|
|
|
|
|
u32 t0;
|
|
|
|
|
u32 t1;
|
|
|
|
|
u32 t2;
|
|
|
|
|
u32 t3;
|
|
|
|
|
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[ 4];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[ 5];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[ 6];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[ 7];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[ 8];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[ 9];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[10];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[11];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[12];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[13];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[14];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[15];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[16];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[17];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[18];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[19];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[20];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[21];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[22];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[23];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[24];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[25];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[26];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[27];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[28];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[29];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[30];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[31];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[32];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[33];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[34];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[35];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[36];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[37];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[38];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[39];
|
|
|
|
|
|
|
|
|
|
out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[40];
|
|
|
|
|
|
|
|
|
|
out[1] = (s_td4[x1s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x0s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x3s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x2s0] & 0x000000ff)
|
|
|
|
|
out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[41];
|
|
|
|
|
|
|
|
|
|
out[2] = (s_td4[x2s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x1s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x0s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x3s0] & 0x000000ff)
|
|
|
|
|
out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[42];
|
|
|
|
|
|
|
|
|
|
out[3] = (s_td4[x3s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x2s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x1s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x0s0] & 0x000000ff)
|
|
|
|
|
out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[43];
|
|
|
|
|
|
|
|
|
|
out[0] = swap32_S (out[0]);
|
|
|
|
@ -1017,9 +1009,6 @@ DECLSPEC void aes256_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S
|
|
|
|
|
int i;
|
|
|
|
|
int j;
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 0, j = 0; i < 7; i += 1, j += 8)
|
|
|
|
|
{
|
|
|
|
|
const u32 temp1 = ks[j + 7];
|
|
|
|
@ -1085,9 +1074,6 @@ DECLSPEC void aes256_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te
|
|
|
|
|
temp = ks[26]; ks[26] = ks[34]; ks[34] = temp;
|
|
|
|
|
temp = ks[27]; ks[27] = ks[35]; ks[35] = temp;
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 1, j = 4; i < 14; i += 1, j += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (ks[j + 0] >> 0) & 0xff;
|
|
|
|
@ -1172,83 +1158,98 @@ DECLSPEC void aes256_set_decrypt_key (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_
|
|
|
|
|
/*
 * Forward declaration of aes256_encrypt; the definition follows directly below.
 *
 * In the visible body, in[] and out[] are each accessed as four u32 words
 * (indices 0..3), ks[] is read at indices up to 59, and the s_te0..s_te4
 * tables are indexed with single-byte values extracted from the state words.
 *
 * NOTE(review): presumably a one-block AES-256 encryption over an already
 * expanded key schedule -- confirm against the complete definition, which is
 * only partially visible here.
 */
DECLSPEC void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4);
|
|
|
|
|
DECLSPEC void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)
|
|
|
|
|
{
|
|
|
|
|
const u32 in_s0 = swap32_S (in[0]);
|
|
|
|
|
const u32 in_s1 = swap32_S (in[1]);
|
|
|
|
|
const u32 in_s2 = swap32_S (in[2]);
|
|
|
|
|
const u32 in_s3 = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
u32 t0 = in_s0 ^ ks[0];
|
|
|
|
|
u32 t1 = in_s1 ^ ks[1];
|
|
|
|
|
u32 t2 = in_s2 ^ ks[2];
|
|
|
|
|
u32 t3 = in_s3 ^ ks[3];
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 4; i < 56; i += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
t0 = s_te0[x0s3] ^ s_te1[x1s2] ^ s_te2[x2s1] ^ s_te3[x3s0] ^ ks[i + 0];
|
|
|
|
|
t1 = s_te0[x1s3] ^ s_te1[x2s2] ^ s_te2[x3s1] ^ s_te3[x0s0] ^ ks[i + 1];
|
|
|
|
|
t2 = s_te0[x2s3] ^ s_te1[x3s2] ^ s_te2[x0s1] ^ s_te3[x1s0] ^ ks[i + 2];
|
|
|
|
|
t3 = s_te0[x3s3] ^ s_te1[x0s2] ^ s_te2[x1s1] ^ s_te3[x2s0] ^ ks[i + 3];
|
|
|
|
|
}
|
|
|
|
|
u32 in_s[4];
|
|
|
|
|
|
|
|
|
|
in_s[0] = swap32_S (in[0]);
|
|
|
|
|
in_s[1] = swap32_S (in[1]);
|
|
|
|
|
in_s[2] = swap32_S (in[2]);
|
|
|
|
|
in_s[3] = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
out[0] = (s_te4[x0s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x1s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x2s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x3s0] & 0x000000ff)
|
|
|
|
|
u32 s0 = in_s[0] ^ ks[0];
|
|
|
|
|
u32 s1 = in_s[1] ^ ks[1];
|
|
|
|
|
u32 s2 = in_s[2] ^ ks[2];
|
|
|
|
|
u32 s3 = in_s[3] ^ ks[3];
|
|
|
|
|
|
|
|
|
|
u32 t0;
|
|
|
|
|
u32 t1;
|
|
|
|
|
u32 t2;
|
|
|
|
|
u32 t3;
|
|
|
|
|
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[ 4];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[ 5];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[ 6];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[ 7];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[ 8];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[ 9];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[10];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[11];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[12];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[13];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[14];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[15];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[16];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[17];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[18];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[19];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[20];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[21];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[22];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[23];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[24];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[25];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[26];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[27];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[28];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[29];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[30];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[31];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[32];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[33];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[34];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[35];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[36];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[37];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[38];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[39];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[40];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[41];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[42];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[43];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[44];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[45];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[46];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[47];
|
|
|
|
|
s0 = s_te0[t0 >> 24] ^ s_te1[(t1 >> 16) & 0xff] ^ s_te2[(t2 >> 8) & 0xff] ^ s_te3[t3 & 0xff] ^ ks[48];
|
|
|
|
|
s1 = s_te0[t1 >> 24] ^ s_te1[(t2 >> 16) & 0xff] ^ s_te2[(t3 >> 8) & 0xff] ^ s_te3[t0 & 0xff] ^ ks[49];
|
|
|
|
|
s2 = s_te0[t2 >> 24] ^ s_te1[(t3 >> 16) & 0xff] ^ s_te2[(t0 >> 8) & 0xff] ^ s_te3[t1 & 0xff] ^ ks[50];
|
|
|
|
|
s3 = s_te0[t3 >> 24] ^ s_te1[(t0 >> 16) & 0xff] ^ s_te2[(t1 >> 8) & 0xff] ^ s_te3[t2 & 0xff] ^ ks[51];
|
|
|
|
|
t0 = s_te0[s0 >> 24] ^ s_te1[(s1 >> 16) & 0xff] ^ s_te2[(s2 >> 8) & 0xff] ^ s_te3[s3 & 0xff] ^ ks[52];
|
|
|
|
|
t1 = s_te0[s1 >> 24] ^ s_te1[(s2 >> 16) & 0xff] ^ s_te2[(s3 >> 8) & 0xff] ^ s_te3[s0 & 0xff] ^ ks[53];
|
|
|
|
|
t2 = s_te0[s2 >> 24] ^ s_te1[(s3 >> 16) & 0xff] ^ s_te2[(s0 >> 8) & 0xff] ^ s_te3[s1 & 0xff] ^ ks[54];
|
|
|
|
|
t3 = s_te0[s3 >> 24] ^ s_te1[(s0 >> 16) & 0xff] ^ s_te2[(s1 >> 8) & 0xff] ^ s_te3[s2 & 0xff] ^ ks[55];
|
|
|
|
|
|
|
|
|
|
out[0] = (s_te4[(t0 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t1 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t2 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t3 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[56];
|
|
|
|
|
|
|
|
|
|
out[1] = (s_te4[x1s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x2s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x3s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x0s0] & 0x000000ff)
|
|
|
|
|
out[1] = (s_te4[(t1 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t2 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t3 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t0 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[57];
|
|
|
|
|
|
|
|
|
|
out[2] = (s_te4[x2s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x3s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x0s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x1s0] & 0x000000ff)
|
|
|
|
|
out[2] = (s_te4[(t2 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t3 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t0 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t1 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[58];
|
|
|
|
|
|
|
|
|
|
out[3] = (s_te4[x3s3] & 0xff000000)
|
|
|
|
|
^ (s_te4[x0s2] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[x1s1] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[x2s0] & 0x000000ff)
|
|
|
|
|
out[3] = (s_te4[(t3 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_te4[(t0 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_te4[(t1 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_te4[(t2 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[59];
|
|
|
|
|
|
|
|
|
|
out[0] = swap32_S (out[0]);
|
|
|
|
@ -1260,83 +1261,98 @@ DECLSPEC void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u
|
|
|
|
|
/*
 * Forward declaration of aes256_decrypt; the definition follows directly below.
 *
 * In the visible portion of the body, in[] is read as four u32 words
 * (indices 0..3), ks[] is read at indices up to at least 55, and the s_td*
 * tables are indexed with single-byte values extracted from the state words.
 *
 * NOTE(review): presumably a one-block AES-256 decryption that expects ks to
 * hold a decryption key schedule -- confirm against the complete definition,
 * which is only partially visible here.
 */
DECLSPEC void aes256_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4);
|
|
|
|
|
DECLSPEC void aes256_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4)
|
|
|
|
|
{
|
|
|
|
|
const u32 in_s0 = swap32_S (in[0]);
|
|
|
|
|
const u32 in_s1 = swap32_S (in[1]);
|
|
|
|
|
const u32 in_s2 = swap32_S (in[2]);
|
|
|
|
|
const u32 in_s3 = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
u32 t0 = in_s0 ^ ks[0];
|
|
|
|
|
u32 t1 = in_s1 ^ ks[1];
|
|
|
|
|
u32 t2 = in_s2 ^ ks[2];
|
|
|
|
|
u32 t3 = in_s3 ^ ks[3];
|
|
|
|
|
|
|
|
|
|
#ifdef _unroll
|
|
|
|
|
#pragma unroll
|
|
|
|
|
#endif
|
|
|
|
|
for (int i = 4; i < 56; i += 4)
|
|
|
|
|
{
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
t0 = s_td0[x0s3] ^ s_td1[x3s2] ^ s_td2[x2s1] ^ s_td3[x1s0] ^ ks[i + 0];
|
|
|
|
|
t1 = s_td0[x1s3] ^ s_td1[x0s2] ^ s_td2[x3s1] ^ s_td3[x2s0] ^ ks[i + 1];
|
|
|
|
|
t2 = s_td0[x2s3] ^ s_td1[x1s2] ^ s_td2[x0s1] ^ s_td3[x3s0] ^ ks[i + 2];
|
|
|
|
|
t3 = s_td0[x3s3] ^ s_td1[x2s2] ^ s_td2[x1s1] ^ s_td3[x0s0] ^ ks[i + 3];
|
|
|
|
|
}
|
|
|
|
|
u32 in_s[4];
|
|
|
|
|
|
|
|
|
|
in_s[0] = swap32_S (in[0]);
|
|
|
|
|
in_s[1] = swap32_S (in[1]);
|
|
|
|
|
in_s[2] = swap32_S (in[2]);
|
|
|
|
|
in_s[3] = swap32_S (in[3]);
|
|
|
|
|
|
|
|
|
|
const u32 x0s0 = (t0 >> 0) & 0xff;
|
|
|
|
|
const u32 x0s1 = (t0 >> 8) & 0xff;
|
|
|
|
|
const u32 x0s2 = (t0 >> 16) & 0xff;
|
|
|
|
|
const u32 x0s3 = (t0 >> 24) & 0xff;
|
|
|
|
|
const u32 x1s0 = (t1 >> 0) & 0xff;
|
|
|
|
|
const u32 x1s1 = (t1 >> 8) & 0xff;
|
|
|
|
|
const u32 x1s2 = (t1 >> 16) & 0xff;
|
|
|
|
|
const u32 x1s3 = (t1 >> 24) & 0xff;
|
|
|
|
|
const u32 x2s0 = (t2 >> 0) & 0xff;
|
|
|
|
|
const u32 x2s1 = (t2 >> 8) & 0xff;
|
|
|
|
|
const u32 x2s2 = (t2 >> 16) & 0xff;
|
|
|
|
|
const u32 x2s3 = (t2 >> 24) & 0xff;
|
|
|
|
|
const u32 x3s0 = (t3 >> 0) & 0xff;
|
|
|
|
|
const u32 x3s1 = (t3 >> 8) & 0xff;
|
|
|
|
|
const u32 x3s2 = (t3 >> 16) & 0xff;
|
|
|
|
|
const u32 x3s3 = (t3 >> 24) & 0xff;
|
|
|
|
|
|
|
|
|
|
out[0] = (s_td4[x0s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x3s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x2s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x1s0] & 0x000000ff)
|
|
|
|
|
u32 s0 = in_s[0] ^ ks[0];
|
|
|
|
|
u32 s1 = in_s[1] ^ ks[1];
|
|
|
|
|
u32 s2 = in_s[2] ^ ks[2];
|
|
|
|
|
u32 s3 = in_s[3] ^ ks[3];
|
|
|
|
|
|
|
|
|
|
u32 t0;
|
|
|
|
|
u32 t1;
|
|
|
|
|
u32 t2;
|
|
|
|
|
u32 t3;
|
|
|
|
|
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[ 4];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[ 5];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[ 6];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[ 7];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[ 8];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[ 9];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[10];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[11];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[12];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[13];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[14];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[15];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[16];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[17];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[18];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[19];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[20];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[21];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[22];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[23];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[24];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[25];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[26];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[27];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[28];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[29];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[30];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[31];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[32];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[33];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[34];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[35];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[36];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[37];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[38];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[39];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[40];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[41];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[42];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[43];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[44];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[45];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[46];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[47];
|
|
|
|
|
s0 = s_td0[t0 >> 24] ^ s_td1[(t3 >> 16) & 0xff] ^ s_td2[(t2 >> 8) & 0xff] ^ s_td3[t1 & 0xff] ^ ks[48];
|
|
|
|
|
s1 = s_td0[t1 >> 24] ^ s_td1[(t0 >> 16) & 0xff] ^ s_td2[(t3 >> 8) & 0xff] ^ s_td3[t2 & 0xff] ^ ks[49];
|
|
|
|
|
s2 = s_td0[t2 >> 24] ^ s_td1[(t1 >> 16) & 0xff] ^ s_td2[(t0 >> 8) & 0xff] ^ s_td3[t3 & 0xff] ^ ks[50];
|
|
|
|
|
s3 = s_td0[t3 >> 24] ^ s_td1[(t2 >> 16) & 0xff] ^ s_td2[(t1 >> 8) & 0xff] ^ s_td3[t0 & 0xff] ^ ks[51];
|
|
|
|
|
t0 = s_td0[s0 >> 24] ^ s_td1[(s3 >> 16) & 0xff] ^ s_td2[(s2 >> 8) & 0xff] ^ s_td3[s1 & 0xff] ^ ks[52];
|
|
|
|
|
t1 = s_td0[s1 >> 24] ^ s_td1[(s0 >> 16) & 0xff] ^ s_td2[(s3 >> 8) & 0xff] ^ s_td3[s2 & 0xff] ^ ks[53];
|
|
|
|
|
t2 = s_td0[s2 >> 24] ^ s_td1[(s1 >> 16) & 0xff] ^ s_td2[(s0 >> 8) & 0xff] ^ s_td3[s3 & 0xff] ^ ks[54];
|
|
|
|
|
t3 = s_td0[s3 >> 24] ^ s_td1[(s2 >> 16) & 0xff] ^ s_td2[(s1 >> 8) & 0xff] ^ s_td3[s0 & 0xff] ^ ks[55];
|
|
|
|
|
|
|
|
|
|
out[0] = (s_td4[(t0 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t3 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t2 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t1 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[56];
|
|
|
|
|
|
|
|
|
|
out[1] = (s_td4[x1s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x0s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x3s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x2s0] & 0x000000ff)
|
|
|
|
|
out[1] = (s_td4[(t1 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t0 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t3 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t2 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[57];
|
|
|
|
|
|
|
|
|
|
out[2] = (s_td4[x2s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x1s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x0s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x3s0] & 0x000000ff)
|
|
|
|
|
out[2] = (s_td4[(t2 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t1 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t0 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t3 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[58];
|
|
|
|
|
|
|
|
|
|
out[3] = (s_td4[x3s3] & 0xff000000)
|
|
|
|
|
^ (s_td4[x2s2] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[x1s1] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[x0s0] & 0x000000ff)
|
|
|
|
|
out[3] = (s_td4[(t3 >> 24) & 0xff] & 0xff000000)
|
|
|
|
|
^ (s_td4[(t2 >> 16) & 0xff] & 0x00ff0000)
|
|
|
|
|
^ (s_td4[(t1 >> 8) & 0xff] & 0x0000ff00)
|
|
|
|
|
^ (s_td4[(t0 >> 0) & 0xff] & 0x000000ff)
|
|
|
|
|
^ ks[59];
|
|
|
|
|
|
|
|
|
|
out[0] = swap32_S (out[0]);
|
|
|
|
|