1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-22 08:08:10 +00:00

Some cleanup before going to optimize more code in inc_rp.c

This commit is contained in:
jsteube 2016-11-01 13:01:15 +01:00
parent ae5109a5a2
commit f3dd6fe741
3 changed files with 416 additions and 414 deletions

View File

@ -3,123 +3,119 @@
* License.....: MIT
*/
inline u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);
inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4]);
/*
 * generate_cmask: build a per-byte mask from a packed 32-bit word.
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the form using `buf` and plain literals, followed by the form
 * using `value` and `u`-suffixed literals.
 *
 * Result: only bit 0x20 of each byte can be set. It is set when the matching
 * input byte has bit 0x80 clear, bit 0x40 set, and its low five bits in 1..26,
 * i.e. the byte is in 0x41..0x5a or 0x61..0x7a.
 * Presumably callers use the mask to flip the case of ASCII letters - confirm
 * against the call sites, which are not visible here.
 */
inline u32 generate_cmask (u32 buf)
inline u32 generate_cmask (const u32 value)
{
/* rmask: bit 0x20 per byte where bit 0x40 is set and bit 0x80 is clear */
const u32 rmask = ((buf & 0x40404040) >> 1)
& ~((buf & 0x80808080) >> 2);
const u32 rmask = ((value & 0x40404040u) >> 1u)
& ~((value & 0x80808080u) >> 2u);
/* hmask: bit 0x20 per byte where the low five bits are >= 27 (excluded below) */
/* lmask: bit 0x20 per byte where the low five bits are >= 1 */
const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
const u32 hmask = (value & 0x1f1f1f1fu) + 0x05050505u;
const u32 lmask = (value & 0x1f1f1f1fu) + 0x1f1f1f1fu;
return rmask & ~hmask & lmask;
}
/*
 * truncate_right: clear everything at and after byte position `offset` in the
 * eight words buf0[0..3], buf1[0..3] (treated as one consecutive sequence).
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the w0/w1/len form followed by the buf0/buf1/offset form.
 *
 * The word at index offset / 4 keeps only its low-order (offset & 3) bytes;
 * every later word is set to 0; earlier words are untouched.
 * When offset / 4 is greater than 7 no case matches and nothing is modified.
 * Presumably the words hold a byte string with the first byte in the
 * low-order position - confirm against callers.
 */
inline void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
inline void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
{
/* tmp: mask with the low (offset & 3) bytes set; 0 when offset is a multiple of 4 */
const u32 tmp = (1 << ((len % 4) * 8)) - 1;
const u32 tmp = (1u << ((offset & 3u) * 8u)) - 1u;
/* select the word that contains byte position `offset` */
switch (len / 4)
switch (offset / 4)
{
case 0: w0[0] &= tmp;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 0: buf0[0] &= tmp;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 1: w0[1] &= tmp;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 1: buf0[1] &= tmp;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 2: w0[2] &= tmp;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 2: buf0[2] &= tmp;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 3: w0[3] &= tmp;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 3: buf0[3] &= tmp;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 4: w1[0] &= tmp;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 4: buf1[0] &= tmp;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 5: w1[1] &= tmp;
w1[2] = 0;
w1[3] = 0;
case 5: buf1[1] &= tmp;
buf1[2] = 0;
buf1[3] = 0;
break;
case 6: w1[2] &= tmp;
w1[3] = 0;
case 6: buf1[2] &= tmp;
buf1[3] = 0;
break;
case 7: w1[3] &= tmp;
case 7: buf1[3] &= tmp;
break;
}
}
/*
 * truncate_left: clear everything before byte position `offset` in the eight
 * words buf0[0..3], buf1[0..3] (treated as one consecutive sequence).
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the w0/w1/len form followed by the buf0/buf1/offset form.
 *
 * Every word before index offset / 4 is set to 0; the word at that index has
 * its low-order (offset & 3) bytes cleared; later words are untouched.
 * When offset / 4 is greater than 7 no case matches and nothing is modified.
 * The data is not shifted - only masked in place.
 */
inline void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
inline void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
{
/* tmp: inverse of the low-(offset & 3)-bytes mask, i.e. the bytes to keep */
const u32 tmp = ~((1 << ((len % 4) * 8)) - 1);
const u32 tmp = ~((1u << ((offset & 3u) * 8u)) - 1u);
/* select the word that contains byte position `offset` */
switch (len / 4)
switch (offset / 4)
{
case 0: w0[0] &= tmp;
case 0: buf0[0] &= tmp;
break;
case 1: w0[0] = 0;
w0[1] &= tmp;
case 1: buf0[0] = 0;
buf0[1] &= tmp;
break;
case 2: w0[0] = 0;
w0[1] = 0;
w0[2] &= tmp;
case 2: buf0[0] = 0;
buf0[1] = 0;
buf0[2] &= tmp;
break;
case 3: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] &= tmp;
case 3: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] &= tmp;
break;
case 4: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] &= tmp;
case 4: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] &= tmp;
break;
case 5: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] &= tmp;
case 5: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] &= tmp;
break;
case 6: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] &= tmp;
case 6: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] &= tmp;
break;
case 7: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] &= tmp;
case 7: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] &= tmp;
break;
}
}
@ -738,23 +734,27 @@ inline void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32
}
}
/*
 * append_block1: OR the low byte of src_r0 into byte position `offset` of the
 * eight destination words (first four, then dst1[0..3]).
 *
 * NOTE: this span is a diff rendering without +/- markers. Changed lines
 * appear twice: the dst0 form followed by the buf0 form. Only the first array
 * parameter is renamed; the second is `dst1` in both forms.
 *
 * The byte is OR-ed in, not assigned, so the target byte position must already
 * be zero for the result to equal the appended byte.
 * Any offset >= 28 lands in dst1[3]; there is no upper bound check.
 */
inline void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
inline void append_block1 (const u32 offset, u32 buf0[4], u32 dst1[4], const u32 src_r0)
{
// this version works with 1 byte append only
const u32 tmp = (src_r0 & 0xff) << ((offset & 3) * 8);
const u32 value = src_r0 & 0xff;
dst0[0] |= (offset < 4) ? tmp : 0;
dst0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0;
dst0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0;
dst0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0;
// shift moves the byte to lane (offset & 3) inside its word
const u32 shift = (offset & 3) * 8;
const u32 tmp = value << shift;
// exactly one of the eight range tests below is true; the others OR in 0
buf0[0] |= (offset < 4) ? tmp : 0;
buf0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0;
buf0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0;
buf0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0;
dst1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0;
dst1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0;
dst1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0;
dst1[3] |= (offset >= 28) ? tmp : 0;
}
inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
inline void append_block8 (const u32 offset, u32 buf0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
{
switch (offset)
{
@ -835,136 +835,136 @@ inline void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[3] = src_l0[3] | src_r0[0] << 24;
buf0[3] = src_l0[3] | src_r0[0] << 24;
break;
case 14:
dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[3] = src_l0[3] | src_r0[0] << 16;
buf0[3] = src_l0[3] | src_r0[0] << 16;
break;
case 13:
dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[3] = src_l0[3] | src_r0[0] << 8;
buf0[3] = src_l0[3] | src_r0[0] << 8;
break;
case 12:
dst1[3] = src_r1[0];
dst1[2] = src_r0[3];
dst1[1] = src_r0[2];
dst1[0] = src_r0[1];
dst0[3] = src_r0[0];
buf0[3] = src_r0[0];
break;
case 11:
dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[2] = src_l0[2] | src_r0[0] << 24;
buf0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
buf0[2] = src_l0[2] | src_r0[0] << 24;
break;
case 10:
dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[2] = src_l0[2] | src_r0[0] << 16;
buf0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
buf0[2] = src_l0[2] | src_r0[0] << 16;
break;
case 9:
dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[2] = src_l0[2] | src_r0[0] << 8;
buf0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
buf0[2] = src_l0[2] | src_r0[0] << 8;
break;
case 8:
dst1[3] = src_r1[1];
dst1[2] = src_r1[0];
dst1[1] = src_r0[3];
dst1[0] = src_r0[2];
dst0[3] = src_r0[1];
dst0[2] = src_r0[0];
buf0[3] = src_r0[1];
buf0[2] = src_r0[0];
break;
case 7:
dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[1] = src_l0[1] | src_r0[0] << 24;
buf0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
buf0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
buf0[1] = src_l0[1] | src_r0[0] << 24;
break;
case 6:
dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[1] = src_l0[1] | src_r0[0] << 16;
buf0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
buf0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
buf0[1] = src_l0[1] | src_r0[0] << 16;
break;
case 5:
dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[1] = src_l0[1] | src_r0[0] << 8;
buf0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
buf0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
buf0[1] = src_l0[1] | src_r0[0] << 8;
break;
case 4:
dst1[3] = src_r1[2];
dst1[2] = src_r1[1];
dst1[1] = src_r1[0];
dst1[0] = src_r0[3];
dst0[3] = src_r0[2];
dst0[2] = src_r0[1];
dst0[1] = src_r0[0];
buf0[3] = src_r0[2];
buf0[2] = src_r0[1];
buf0[1] = src_r0[0];
break;
case 3:
dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1);
dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1);
dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1);
dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1);
dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
dst0[0] = src_l0[0] | src_r0[0] << 24;
buf0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1);
buf0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1);
buf0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1);
buf0[0] = src_l0[0] | src_r0[0] << 24;
break;
case 2:
dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2);
dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2);
dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2);
dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2);
dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
dst0[0] = src_l0[0] | src_r0[0] << 16;
buf0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2);
buf0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2);
buf0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2);
buf0[0] = src_l0[0] | src_r0[0] << 16;
break;
case 1:
dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3);
dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3);
dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3);
dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3);
dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
dst0[0] = src_l0[0] | src_r0[0] << 8;
buf0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3);
buf0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3);
buf0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3);
buf0[0] = src_l0[0] | src_r0[0] << 8;
break;
case 0:
dst1[3] = src_r1[3];
dst1[2] = src_r1[2];
dst1[1] = src_r1[1];
dst1[0] = src_r1[0];
dst0[3] = src_r0[3];
dst0[2] = src_r0[2];
dst0[1] = src_r0[1];
dst0[0] = src_r0[0];
buf0[3] = src_r0[3];
buf0[2] = src_r0[2];
buf0[1] = src_r0[1];
buf0[0] = src_r0[0];
break;
}
}
@ -2531,20 +2531,20 @@ inline u32 apply_rules (const __global u32 *cmds, u32 buf0[4], u32 buf1[4], cons
return out_len;
}
inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x w0[4], u32x w1[4])
inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const u32 pw_len, const __global kernel_rule_t *rules_buf, const u32 il_pos, u32x buf0[4], u32x buf1[4])
{
#if VECT_SIZE == 1
w0[0] = pw_buf0[0];
w0[1] = pw_buf0[1];
w0[2] = pw_buf0[2];
w0[3] = pw_buf0[3];
w1[0] = pw_buf1[0];
w1[1] = pw_buf1[1];
w1[2] = pw_buf1[2];
w1[3] = pw_buf1[3];
buf0[0] = pw_buf0[0];
buf0[1] = pw_buf0[1];
buf0[2] = pw_buf0[2];
buf0[3] = pw_buf0[3];
buf1[0] = pw_buf1[0];
buf1[1] = pw_buf1[1];
buf1[2] = pw_buf1[2];
buf1[3] = pw_buf1[3];
return apply_rules (rules_buf[il_pos].cmds, w0, w1, pw_len);
return apply_rules (rules_buf[il_pos].cmds, buf0, buf1, pw_len);
#else
@ -2573,200 +2573,200 @@ inline u32x apply_rules_vect (const u32 pw_buf0[4], const u32 pw_buf1[4], const
{
#if VECT_SIZE >= 2
case 0:
w0[0].s0 = tmp0[0];
w0[1].s0 = tmp0[1];
w0[2].s0 = tmp0[2];
w0[3].s0 = tmp0[3];
w1[0].s0 = tmp1[0];
w1[1].s0 = tmp1[1];
w1[2].s0 = tmp1[2];
w1[3].s0 = tmp1[3];
buf0[0].s0 = tmp0[0];
buf0[1].s0 = tmp0[1];
buf0[2].s0 = tmp0[2];
buf0[3].s0 = tmp0[3];
buf1[0].s0 = tmp1[0];
buf1[1].s0 = tmp1[1];
buf1[2].s0 = tmp1[2];
buf1[3].s0 = tmp1[3];
out_len.s0 = tmp_len;
break;
case 1:
w0[0].s1 = tmp0[0];
w0[1].s1 = tmp0[1];
w0[2].s1 = tmp0[2];
w0[3].s1 = tmp0[3];
w1[0].s1 = tmp1[0];
w1[1].s1 = tmp1[1];
w1[2].s1 = tmp1[2];
w1[3].s1 = tmp1[3];
buf0[0].s1 = tmp0[0];
buf0[1].s1 = tmp0[1];
buf0[2].s1 = tmp0[2];
buf0[3].s1 = tmp0[3];
buf1[0].s1 = tmp1[0];
buf1[1].s1 = tmp1[1];
buf1[2].s1 = tmp1[2];
buf1[3].s1 = tmp1[3];
out_len.s1 = tmp_len;
break;
#endif
#if VECT_SIZE >= 4
case 2:
w0[0].s2 = tmp0[0];
w0[1].s2 = tmp0[1];
w0[2].s2 = tmp0[2];
w0[3].s2 = tmp0[3];
w1[0].s2 = tmp1[0];
w1[1].s2 = tmp1[1];
w1[2].s2 = tmp1[2];
w1[3].s2 = tmp1[3];
buf0[0].s2 = tmp0[0];
buf0[1].s2 = tmp0[1];
buf0[2].s2 = tmp0[2];
buf0[3].s2 = tmp0[3];
buf1[0].s2 = tmp1[0];
buf1[1].s2 = tmp1[1];
buf1[2].s2 = tmp1[2];
buf1[3].s2 = tmp1[3];
out_len.s2 = tmp_len;
break;
case 3:
w0[0].s3 = tmp0[0];
w0[1].s3 = tmp0[1];
w0[2].s3 = tmp0[2];
w0[3].s3 = tmp0[3];
w1[0].s3 = tmp1[0];
w1[1].s3 = tmp1[1];
w1[2].s3 = tmp1[2];
w1[3].s3 = tmp1[3];
buf0[0].s3 = tmp0[0];
buf0[1].s3 = tmp0[1];
buf0[2].s3 = tmp0[2];
buf0[3].s3 = tmp0[3];
buf1[0].s3 = tmp1[0];
buf1[1].s3 = tmp1[1];
buf1[2].s3 = tmp1[2];
buf1[3].s3 = tmp1[3];
out_len.s3 = tmp_len;
break;
#endif
#if VECT_SIZE >= 8
case 4:
w0[0].s4 = tmp0[0];
w0[1].s4 = tmp0[1];
w0[2].s4 = tmp0[2];
w0[3].s4 = tmp0[3];
w1[0].s4 = tmp1[0];
w1[1].s4 = tmp1[1];
w1[2].s4 = tmp1[2];
w1[3].s4 = tmp1[3];
buf0[0].s4 = tmp0[0];
buf0[1].s4 = tmp0[1];
buf0[2].s4 = tmp0[2];
buf0[3].s4 = tmp0[3];
buf1[0].s4 = tmp1[0];
buf1[1].s4 = tmp1[1];
buf1[2].s4 = tmp1[2];
buf1[3].s4 = tmp1[3];
out_len.s4 = tmp_len;
break;
case 5:
w0[0].s5 = tmp0[0];
w0[1].s5 = tmp0[1];
w0[2].s5 = tmp0[2];
w0[3].s5 = tmp0[3];
w1[0].s5 = tmp1[0];
w1[1].s5 = tmp1[1];
w1[2].s5 = tmp1[2];
w1[3].s5 = tmp1[3];
buf0[0].s5 = tmp0[0];
buf0[1].s5 = tmp0[1];
buf0[2].s5 = tmp0[2];
buf0[3].s5 = tmp0[3];
buf1[0].s5 = tmp1[0];
buf1[1].s5 = tmp1[1];
buf1[2].s5 = tmp1[2];
buf1[3].s5 = tmp1[3];
out_len.s5 = tmp_len;
break;
case 6:
w0[0].s6 = tmp0[0];
w0[1].s6 = tmp0[1];
w0[2].s6 = tmp0[2];
w0[3].s6 = tmp0[3];
w1[0].s6 = tmp1[0];
w1[1].s6 = tmp1[1];
w1[2].s6 = tmp1[2];
w1[3].s6 = tmp1[3];
buf0[0].s6 = tmp0[0];
buf0[1].s6 = tmp0[1];
buf0[2].s6 = tmp0[2];
buf0[3].s6 = tmp0[3];
buf1[0].s6 = tmp1[0];
buf1[1].s6 = tmp1[1];
buf1[2].s6 = tmp1[2];
buf1[3].s6 = tmp1[3];
out_len.s6 = tmp_len;
break;
case 7:
w0[0].s7 = tmp0[0];
w0[1].s7 = tmp0[1];
w0[2].s7 = tmp0[2];
w0[3].s7 = tmp0[3];
w1[0].s7 = tmp1[0];
w1[1].s7 = tmp1[1];
w1[2].s7 = tmp1[2];
w1[3].s7 = tmp1[3];
buf0[0].s7 = tmp0[0];
buf0[1].s7 = tmp0[1];
buf0[2].s7 = tmp0[2];
buf0[3].s7 = tmp0[3];
buf1[0].s7 = tmp1[0];
buf1[1].s7 = tmp1[1];
buf1[2].s7 = tmp1[2];
buf1[3].s7 = tmp1[3];
out_len.s7 = tmp_len;
break;
#endif
#if VECT_SIZE >= 16
case 8:
w0[0].s8 = tmp0[0];
w0[1].s8 = tmp0[1];
w0[2].s8 = tmp0[2];
w0[3].s8 = tmp0[3];
w1[0].s8 = tmp1[0];
w1[1].s8 = tmp1[1];
w1[2].s8 = tmp1[2];
w1[3].s8 = tmp1[3];
buf0[0].s8 = tmp0[0];
buf0[1].s8 = tmp0[1];
buf0[2].s8 = tmp0[2];
buf0[3].s8 = tmp0[3];
buf1[0].s8 = tmp1[0];
buf1[1].s8 = tmp1[1];
buf1[2].s8 = tmp1[2];
buf1[3].s8 = tmp1[3];
out_len.s8 = tmp_len;
break;
case 9:
w0[0].s9 = tmp0[0];
w0[1].s9 = tmp0[1];
w0[2].s9 = tmp0[2];
w0[3].s9 = tmp0[3];
w1[0].s9 = tmp1[0];
w1[1].s9 = tmp1[1];
w1[2].s9 = tmp1[2];
w1[3].s9 = tmp1[3];
buf0[0].s9 = tmp0[0];
buf0[1].s9 = tmp0[1];
buf0[2].s9 = tmp0[2];
buf0[3].s9 = tmp0[3];
buf1[0].s9 = tmp1[0];
buf1[1].s9 = tmp1[1];
buf1[2].s9 = tmp1[2];
buf1[3].s9 = tmp1[3];
out_len.s9 = tmp_len;
break;
case 10:
w0[0].sa = tmp0[0];
w0[1].sa = tmp0[1];
w0[2].sa = tmp0[2];
w0[3].sa = tmp0[3];
w1[0].sa = tmp1[0];
w1[1].sa = tmp1[1];
w1[2].sa = tmp1[2];
w1[3].sa = tmp1[3];
buf0[0].sa = tmp0[0];
buf0[1].sa = tmp0[1];
buf0[2].sa = tmp0[2];
buf0[3].sa = tmp0[3];
buf1[0].sa = tmp1[0];
buf1[1].sa = tmp1[1];
buf1[2].sa = tmp1[2];
buf1[3].sa = tmp1[3];
out_len.sa = tmp_len;
break;
case 11:
w0[0].sb = tmp0[0];
w0[1].sb = tmp0[1];
w0[2].sb = tmp0[2];
w0[3].sb = tmp0[3];
w1[0].sb = tmp1[0];
w1[1].sb = tmp1[1];
w1[2].sb = tmp1[2];
w1[3].sb = tmp1[3];
buf0[0].sb = tmp0[0];
buf0[1].sb = tmp0[1];
buf0[2].sb = tmp0[2];
buf0[3].sb = tmp0[3];
buf1[0].sb = tmp1[0];
buf1[1].sb = tmp1[1];
buf1[2].sb = tmp1[2];
buf1[3].sb = tmp1[3];
out_len.sb = tmp_len;
break;
case 12:
w0[0].sc = tmp0[0];
w0[1].sc = tmp0[1];
w0[2].sc = tmp0[2];
w0[3].sc = tmp0[3];
w1[0].sc = tmp1[0];
w1[1].sc = tmp1[1];
w1[2].sc = tmp1[2];
w1[3].sc = tmp1[3];
buf0[0].sc = tmp0[0];
buf0[1].sc = tmp0[1];
buf0[2].sc = tmp0[2];
buf0[3].sc = tmp0[3];
buf1[0].sc = tmp1[0];
buf1[1].sc = tmp1[1];
buf1[2].sc = tmp1[2];
buf1[3].sc = tmp1[3];
out_len.sc = tmp_len;
break;
case 13:
w0[0].sd = tmp0[0];
w0[1].sd = tmp0[1];
w0[2].sd = tmp0[2];
w0[3].sd = tmp0[3];
w1[0].sd = tmp1[0];
w1[1].sd = tmp1[1];
w1[2].sd = tmp1[2];
w1[3].sd = tmp1[3];
buf0[0].sd = tmp0[0];
buf0[1].sd = tmp0[1];
buf0[2].sd = tmp0[2];
buf0[3].sd = tmp0[3];
buf1[0].sd = tmp1[0];
buf1[1].sd = tmp1[1];
buf1[2].sd = tmp1[2];
buf1[3].sd = tmp1[3];
out_len.sd = tmp_len;
break;
case 14:
w0[0].se = tmp0[0];
w0[1].se = tmp0[1];
w0[2].se = tmp0[2];
w0[3].se = tmp0[3];
w1[0].se = tmp1[0];
w1[1].se = tmp1[1];
w1[2].se = tmp1[2];
w1[3].se = tmp1[3];
buf0[0].se = tmp0[0];
buf0[1].se = tmp0[1];
buf0[2].se = tmp0[2];
buf0[3].se = tmp0[3];
buf1[0].se = tmp1[0];
buf1[1].se = tmp1[1];
buf1[2].se = tmp1[2];
buf1[3].se = tmp1[3];
out_len.se = tmp_len;
break;
case 15:
w0[0].sf = tmp0[0];
w0[1].sf = tmp0[1];
w0[2].sf = tmp0[2];
w0[3].sf = tmp0[3];
w1[0].sf = tmp1[0];
w1[1].sf = tmp1[1];
w1[2].sf = tmp1[2];
w1[3].sf = tmp1[3];
buf0[0].sf = tmp0[0];
buf0[1].sf = tmp0[1];
buf0[2].sf = tmp0[2];
buf0[3].sf = tmp0[3];
buf1[0].sf = tmp1[0];
buf1[1].sf = tmp1[1];
buf1[2].sf = tmp1[2];
buf1[3].sf = tmp1[3];
out_len.sf = tmp_len;
break;
#endif

View File

@ -6,8 +6,6 @@
#ifndef _RP_KERNEL_ON_CPU_H
#define _RP_KERNEL_ON_CPU_H
u32 swap_workaround (const u32 n);
u32 apply_rule (const u32 name, const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len);
u32 apply_rules (u32 *cmds, u32 buf0[4], u32 buf1[4], const u32 len);

View File

@ -9,124 +9,124 @@
#include "rp.h"
#include "rp_kernel_on_cpu.h"
/*
 * swap32_S (shown alongside the swap_workaround form): thin wrapper that
 * returns byte_swap_32 () of its argument.
 *
 * NOTE: this span is a diff rendering without +/- markers. The non-static
 * swap_workaround (n) lines are followed by the file-local (static)
 * swap32_S (value) lines.
 * byte_swap_32 is defined outside this view; presumably it reverses the four
 * bytes of a 32-bit value - confirm at its definition.
 */
u32 swap_workaround (const u32 n)
static u32 swap32_S (const u32 value)
{
return byte_swap_32 (n);
return byte_swap_32 (value);
}
/*
 * generate_cmask: build a per-byte mask from a packed 32-bit word.
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the form using `buf` and plain literals, followed by the form
 * using `value` and `u`-suffixed literals.
 *
 * Result: only bit 0x20 of each byte can be set. It is set when the matching
 * input byte has bit 0x80 clear, bit 0x40 set, and its low five bits in 1..26,
 * i.e. the byte is in 0x41..0x5a or 0x61..0x7a.
 * Presumably callers use the mask to flip the case of ASCII letters - confirm
 * against the call sites, which are not visible here.
 */
static u32 generate_cmask (u32 buf)
static u32 generate_cmask (const u32 value)
{
/* rmask: bit 0x20 per byte where bit 0x40 is set and bit 0x80 is clear */
const u32 rmask = ((buf & 0x40404040) >> 1)
& ~((buf & 0x80808080) >> 2);
const u32 rmask = ((value & 0x40404040u) >> 1u)
& ~((value & 0x80808080u) >> 2u);
/* hmask: bit 0x20 per byte where the low five bits are >= 27 (excluded below) */
/* lmask: bit 0x20 per byte where the low five bits are >= 1 */
const u32 hmask = (buf & 0x1f1f1f1f) + 0x05050505;
const u32 lmask = (buf & 0x1f1f1f1f) + 0x1f1f1f1f;
const u32 hmask = (value & 0x1f1f1f1fu) + 0x05050505u;
const u32 lmask = (value & 0x1f1f1f1fu) + 0x1f1f1f1fu;
return rmask & ~hmask & lmask;
}
/*
 * truncate_right: clear everything at and after byte position `offset` in the
 * eight words buf0[0..3], buf1[0..3] (treated as one consecutive sequence).
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the w0/w1/len form followed by the buf0/buf1/offset form.
 *
 * The word at index offset / 4 keeps only its low-order (offset & 3) bytes;
 * every later word is set to 0; earlier words are untouched.
 * When offset / 4 is greater than 7 no case matches and nothing is modified.
 * Presumably the words hold a byte string with the first byte in the
 * low-order position - confirm against callers.
 */
static void truncate_right (u32 w0[4], u32 w1[4], const u32 len)
static void truncate_right (u32 buf0[4], u32 buf1[4], const u32 offset)
{
/* tmp: mask with the low (offset & 3) bytes set; 0 when offset is a multiple of 4 */
const u32 tmp = (1u << ((len % 4) * 8)) - 1;
const u32 tmp = (1u << ((offset & 3u) * 8u)) - 1u;
/* select the word that contains byte position `offset` */
switch (len / 4)
switch (offset / 4)
{
case 0: w0[0] &= tmp;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 0: buf0[0] &= tmp;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 1: w0[1] &= tmp;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 1: buf0[1] &= tmp;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 2: w0[2] &= tmp;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 2: buf0[2] &= tmp;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 3: w0[3] &= tmp;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 3: buf0[3] &= tmp;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 4: w1[0] &= tmp;
w1[1] = 0;
w1[2] = 0;
w1[3] = 0;
case 4: buf1[0] &= tmp;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] = 0;
break;
case 5: w1[1] &= tmp;
w1[2] = 0;
w1[3] = 0;
case 5: buf1[1] &= tmp;
buf1[2] = 0;
buf1[3] = 0;
break;
case 6: w1[2] &= tmp;
w1[3] = 0;
case 6: buf1[2] &= tmp;
buf1[3] = 0;
break;
case 7: w1[3] &= tmp;
case 7: buf1[3] &= tmp;
break;
}
}
/*
 * truncate_left: clear everything before byte position `offset` in the eight
 * words buf0[0..3], buf1[0..3] (treated as one consecutive sequence).
 *
 * NOTE: this span is a diff rendering without +/- markers. Each changed line
 * appears twice: the w0/w1/len form followed by the buf0/buf1/offset form.
 *
 * Every word before index offset / 4 is set to 0; the word at that index has
 * its low-order (offset & 3) bytes cleared; later words are untouched.
 * When offset / 4 is greater than 7 no case matches and nothing is modified.
 * The data is not shifted - only masked in place.
 */
static void truncate_left (u32 w0[4], u32 w1[4], const u32 len)
static void truncate_left (u32 buf0[4], u32 buf1[4], const u32 offset)
{
/* tmp: inverse of the low-(offset & 3)-bytes mask, i.e. the bytes to keep */
const u32 tmp = ~((1u << ((len % 4) * 8)) - 1);
const u32 tmp = ~((1u << ((offset & 3u) * 8u)) - 1u);
/* select the word that contains byte position `offset` */
switch (len / 4)
switch (offset / 4)
{
case 0: w0[0] &= tmp;
case 0: buf0[0] &= tmp;
break;
case 1: w0[0] = 0;
w0[1] &= tmp;
case 1: buf0[0] = 0;
buf0[1] &= tmp;
break;
case 2: w0[0] = 0;
w0[1] = 0;
w0[2] &= tmp;
case 2: buf0[0] = 0;
buf0[1] = 0;
buf0[2] &= tmp;
break;
case 3: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] &= tmp;
case 3: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] &= tmp;
break;
case 4: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] &= tmp;
case 4: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] &= tmp;
break;
case 5: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] &= tmp;
case 5: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] &= tmp;
break;
case 6: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] &= tmp;
case 6: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] &= tmp;
break;
case 7: w0[0] = 0;
w0[1] = 0;
w0[2] = 0;
w0[3] = 0;
w1[0] = 0;
w1[1] = 0;
w1[2] = 0;
w1[3] &= tmp;
case 7: buf0[0] = 0;
buf0[1] = 0;
buf0[2] = 0;
buf0[3] = 0;
buf1[0] = 0;
buf1[1] = 0;
buf1[2] = 0;
buf1[3] &= tmp;
break;
}
}
@ -746,31 +746,35 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32
}
}
/*
 * append_block1: OR the low byte of src_r0 into byte position `offset` of the
 * eight destination words (first four, then dst1[0..3]).
 *
 * NOTE: this span is a diff rendering without +/- markers. Changed lines
 * appear twice: the dst0 form followed by the buf0 form. Only the first array
 * parameter is renamed; the second is `dst1` in both forms.
 *
 * The byte is OR-ed in, not assigned, so the target byte position must already
 * be zero for the result to equal the appended byte.
 * Any offset >= 28 lands in dst1[3]; there is no upper bound check.
 */
static void append_block1 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_r0)
static void append_block1 (const u32 offset, u32 buf0[4], u32 dst1[4], const u32 src_r0)
{
// this version works with 1 byte append only
const u32 tmp = (src_r0 & 0xff) << ((offset & 3) * 8);
const u32 value = src_r0 & 0xff;
dst0[0] |= (offset < 4) ? tmp : 0;
dst0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0;
dst0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0;
dst0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0;
// shift moves the byte to lane (offset & 3) inside its word
const u32 shift = (offset & 3) * 8;
const u32 tmp = value << shift;
// exactly one of the eight range tests below is true; the others OR in 0
buf0[0] |= (offset < 4) ? tmp : 0;
buf0[1] |= ((offset >= 4) && (offset < 8)) ? tmp : 0;
buf0[2] |= ((offset >= 8) && (offset < 12)) ? tmp : 0;
buf0[3] |= ((offset >= 12) && (offset < 16)) ? tmp : 0;
dst1[0] |= ((offset >= 16) && (offset < 20)) ? tmp : 0;
dst1[1] |= ((offset >= 20) && (offset < 24)) ? tmp : 0;
dst1[2] |= ((offset >= 24) && (offset < 28)) ? tmp : 0;
dst1[3] |= (offset >= 28) ? tmp : 0;
}
static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
static void append_block8 (const u32 offset, u32 buf0[4], u32 dst1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
{
switch (offset)
{
case 0:
dst0[0] = src_r0[0];
dst0[1] = src_r0[1];
dst0[2] = src_r0[2];
dst0[3] = src_r0[3];
buf0[0] = src_r0[0];
buf0[1] = src_r0[1];
buf0[2] = src_r0[2];
buf0[3] = src_r0[3];
dst1[0] = src_r1[0];
dst1[1] = src_r1[1];
dst1[2] = src_r1[2];
@ -778,10 +782,10 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 1:
dst0[0] = src_l0[0] | src_r0[0] << 8;
dst0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
dst0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
dst0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
buf0[0] = src_l0[0] | src_r0[0] << 8;
buf0[1] = src_r0[0] >> 24 | src_r0[1] << 8;
buf0[2] = src_r0[1] >> 24 | src_r0[2] << 8;
buf0[3] = src_r0[2] >> 24 | src_r0[3] << 8;
dst1[0] = src_r0[3] >> 24 | src_r1[0] << 8;
dst1[1] = src_r1[0] >> 24 | src_r1[1] << 8;
dst1[2] = src_r1[1] >> 24 | src_r1[2] << 8;
@ -789,10 +793,10 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 2:
dst0[0] = src_l0[0] | src_r0[0] << 16;
dst0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
dst0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
dst0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
buf0[0] = src_l0[0] | src_r0[0] << 16;
buf0[1] = src_r0[0] >> 16 | src_r0[1] << 16;
buf0[2] = src_r0[1] >> 16 | src_r0[2] << 16;
buf0[3] = src_r0[2] >> 16 | src_r0[3] << 16;
dst1[0] = src_r0[3] >> 16 | src_r1[0] << 16;
dst1[1] = src_r1[0] >> 16 | src_r1[1] << 16;
dst1[2] = src_r1[1] >> 16 | src_r1[2] << 16;
@ -800,10 +804,10 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 3:
dst0[0] = src_l0[0] | src_r0[0] << 24;
dst0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
dst0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
dst0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
buf0[0] = src_l0[0] | src_r0[0] << 24;
buf0[1] = src_r0[0] >> 8 | src_r0[1] << 24;
buf0[2] = src_r0[1] >> 8 | src_r0[2] << 24;
buf0[3] = src_r0[2] >> 8 | src_r0[3] << 24;
dst1[0] = src_r0[3] >> 8 | src_r1[0] << 24;
dst1[1] = src_r1[0] >> 8 | src_r1[1] << 24;
dst1[2] = src_r1[1] >> 8 | src_r1[2] << 24;
@ -811,9 +815,9 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 4:
dst0[1] = src_r0[0];
dst0[2] = src_r0[1];
dst0[3] = src_r0[2];
buf0[1] = src_r0[0];
buf0[2] = src_r0[1];
buf0[3] = src_r0[2];
dst1[0] = src_r0[3];
dst1[1] = src_r1[0];
dst1[2] = src_r1[1];
@ -821,9 +825,9 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 5:
dst0[1] = src_l0[1] | src_r0[0] << 8;
dst0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
dst0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
buf0[1] = src_l0[1] | src_r0[0] << 8;
buf0[2] = src_r0[0] >> 24 | src_r0[1] << 8;
buf0[3] = src_r0[1] >> 24 | src_r0[2] << 8;
dst1[0] = src_r0[2] >> 24 | src_r0[3] << 8;
dst1[1] = src_r0[3] >> 24 | src_r1[0] << 8;
dst1[2] = src_r1[0] >> 24 | src_r1[1] << 8;
@ -831,9 +835,9 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 6:
dst0[1] = src_l0[1] | src_r0[0] << 16;
dst0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
dst0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
buf0[1] = src_l0[1] | src_r0[0] << 16;
buf0[2] = src_r0[0] >> 16 | src_r0[1] << 16;
buf0[3] = src_r0[1] >> 16 | src_r0[2] << 16;
dst1[0] = src_r0[2] >> 16 | src_r0[3] << 16;
dst1[1] = src_r0[3] >> 16 | src_r1[0] << 16;
dst1[2] = src_r1[0] >> 16 | src_r1[1] << 16;
@ -841,9 +845,9 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 7:
dst0[1] = src_l0[1] | src_r0[0] << 24;
dst0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
dst0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
buf0[1] = src_l0[1] | src_r0[0] << 24;
buf0[2] = src_r0[0] >> 8 | src_r0[1] << 24;
buf0[3] = src_r0[1] >> 8 | src_r0[2] << 24;
dst1[0] = src_r0[2] >> 8 | src_r0[3] << 24;
dst1[1] = src_r0[3] >> 8 | src_r1[0] << 24;
dst1[2] = src_r1[0] >> 8 | src_r1[1] << 24;
@ -851,8 +855,8 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 8:
dst0[2] = src_r0[0];
dst0[3] = src_r0[1];
buf0[2] = src_r0[0];
buf0[3] = src_r0[1];
dst1[0] = src_r0[2];
dst1[1] = src_r0[3];
dst1[2] = src_r1[0];
@ -860,8 +864,8 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 9:
dst0[2] = src_l0[2] | src_r0[0] << 8;
dst0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
buf0[2] = src_l0[2] | src_r0[0] << 8;
buf0[3] = src_r0[0] >> 24 | src_r0[1] << 8;
dst1[0] = src_r0[1] >> 24 | src_r0[2] << 8;
dst1[1] = src_r0[2] >> 24 | src_r0[3] << 8;
dst1[2] = src_r0[3] >> 24 | src_r1[0] << 8;
@ -869,8 +873,8 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 10:
dst0[2] = src_l0[2] | src_r0[0] << 16;
dst0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
buf0[2] = src_l0[2] | src_r0[0] << 16;
buf0[3] = src_r0[0] >> 16 | src_r0[1] << 16;
dst1[0] = src_r0[1] >> 16 | src_r0[2] << 16;
dst1[1] = src_r0[2] >> 16 | src_r0[3] << 16;
dst1[2] = src_r0[3] >> 16 | src_r1[0] << 16;
@ -878,8 +882,8 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 11:
dst0[2] = src_l0[2] | src_r0[0] << 24;
dst0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
buf0[2] = src_l0[2] | src_r0[0] << 24;
buf0[3] = src_r0[0] >> 8 | src_r0[1] << 24;
dst1[0] = src_r0[1] >> 8 | src_r0[2] << 24;
dst1[1] = src_r0[2] >> 8 | src_r0[3] << 24;
dst1[2] = src_r0[3] >> 8 | src_r1[0] << 24;
@ -887,7 +891,7 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 12:
dst0[3] = src_r0[0];
buf0[3] = src_r0[0];
dst1[0] = src_r0[1];
dst1[1] = src_r0[2];
dst1[2] = src_r0[3];
@ -895,7 +899,7 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 13:
dst0[3] = src_l0[3] | src_r0[0] << 8;
buf0[3] = src_l0[3] | src_r0[0] << 8;
dst1[0] = src_r0[0] >> 24 | src_r0[1] << 8;
dst1[1] = src_r0[1] >> 24 | src_r0[2] << 8;
dst1[2] = src_r0[2] >> 24 | src_r0[3] << 8;
@ -903,7 +907,7 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 14:
dst0[3] = src_l0[3] | src_r0[0] << 16;
buf0[3] = src_l0[3] | src_r0[0] << 16;
dst1[0] = src_r0[0] >> 16 | src_r0[1] << 16;
dst1[1] = src_r0[1] >> 16 | src_r0[2] << 16;
dst1[2] = src_r0[2] >> 16 | src_r0[3] << 16;
@ -911,7 +915,7 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32
break;
case 15:
dst0[3] = src_l0[3] | src_r0[0] << 24;
buf0[3] = src_l0[3] | src_r0[0] << 24;
dst1[0] = src_r0[0] >> 8 | src_r0[1] << 24;
dst1[1] = src_r0[1] >> 8 | src_r0[2] << 24;
dst1[2] = src_r0[2] >> 8 | src_r0[3] << 24;
@ -1024,14 +1028,14 @@ static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], con
tib41[2] = out0[1];
tib41[3] = out0[0];
out0[0] = swap_workaround (tib40[0]);
out0[1] = swap_workaround (tib40[1]);
out0[2] = swap_workaround (tib40[2]);
out0[3] = swap_workaround (tib40[3]);
out1[0] = swap_workaround (tib41[0]);
out1[1] = swap_workaround (tib41[1]);
out1[2] = swap_workaround (tib41[2]);
out1[3] = swap_workaround (tib41[3]);
out0[0] = swap32_S (tib40[0]);
out0[1] = swap32_S (tib40[1]);
out0[2] = swap32_S (tib40[2]);
out0[3] = swap32_S (tib40[3]);
out1[0] = swap32_S (tib41[0]);
out1[1] = swap32_S (tib41[1]);
out1[2] = swap32_S (tib41[2]);
out1[3] = swap32_S (tib41[3]);
}
static u32 rule_op_mangle_lrest (MAYBE_UNUSED const u32 p0, MAYBE_UNUSED const u32 p1, MAYBE_UNUSED u32 buf0[4], MAYBE_UNUSED u32 buf1[4], const u32 in_len)