diff --git a/OpenCL/rp.c b/OpenCL/rp.c index 4c29dcd16..b495d15dc 100644 --- a/OpenCL/rp.c +++ b/OpenCL/rp.c @@ -127,50 +127,50 @@ static void truncate_left (u32 w0[4], u32 w1[4], const u32 len) static void lshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { #ifdef IS_NV - out0[0] = __byte_perm (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm (in1[2], in1[3], 0x4321); - out1[3] = __byte_perm (in1[3], 0, 0x4321); + out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321); + out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[3] = __byte_perm_S (in1[3], 0, 0x4321); #endif #if defined IS_AMD || defined IS_GENERIC - out0[0] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[3], in1[2], 1); - out1[3] = amd_bytealign ( 0, in1[3], 1); + out0[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[3], in1[2], 1); + out1[3] = amd_bytealign_S ( 0, in1[3], 1); #endif } static void rshift_block (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[4]) { #ifdef IS_NV - out1[3] = __byte_perm (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm (in0[0], in0[1], 0x6543); - out0[0] = __byte_perm ( 0, in0[0], 0x6543); + out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543); + out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[0] = __byte_perm_S ( 0, in0[0], 0x6543); #endif #if defined IS_AMD || defined IS_GENERIC - out1[3] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[1], in1[0], 3); - out1[0] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[1], in0[0], 3); - out0[0] = amd_bytealign (in0[0], 0, 3); + out1[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[1], in0[0], 3); + out0[0] = amd_bytealign_S (in0[0], 0, 3); #endif } @@ -188,32 +188,32 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[2]; out1[3] = in1[3]; break; - case 1: out0[0] = __byte_perm (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm (in1[2], in1[3], 0x4321); - out1[3] = __byte_perm (in1[3], 0, 0x4321); + case 1: out0[0] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[1] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[2] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[3] = __byte_perm_S (in0[3], in1[0], 0x4321); + out1[0] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[1] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[2] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[3] = __byte_perm_S (in1[3], 0, 0x4321); break; - case 2: out0[0] = __byte_perm (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm (in0[2], in0[3], 0x5432); - out0[3] = __byte_perm (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm (in1[2], in1[3], 0x5432); - out1[3] = __byte_perm (in1[3], 0, 0x5432); + case 2: out0[0] = __byte_perm_S (in0[0], in0[1], 0x5432); + out0[1] = __byte_perm_S (in0[1], in0[2], 0x5432); + out0[2] = __byte_perm_S (in0[2], in0[3], 0x5432); + out0[3] = __byte_perm_S (in0[3], in1[0], 0x5432); + out1[0] = __byte_perm_S (in1[0], in1[1], 0x5432); + out1[1] = __byte_perm_S (in1[1], in1[2], 0x5432); + out1[2] = __byte_perm_S (in1[2], in1[3], 0x5432); + out1[3] = __byte_perm_S (in1[3], 0, 0x5432); break; - case 3: out0[0] = __byte_perm (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm (in0[1], in0[2], 0x6543); - out0[2] = __byte_perm (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm (in1[2], in1[3], 0x6543); - out1[3] = __byte_perm (in1[3], 0, 0x6543); + case 3: out0[0] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[1] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[2] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[3] = __byte_perm_S (in0[3], in1[0], 0x6543); + out1[0] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[1] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[2] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[3] = __byte_perm_S (in1[3], 0, 0x6543); break; case 4: out0[0] = in0[1]; out0[1] = in0[2]; @@ -224,31 +224,31 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[3]; out1[3] = 0; break; - case 5: out0[0] = __byte_perm (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm (in1[3], 0, 0x4321); + case 5: out0[0] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[1] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[2] = __byte_perm_S (in0[3], in1[0], 0x4321); + out0[3] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[0] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[1] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[2] = __byte_perm_S (in1[3], 0, 0x4321); out1[3] = 0; break; - case 6: out0[0] = __byte_perm (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm (in1[3], 0, 0x5432); + case 6: out0[0] = __byte_perm_S (in0[1], in0[2], 0x5432); + out0[1] = __byte_perm_S (in0[2], in0[3], 0x5432); + out0[2] = __byte_perm_S (in0[3], in1[0], 0x5432); + out0[3] = __byte_perm_S (in1[0], in1[1], 0x5432); + out1[0] = __byte_perm_S (in1[1], in1[2], 0x5432); + out1[1] = __byte_perm_S (in1[2], in1[3], 0x5432); + out1[2] = __byte_perm_S (in1[3], 0, 0x5432); out1[3] = 0; break; - case 7: out0[0] = __byte_perm (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm (in1[3], 0, 0x6543); + case 7: out0[0] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[1] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[2] = __byte_perm_S (in0[3], in1[0], 0x6543); + out0[3] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[0] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[1] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[2] = __byte_perm_S (in1[3], 0, 0x6543); out1[3] = 0; break; case 8: out0[0] = in0[2]; @@ -260,30 +260,30 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 9: out0[0] = __byte_perm (in0[2], in0[3], 0x4321); - out0[1] = __byte_perm (in0[3], in1[0], 0x4321); - out0[2] = __byte_perm (in1[0], in1[1], 0x4321); - out0[3] = __byte_perm (in1[1], in1[2], 0x4321); - out1[0] = __byte_perm (in1[2], in1[3], 0x4321); - out1[1] = __byte_perm (in1[3], 0, 0x4321); + case 9: out0[0] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[1] = __byte_perm_S (in0[3], in1[0], 0x4321); + out0[2] = __byte_perm_S (in1[0], in1[1], 0x4321); + out0[3] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[0] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[1] = __byte_perm_S (in1[3], 0, 0x4321); out1[2] = 0; out1[3] = 0; break; - case 10: out0[0] = __byte_perm (in0[2], in0[3], 0x5432); - out0[1] = __byte_perm (in0[3], in1[0], 0x5432); - out0[2] = __byte_perm (in1[0], in1[1], 0x5432); - out0[3] = __byte_perm (in1[1], in1[2], 0x5432); - out1[0] = __byte_perm (in1[2], in1[3], 0x5432); - out1[1] = __byte_perm (in1[3], 0, 0x5432); + case 10: out0[0] = __byte_perm_S (in0[2], in0[3], 0x5432); + out0[1] = __byte_perm_S (in0[3], in1[0], 0x5432); + out0[2] = __byte_perm_S (in1[0], in1[1], 0x5432); + out0[3] = __byte_perm_S (in1[1], in1[2], 0x5432); + out1[0] = __byte_perm_S (in1[2], in1[3], 0x5432); + out1[1] = __byte_perm_S (in1[3], 0, 0x5432); out1[2] = 0; out1[3] = 0; break; - case 11: out0[0] = __byte_perm (in0[2], in0[3], 0x6543); - out0[1] = __byte_perm (in0[3], in1[0], 0x6543); - out0[2] = __byte_perm (in1[0], in1[1], 0x6543); - out0[3] = __byte_perm (in1[1], in1[2], 0x6543); - out1[0] = __byte_perm (in1[2], in1[3], 0x6543); - out1[1] = __byte_perm (in1[3], 0, 0x6543); + case 11: out0[0] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[1] = __byte_perm_S (in0[3], in1[0], 0x6543); + out0[2] = __byte_perm_S (in1[0], in1[1], 0x6543); + out0[3] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[0] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[1] = __byte_perm_S (in1[3], 0, 0x6543); out1[2] = 0; out1[3] = 0; break; @@ -297,29 +297,29 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[3] = 0; break; case 13: - out0[0] = __byte_perm (in0[3], in1[0], 0x4321); - out0[1] = __byte_perm (in1[0], in1[1], 0x4321); - out0[2] = __byte_perm (in1[1], in1[2], 0x4321); - out0[3] = __byte_perm (in1[2], in1[3], 0x4321); - out1[0] = __byte_perm (in1[3], 0, 0x4321); + out0[0] = __byte_perm_S (in0[3], in1[0], 0x4321); + out0[1] = __byte_perm_S (in1[0], in1[1], 0x4321); + out0[2] = __byte_perm_S (in1[1], in1[2], 0x4321); + out0[3] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[0] = __byte_perm_S (in1[3], 0, 0x4321); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 14: out0[0] = __byte_perm (in0[3], in1[0], 0x5432); - out0[1] = __byte_perm (in1[0], in1[1], 0x5432); - out0[2] = __byte_perm (in1[1], in1[2], 0x5432); - out0[3] = __byte_perm (in1[2], in1[3], 0x5432); - out1[0] = __byte_perm (in1[3], 0, 0x5432); + case 14: out0[0] = __byte_perm_S (in0[3], in1[0], 0x5432); + out0[1] = __byte_perm_S (in1[0], in1[1], 0x5432); + out0[2] = __byte_perm_S (in1[1], in1[2], 0x5432); + out0[3] = __byte_perm_S (in1[2], in1[3], 0x5432); + out1[0] = __byte_perm_S (in1[3], 0, 0x5432); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 15: out0[0] = __byte_perm (in0[3], in1[0], 0x6543); - out0[1] = __byte_perm (in1[0], in1[1], 0x6543); - out0[2] = __byte_perm (in1[1], in1[2], 0x6543); - out0[3] = __byte_perm (in1[2], in1[3], 0x6543); - out1[0] = __byte_perm (in1[3], 0, 0x6543); + case 15: out0[0] = __byte_perm_S (in0[3], in1[0], 0x6543); + out0[1] = __byte_perm_S (in1[0], in1[1], 0x6543); + out0[2] = __byte_perm_S (in1[1], in1[2], 0x6543); + out0[3] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[0] = __byte_perm_S (in1[3], 0, 0x6543); out1[1] = 0; out1[2] = 0; out1[3] = 0; @@ -333,28 +333,28 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 17: out0[0] = __byte_perm (in1[0], in1[1], 0x4321); - out0[1] = __byte_perm (in1[1], in1[2], 0x4321); - out0[2] = __byte_perm (in1[2], in1[3], 0x4321); - out0[3] = __byte_perm (in1[3], 0, 0x4321); + case 17: out0[0] = __byte_perm_S (in1[0], in1[1], 0x4321); + out0[1] = __byte_perm_S (in1[1], in1[2], 0x4321); + out0[2] = __byte_perm_S (in1[2], in1[3], 0x4321); + out0[3] = __byte_perm_S (in1[3], 0, 0x4321); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 18: out0[0] = __byte_perm (in1[0], in1[1], 0x5432); - out0[1] = __byte_perm (in1[1], in1[2], 0x5432); - out0[2] = __byte_perm (in1[2], in1[3], 0x5432); - out0[3] = __byte_perm (in1[3], 0, 0x5432); + case 18: out0[0] = __byte_perm_S (in1[0], in1[1], 0x5432); + out0[1] = __byte_perm_S (in1[1], in1[2], 0x5432); + out0[2] = __byte_perm_S (in1[2], in1[3], 0x5432); + out0[3] = __byte_perm_S (in1[3], 0, 0x5432); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 19: out0[0] = __byte_perm (in1[0], in1[1], 0x6543); - out0[1] = __byte_perm (in1[1], in1[2], 0x6543); - out0[2] = __byte_perm (in1[2], in1[3], 0x6543); - out0[3] = __byte_perm (in1[3], 0, 0x6543); + case 19: out0[0] = __byte_perm_S (in1[0], in1[1], 0x6543); + out0[1] = __byte_perm_S (in1[1], in1[2], 0x6543); + out0[2] = __byte_perm_S (in1[2], in1[3], 0x6543); + out0[3] = __byte_perm_S (in1[3], 0, 0x6543); out1[0] = 0; out1[1] = 0; out1[2] = 0; @@ -369,27 +369,27 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 21: out0[0] = __byte_perm (in1[1], in1[2], 0x4321); - out0[1] = __byte_perm (in1[2], in1[3], 0x4321); - out0[2] = __byte_perm (in1[3], 0, 0x4321); + case 21: out0[0] = __byte_perm_S (in1[1], in1[2], 0x4321); + out0[1] = __byte_perm_S (in1[2], in1[3], 0x4321); + out0[2] = __byte_perm_S (in1[3], 0, 0x4321); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 22: out0[0] = __byte_perm (in1[1], in1[2], 0x5432); - out0[1] = __byte_perm (in1[2], in1[3], 0x5432); - out0[2] = __byte_perm (in1[3], 0, 0x5432); + case 22: out0[0] = __byte_perm_S (in1[1], in1[2], 0x5432); + out0[1] = __byte_perm_S (in1[2], in1[3], 0x5432); + out0[2] = __byte_perm_S (in1[3], 0, 0x5432); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 23: out0[0] = __byte_perm (in1[1], in1[2], 0x6543); - out0[1] = __byte_perm (in1[2], in1[3], 0x6543); - out0[2] = __byte_perm (in1[3], 0, 0x6543); + case 23: out0[0] = __byte_perm_S (in1[1], in1[2], 0x6543); + out0[1] = __byte_perm_S (in1[2], in1[3], 0x6543); + out0[2] = __byte_perm_S (in1[3], 0, 0x6543); out0[3] = 0; out1[0] = 0; out1[1] = 0; @@ -405,8 +405,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 25: out0[0] = __byte_perm (in1[2], in1[3], 0x4321); - out0[1] = __byte_perm (in1[3], 0, 0x4321); + case 25: out0[0] = __byte_perm_S (in1[2], in1[3], 0x4321); + out0[1] = __byte_perm_S (in1[3], 0, 0x4321); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -414,8 +414,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 26: out0[0] = __byte_perm (in1[2], in1[3], 0x5432); - out0[1] = __byte_perm (in1[3], 0, 0x5432); + case 26: out0[0] = __byte_perm_S (in1[2], in1[3], 0x5432); + out0[1] = __byte_perm_S (in1[3], 0, 0x5432); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -423,8 +423,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 27: out0[0] = __byte_perm (in1[2], in1[3], 0x6543); - out0[1] = __byte_perm (in1[3], 0, 0x6543); + case 27: out0[0] = __byte_perm_S (in1[2], in1[3], 0x6543); + out0[1] = __byte_perm_S (in1[3], 0, 0x6543); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -441,7 +441,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 29: out0[0] = __byte_perm (in1[3], 0, 0x4321); + case 29: out0[0] = __byte_perm_S (in1[3], 0, 0x4321); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -450,7 +450,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 30: out0[0] = __byte_perm (in1[3], 0, 0x5432); + case 30: out0[0] = __byte_perm_S (in1[3], 0, 0x5432); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -459,7 +459,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 31: out0[0] = __byte_perm (in1[3], 0, 0x6543); + case 31: out0[0] = __byte_perm_S (in1[3], 0, 0x6543); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -483,32 +483,32 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[2]; out1[3] = in1[3]; break; - case 1: out0[0] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[3], in1[2], 1); - out1[3] = amd_bytealign ( 0, in1[3], 1); + case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[3], in1[2], 1); + out1[3] = amd_bytealign_S ( 0, in1[3], 1); break; - case 2: out0[0] = amd_bytealign (in0[1], in0[0], 2); - out0[1] = amd_bytealign (in0[2], in0[1], 2); - out0[2] = amd_bytealign (in0[3], in0[2], 2); - out0[3] = amd_bytealign (in1[0], in0[3], 2); - out1[0] = amd_bytealign (in1[1], in1[0], 2); - out1[1] = amd_bytealign (in1[2], in1[1], 2); - out1[2] = amd_bytealign (in1[3], in1[2], 2); - out1[3] = amd_bytealign ( 0, in1[3], 2); + case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2); + out0[1] = amd_bytealign_S (in0[2], in0[1], 2); + out0[2] = amd_bytealign_S (in0[3], in0[2], 2); + out0[3] = amd_bytealign_S (in1[0], in0[3], 2); + out1[0] = amd_bytealign_S (in1[1], in1[0], 2); + out1[1] = amd_bytealign_S (in1[2], in1[1], 2); + out1[2] = amd_bytealign_S (in1[3], in1[2], 2); + out1[3] = amd_bytealign_S ( 0, in1[3], 2); break; - case 3: out0[0] = amd_bytealign (in0[1], in0[0], 3); - out0[1] = amd_bytealign (in0[2], in0[1], 3); - out0[2] = amd_bytealign (in0[3], in0[2], 3); - out0[3] = amd_bytealign (in1[0], in0[3], 3); - out1[0] = amd_bytealign (in1[1], in1[0], 3); - out1[1] = amd_bytealign (in1[2], in1[1], 3); - out1[2] = amd_bytealign (in1[3], in1[2], 3); - out1[3] = amd_bytealign ( 0, in1[3], 3); + case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3); + out0[1] = amd_bytealign_S (in0[2], in0[1], 3); + out0[2] = amd_bytealign_S (in0[3], in0[2], 3); + out0[3] = amd_bytealign_S (in1[0], in0[3], 3); + out1[0] = amd_bytealign_S (in1[1], in1[0], 3); + out1[1] = amd_bytealign_S (in1[2], in1[1], 3); + out1[2] = amd_bytealign_S (in1[3], in1[2], 3); + out1[3] = amd_bytealign_S ( 0, in1[3], 3); break; case 4: out0[0] = in0[1]; out0[1] = in0[2]; @@ -519,31 +519,31 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = in1[3]; out1[3] = 0; break; - case 5: out0[0] = amd_bytealign (in0[2], in0[1], 1); - out0[1] = amd_bytealign (in0[3], in0[2], 1); - out0[2] = amd_bytealign (in1[0], in0[3], 1); - out0[3] = amd_bytealign (in1[1], in1[0], 1); - out1[0] = amd_bytealign (in1[2], in1[1], 1); - out1[1] = amd_bytealign (in1[3], in1[2], 1); - out1[2] = amd_bytealign ( 0, in1[3], 1); + case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1); + out0[1] = amd_bytealign_S (in0[3], in0[2], 1); + out0[2] = amd_bytealign_S (in1[0], in0[3], 1); + out0[3] = amd_bytealign_S (in1[1], in1[0], 1); + out1[0] = amd_bytealign_S (in1[2], in1[1], 1); + out1[1] = amd_bytealign_S (in1[3], in1[2], 1); + out1[2] = amd_bytealign_S ( 0, in1[3], 1); out1[3] = 0; break; - case 6: out0[0] = amd_bytealign (in0[2], in0[1], 2); - out0[1] = amd_bytealign (in0[3], in0[2], 2); - out0[2] = amd_bytealign (in1[0], in0[3], 2); - out0[3] = amd_bytealign (in1[1], in1[0], 2); - out1[0] = amd_bytealign (in1[2], in1[1], 2); - out1[1] = amd_bytealign (in1[3], in1[2], 2); - out1[2] = amd_bytealign ( 0, in1[3], 2); + case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2); + out0[1] = amd_bytealign_S (in0[3], in0[2], 2); + out0[2] = amd_bytealign_S (in1[0], in0[3], 2); + out0[3] = amd_bytealign_S (in1[1], in1[0], 2); + out1[0] = amd_bytealign_S (in1[2], in1[1], 2); + out1[1] = amd_bytealign_S (in1[3], in1[2], 2); + out1[2] = amd_bytealign_S ( 0, in1[3], 2); out1[3] = 0; break; - case 7: out0[0] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in1[1], in1[0], 3); - out1[0] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign ( 0, in1[3], 3); + case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S ( 0, in1[3], 3); out1[3] = 0; break; case 8: out0[0] = in0[2]; @@ -555,30 +555,30 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 9: out0[0] = amd_bytealign (in0[3], in0[2], 1); - out0[1] = amd_bytealign (in1[0], in0[3], 1); - out0[2] = amd_bytealign (in1[1], in1[0], 1); - out0[3] = amd_bytealign (in1[2], in1[1], 1); - out1[0] = amd_bytealign (in1[3], in1[2], 1); - out1[1] = amd_bytealign ( 0, in1[3], 1); + case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1); + out0[1] = amd_bytealign_S (in1[0], in0[3], 1); + out0[2] = amd_bytealign_S (in1[1], in1[0], 1); + out0[3] = amd_bytealign_S (in1[2], in1[1], 1); + out1[0] = amd_bytealign_S (in1[3], in1[2], 1); + out1[1] = amd_bytealign_S ( 0, in1[3], 1); out1[2] = 0; out1[3] = 0; break; - case 10: out0[0] = amd_bytealign (in0[3], in0[2], 2); - out0[1] = amd_bytealign (in1[0], in0[3], 2); - out0[2] = amd_bytealign (in1[1], in1[0], 2); - out0[3] = amd_bytealign (in1[2], in1[1], 2); - out1[0] = amd_bytealign (in1[3], in1[2], 2); - out1[1] = amd_bytealign ( 0, in1[3], 2); + case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2); + out0[1] = amd_bytealign_S (in1[0], in0[3], 2); + out0[2] = amd_bytealign_S (in1[1], in1[0], 2); + out0[3] = amd_bytealign_S (in1[2], in1[1], 2); + out1[0] = amd_bytealign_S (in1[3], in1[2], 2); + out1[1] = amd_bytealign_S ( 0, in1[3], 2); out1[2] = 0; out1[3] = 0; break; - case 11: out0[0] = amd_bytealign (in0[3], in0[2], 3); - out0[1] = amd_bytealign (in1[0], in0[3], 3); - out0[2] = amd_bytealign (in1[1], in1[0], 3); - out0[3] = amd_bytealign (in1[2], in1[1], 3); - out1[0] = amd_bytealign (in1[3], in1[2], 3); - out1[1] = amd_bytealign ( 0, in1[3], 3); + case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3); + out0[1] = amd_bytealign_S (in1[0], in0[3], 3); + out0[2] = amd_bytealign_S (in1[1], in1[0], 3); + out0[3] = amd_bytealign_S (in1[2], in1[1], 3); + out1[0] = amd_bytealign_S (in1[3], in1[2], 3); + out1[1] = amd_bytealign_S ( 0, in1[3], 3); out1[2] = 0; out1[3] = 0; break; @@ -591,29 +591,29 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 13: out0[0] = amd_bytealign (in1[0], in0[3], 1); - out0[1] = amd_bytealign (in1[1], in1[0], 1); - out0[2] = amd_bytealign (in1[2], in1[1], 1); - out0[3] = amd_bytealign (in1[3], in1[2], 1); - out1[0] = amd_bytealign ( 0, in1[3], 1); + case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1); + out0[1] = amd_bytealign_S (in1[1], in1[0], 1); + out0[2] = amd_bytealign_S (in1[2], in1[1], 1); + out0[3] = amd_bytealign_S (in1[3], in1[2], 1); + out1[0] = amd_bytealign_S ( 0, in1[3], 1); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 14: out0[0] = amd_bytealign (in1[0], in0[3], 2); - out0[1] = amd_bytealign (in1[1], in1[0], 2); - out0[2] = amd_bytealign (in1[2], in1[1], 2); - out0[3] = amd_bytealign (in1[3], in1[2], 2); - out1[0] = amd_bytealign ( 0, in1[3], 2); + case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2); + out0[1] = amd_bytealign_S (in1[1], in1[0], 2); + out0[2] = amd_bytealign_S (in1[2], in1[1], 2); + out0[3] = amd_bytealign_S (in1[3], in1[2], 2); + out1[0] = amd_bytealign_S ( 0, in1[3], 2); out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 15: out0[0] = amd_bytealign (in1[0], in0[3], 3); - out0[1] = amd_bytealign (in1[1], in1[0], 3); - out0[2] = amd_bytealign (in1[2], in1[1], 3); - out0[3] = amd_bytealign (in1[3], in1[2], 3); - out1[0] = amd_bytealign ( 0, in1[3], 3); + case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[1] = amd_bytealign_S (in1[1], in1[0], 3); + out0[2] = amd_bytealign_S (in1[2], in1[1], 3); + out0[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[0] = amd_bytealign_S ( 0, in1[3], 3); out1[1] = 0; out1[2] = 0; out1[3] = 0; @@ -627,28 +627,28 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 17: out0[0] = amd_bytealign (in1[1], in1[0], 1); - out0[1] = amd_bytealign (in1[2], in1[1], 1); - out0[2] = amd_bytealign (in1[3], in1[2], 1); - out0[3] = amd_bytealign ( 0, in1[3], 1); + case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1); + out0[1] = amd_bytealign_S (in1[2], in1[1], 1); + out0[2] = amd_bytealign_S (in1[3], in1[2], 1); + out0[3] = amd_bytealign_S ( 0, in1[3], 1); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 18: out0[0] = amd_bytealign (in1[1], in1[0], 2); - out0[1] = amd_bytealign (in1[2], in1[1], 2); - out0[2] = amd_bytealign (in1[3], in1[2], 2); - out0[3] = amd_bytealign ( 0, in1[3], 2); + case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2); + out0[1] = amd_bytealign_S (in1[2], in1[1], 2); + out0[2] = amd_bytealign_S (in1[3], in1[2], 2); + out0[3] = amd_bytealign_S ( 0, in1[3], 2); out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 19: out0[0] = amd_bytealign (in1[1], in1[0], 3); - out0[1] = amd_bytealign (in1[2], in1[1], 3); - out0[2] = amd_bytealign (in1[3], in1[2], 3); - out0[3] = amd_bytealign ( 0, in1[3], 3); + case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3); + out0[1] = amd_bytealign_S (in1[2], in1[1], 3); + out0[2] = amd_bytealign_S (in1[3], in1[2], 3); + out0[3] = amd_bytealign_S ( 0, in1[3], 3); out1[0] = 0; out1[1] = 0; out1[2] = 0; @@ -663,27 +663,27 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 21: out0[0] = amd_bytealign (in1[2], in1[1], 1); - out0[1] = amd_bytealign (in1[3], in1[2], 1); - out0[2] = amd_bytealign ( 0, in1[3], 1); + case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1); + out0[1] = amd_bytealign_S (in1[3], in1[2], 1); + out0[2] = amd_bytealign_S ( 0, in1[3], 1); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 22: out0[0] = amd_bytealign (in1[2], in1[1], 2); - out0[1] = amd_bytealign (in1[3], in1[2], 2); - out0[2] = amd_bytealign ( 0, in1[3], 2); + case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2); + out0[1] = amd_bytealign_S (in1[3], in1[2], 2); + out0[2] = amd_bytealign_S ( 0, in1[3], 2); out0[3] = 0; out1[0] = 0; out1[1] = 0; out1[2] = 0; out1[3] = 0; break; - case 23: out0[0] = amd_bytealign (in1[2], in1[1], 3); - out0[1] = amd_bytealign (in1[3], in1[2], 3); - out0[2] = amd_bytealign ( 0, in1[3], 3); + case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3); + out0[1] = amd_bytealign_S (in1[3], in1[2], 3); + out0[2] = amd_bytealign_S ( 0, in1[3], 3); out0[3] = 0; out1[0] = 0; out1[1] = 0; @@ -699,8 +699,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 25: out0[0] = amd_bytealign (in1[3], in1[2], 1); - out0[1] = amd_bytealign ( 0, in1[3], 1); + case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1); + out0[1] = amd_bytealign_S ( 0, in1[3], 1); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -708,8 +708,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 26: out0[0] = amd_bytealign (in1[3], in1[2], 2); - out0[1] = amd_bytealign ( 0, in1[3], 2); + case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2); + out0[1] = amd_bytealign_S ( 0, in1[3], 2); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -717,8 +717,8 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 27: out0[0] = amd_bytealign (in1[3], in1[2], 3); - out0[1] = amd_bytealign ( 0, in1[3], 3); + case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3); + out0[1] = amd_bytealign_S ( 0, in1[3], 3); out0[2] = 0; out0[3] = 0; out1[0] = 0; @@ -735,7 +735,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 29: out0[0] = amd_bytealign ( 0, in1[3], 1); + case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -744,7 +744,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 30: out0[0] = amd_bytealign ( 0, in1[3], 2); + case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -753,7 +753,7 @@ static void lshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out1[2] = 0; out1[3] = 0; break; - case 31: out0[0] = amd_bytealign ( 0, in1[3], 3); + case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3); out0[1] = 0; out0[2] = 0; out0[3] = 0; @@ -780,32 +780,32 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = in0[1]; out0[0] = in0[0]; break; - case 1: out1[3] = __byte_perm (in1[2], in1[3], 0x6543); - out1[2] = __byte_perm (in1[1], in1[2], 0x6543); - out1[1] = __byte_perm (in1[0], in1[1], 0x6543); - out1[0] = __byte_perm (in0[3], in1[0], 0x6543); - out0[3] = __byte_perm (in0[2], in0[3], 0x6543); - out0[2] = __byte_perm (in0[1], in0[2], 0x6543); - out0[1] = __byte_perm (in0[0], in0[1], 0x6543); - out0[0] = __byte_perm ( 0, in0[0], 0x6543); + case 1: out1[3] = __byte_perm_S (in1[2], in1[3], 0x6543); + out1[2] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[1] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[0] = __byte_perm_S (in0[3], in1[0], 0x6543); + out0[3] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[2] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[1] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[0] = __byte_perm_S ( 0, in0[0], 0x6543); break; - case 2: out1[3] = __byte_perm (in1[2], in1[3], 0x5432); - out1[2] = __byte_perm (in1[1], in1[2], 0x5432); - out1[1] = __byte_perm (in1[0], in1[1], 0x5432); - out1[0] = __byte_perm (in0[3], in1[0], 0x5432); - out0[3] = __byte_perm (in0[2], in0[3], 0x5432); - out0[2] = __byte_perm (in0[1], in0[2], 0x5432); - out0[1] = __byte_perm (in0[0], in0[1], 0x5432); - out0[0] = __byte_perm ( 0, in0[0], 0x5432); + case 2: out1[3] = __byte_perm_S (in1[2], in1[3], 0x5432); + out1[2] = __byte_perm_S (in1[1], in1[2], 0x5432); + out1[1] = __byte_perm_S (in1[0], in1[1], 0x5432); + out1[0] = __byte_perm_S (in0[3], in1[0], 0x5432); + out0[3] = __byte_perm_S (in0[2], in0[3], 0x5432); + out0[2] = __byte_perm_S (in0[1], in0[2], 0x5432); + out0[1] = __byte_perm_S (in0[0], in0[1], 0x5432); + out0[0] = __byte_perm_S ( 0, in0[0], 0x5432); break; - case 3: out1[3] = __byte_perm (in1[2], in1[3], 0x4321); - out1[2] = __byte_perm (in1[1], in1[2], 0x4321); - out1[1] = __byte_perm (in1[0], in1[1], 0x4321); - out1[0] = __byte_perm (in0[3], in1[0], 0x4321); - out0[3] = __byte_perm (in0[2], in0[3], 0x4321); - out0[2] = __byte_perm (in0[1], in0[2], 0x4321); - out0[1] = __byte_perm (in0[0], in0[1], 0x4321); - out0[0] = __byte_perm ( 0, in0[0], 0x4321); + case 3: out1[3] = __byte_perm_S (in1[2], in1[3], 0x4321); + out1[2] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[1] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[0] = __byte_perm_S (in0[3], in1[0], 0x4321); + out0[3] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[2] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[1] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[0] = __byte_perm_S ( 0, in0[0], 0x4321); break; case 4: out1[3] = in1[2]; out1[2] = in1[1]; @@ -816,31 +816,31 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = in0[0]; out0[0] = 0; break; - case 5: out1[3] = __byte_perm (in1[1], in1[2], 0x6543); - out1[2] = __byte_perm (in1[0], in1[1], 0x6543); - out1[1] = __byte_perm (in0[3], in1[0], 0x6543); - out1[0] = __byte_perm (in0[2], in0[3], 0x6543); - out0[3] = __byte_perm (in0[1], in0[2], 0x6543); - out0[2] = __byte_perm (in0[0], in0[1], 0x6543); - out0[1] = __byte_perm ( 0, in0[0], 0x6543); + case 5: out1[3] = __byte_perm_S (in1[1], in1[2], 0x6543); + out1[2] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[1] = __byte_perm_S (in0[3], in1[0], 0x6543); + out1[0] = __byte_perm_S (in0[2], in0[3], 0x6543); + out0[3] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[2] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[1] = __byte_perm_S ( 0, in0[0], 0x6543); out0[0] = 0; break; - case 6: out1[3] = __byte_perm (in1[1], in1[2], 0x5432); - out1[2] = __byte_perm (in1[0], in1[1], 0x5432); - out1[1] = __byte_perm (in0[3], in1[0], 0x5432); - out1[0] = __byte_perm (in0[2], in0[3], 0x5432); - out0[3] = __byte_perm (in0[1], in0[2], 0x5432); - out0[2] = __byte_perm (in0[0], in0[1], 0x5432); - out0[1] = __byte_perm ( 0, in0[0], 0x5432); + case 6: out1[3] = __byte_perm_S (in1[1], in1[2], 0x5432); + out1[2] = __byte_perm_S (in1[0], in1[1], 0x5432); + out1[1] = __byte_perm_S (in0[3], in1[0], 0x5432); + out1[0] = __byte_perm_S (in0[2], in0[3], 0x5432); + out0[3] = __byte_perm_S (in0[1], in0[2], 0x5432); + out0[2] = __byte_perm_S (in0[0], in0[1], 0x5432); + out0[1] = __byte_perm_S ( 0, in0[0], 0x5432); out0[0] = 0; break; - case 7: out1[3] = __byte_perm (in1[1], in1[2], 0x4321); - out1[2] = __byte_perm (in1[0], in1[1], 0x4321); - out1[1] = __byte_perm (in0[3], in1[0], 0x4321); - out1[0] = __byte_perm (in0[2], in0[3], 0x4321); - out0[3] = __byte_perm (in0[1], in0[2], 0x4321); - out0[2] = __byte_perm (in0[0], in0[1], 0x4321); - out0[1] = __byte_perm ( 0, in0[0], 0x4321); + case 7: out1[3] = __byte_perm_S (in1[1], in1[2], 0x4321); + out1[2] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[1] = __byte_perm_S (in0[3], in1[0], 0x4321); + out1[0] = __byte_perm_S (in0[2], in0[3], 0x4321); + out0[3] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[2] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[1] = __byte_perm_S ( 0, in0[0], 0x4321); out0[0] = 0; break; case 8: out1[3] = in1[1]; @@ -852,30 +852,30 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 9: out1[3] = __byte_perm (in1[0], in1[1], 0x6543); - out1[2] = __byte_perm (in0[3], in1[0], 0x6543); - out1[1] = __byte_perm (in0[2], in0[3], 0x6543); - out1[0] = __byte_perm (in0[1], in0[2], 0x6543); - out0[3] = __byte_perm (in0[0], in0[1], 0x6543); - out0[2] = __byte_perm ( 0, in0[0], 0x6543); + case 9: out1[3] = __byte_perm_S (in1[0], in1[1], 0x6543); + out1[2] = __byte_perm_S (in0[3], in1[0], 0x6543); + out1[1] = __byte_perm_S (in0[2], in0[3], 0x6543); + out1[0] = __byte_perm_S (in0[1], in0[2], 0x6543); + out0[3] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[2] = __byte_perm_S ( 0, in0[0], 0x6543); out0[1] = 0; out0[0] = 0; break; - case 10: out1[3] = __byte_perm (in1[0], in1[1], 0x5432); - out1[2] = __byte_perm (in0[3], in1[0], 0x5432); - out1[1] = __byte_perm (in0[2], in0[3], 0x5432); - out1[0] = __byte_perm (in0[1], in0[2], 0x5432); - out0[3] = __byte_perm (in0[0], in0[1], 0x5432); - out0[2] = __byte_perm ( 0, in0[0], 0x5432); + case 10: out1[3] = __byte_perm_S (in1[0], in1[1], 0x5432); + out1[2] = __byte_perm_S (in0[3], in1[0], 0x5432); + out1[1] = __byte_perm_S (in0[2], in0[3], 0x5432); + out1[0] = __byte_perm_S (in0[1], in0[2], 0x5432); + out0[3] = __byte_perm_S (in0[0], in0[1], 0x5432); + out0[2] = __byte_perm_S ( 0, in0[0], 0x5432); out0[1] = 0; out0[0] = 0; break; - case 11: out1[3] = __byte_perm (in1[0], in1[1], 0x4321); - out1[2] = __byte_perm (in0[3], in1[0], 0x4321); - out1[1] = __byte_perm (in0[2], in0[3], 0x4321); - out1[0] = __byte_perm (in0[1], in0[2], 0x4321); - out0[3] = __byte_perm (in0[0], in0[1], 0x4321); - out0[2] = __byte_perm ( 0, in0[0], 0x4321); + case 11: out1[3] = __byte_perm_S (in1[0], in1[1], 0x4321); + out1[2] = __byte_perm_S (in0[3], in1[0], 0x4321); + out1[1] = __byte_perm_S (in0[2], in0[3], 0x4321); + out1[0] = __byte_perm_S (in0[1], in0[2], 0x4321); + out0[3] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[2] = __byte_perm_S ( 0, in0[0], 0x4321); out0[1] = 0; out0[0] = 0; break; @@ -888,29 +888,29 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 13: out1[3] = __byte_perm (in0[3], in1[0], 0x6543); - out1[2] = __byte_perm (in0[2], in0[3], 0x6543); - out1[1] = __byte_perm (in0[1], in0[2], 0x6543); - out1[0] = __byte_perm (in0[0], in0[1], 0x6543); - out0[3] = __byte_perm ( 0, in0[0], 0x6543); + case 13: out1[3] = __byte_perm_S (in0[3], in1[0], 0x6543); + out1[2] = __byte_perm_S (in0[2], in0[3], 0x6543); + out1[1] = __byte_perm_S (in0[1], in0[2], 0x6543); + out1[0] = __byte_perm_S (in0[0], in0[1], 0x6543); + out0[3] = __byte_perm_S ( 0, in0[0], 0x6543); out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 14: out1[3] = __byte_perm (in0[3], in1[0], 0x5432); - out1[2] = __byte_perm (in0[2], in0[3], 0x5432); - out1[1] = __byte_perm (in0[1], in0[2], 0x5432); - out1[0] = __byte_perm (in0[0], in0[1], 0x5432); - out0[3] = __byte_perm ( 0, in0[0], 0x5432); + case 14: out1[3] = __byte_perm_S (in0[3], in1[0], 0x5432); + out1[2] = __byte_perm_S (in0[2], in0[3], 0x5432); + out1[1] = __byte_perm_S (in0[1], in0[2], 0x5432); + out1[0] = __byte_perm_S (in0[0], in0[1], 0x5432); + out0[3] = __byte_perm_S ( 0, in0[0], 0x5432); out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 15: out1[3] = __byte_perm (in0[3], in1[0], 0x4321); - out1[2] = __byte_perm (in0[2], in0[3], 0x4321); - out1[1] = __byte_perm (in0[1], in0[2], 0x4321); - out1[0] = __byte_perm (in0[0], in0[1], 0x4321); - out0[3] = __byte_perm ( 0, in0[0], 0x4321); + case 15: out1[3] = __byte_perm_S (in0[3], in1[0], 0x4321); + out1[2] = __byte_perm_S (in0[2], in0[3], 0x4321); + out1[1] = __byte_perm_S (in0[1], in0[2], 0x4321); + out1[0] = __byte_perm_S (in0[0], in0[1], 0x4321); + out0[3] = __byte_perm_S ( 0, in0[0], 0x4321); out0[2] = 0; out0[1] = 0; out0[0] = 0; @@ -924,28 +924,28 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 17: out1[3] = __byte_perm (in0[2], in0[3], 0x6543); - out1[2] = __byte_perm (in0[1], in0[2], 0x6543); - out1[1] = __byte_perm (in0[0], in0[1], 0x6543); - out1[0] = __byte_perm ( 0, in0[0], 0x6543); + case 17: out1[3] = __byte_perm_S (in0[2], in0[3], 0x6543); + out1[2] = __byte_perm_S (in0[1], in0[2], 0x6543); + out1[1] = __byte_perm_S (in0[0], in0[1], 0x6543); + out1[0] = __byte_perm_S ( 0, in0[0], 0x6543); out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 18: out1[3] = __byte_perm (in0[2], in0[3], 0x5432); - out1[2] = __byte_perm (in0[1], in0[2], 0x5432); - out1[1] = __byte_perm (in0[0], in0[1], 0x5432); - out1[0] = __byte_perm ( 0, in0[0], 0x5432); + case 18: out1[3] = __byte_perm_S (in0[2], in0[3], 0x5432); + out1[2] = __byte_perm_S (in0[1], in0[2], 0x5432); + out1[1] = __byte_perm_S (in0[0], in0[1], 0x5432); + out1[0] = __byte_perm_S ( 0, in0[0], 0x5432); out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 19: out1[3] = __byte_perm (in0[2], in0[3], 0x4321); - out1[2] = __byte_perm (in0[1], in0[2], 0x4321); - out1[1] = __byte_perm (in0[0], in0[1], 0x4321); - out1[0] = __byte_perm ( 0, in0[0], 0x4321); + case 19: out1[3] = __byte_perm_S (in0[2], in0[3], 0x4321); + out1[2] = __byte_perm_S (in0[1], in0[2], 0x4321); + out1[1] = __byte_perm_S (in0[0], in0[1], 0x4321); + out1[0] = __byte_perm_S ( 0, in0[0], 0x4321); out0[3] = 0; out0[2] = 0; out0[1] = 0; @@ -960,27 +960,27 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 21: out1[3] = __byte_perm (in0[1], in0[2], 0x6543); - out1[2] = __byte_perm (in0[0], in0[1], 0x6543); - out1[1] = __byte_perm ( 0, in0[0], 0x6543); + case 21: out1[3] = __byte_perm_S (in0[1], in0[2], 0x6543); + out1[2] = __byte_perm_S (in0[0], in0[1], 0x6543); + out1[1] = __byte_perm_S ( 0, in0[0], 0x6543); out1[0] = 0; out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 22: out1[3] = __byte_perm (in0[1], in0[2], 0x5432); - out1[2] = __byte_perm (in0[0], in0[1], 0x5432); - out1[1] = __byte_perm ( 0, in0[0], 0x5432); + case 22: out1[3] = __byte_perm_S (in0[1], in0[2], 0x5432); + out1[2] = __byte_perm_S (in0[0], in0[1], 0x5432); + out1[1] = __byte_perm_S ( 0, in0[0], 0x5432); out1[0] = 0; out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 23: out1[3] = __byte_perm (in0[1], in0[2], 0x4321); - out1[2] = __byte_perm (in0[0], in0[1], 0x4321); - out1[1] = __byte_perm ( 0, in0[0], 0x4321); + case 23: out1[3] = __byte_perm_S (in0[1], in0[2], 0x4321); + out1[2] = __byte_perm_S (in0[0], in0[1], 0x4321); + out1[1] = __byte_perm_S ( 0, in0[0], 0x4321); out1[0] = 0; out0[3] = 0; out0[2] = 0; @@ -996,8 +996,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 25: out1[3] = __byte_perm (in0[0], in0[1], 0x6543); - out1[2] = __byte_perm ( 0, in0[0], 0x6543); + case 25: out1[3] = __byte_perm_S (in0[0], in0[1], 0x6543); + out1[2] = __byte_perm_S ( 0, in0[0], 0x6543); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1005,8 +1005,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 26: out1[3] = __byte_perm (in0[0], in0[1], 0x5432); - out1[2] = __byte_perm ( 0, in0[0], 0x5432); + case 26: out1[3] = __byte_perm_S (in0[0], in0[1], 0x5432); + out1[2] = __byte_perm_S ( 0, in0[0], 0x5432); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1014,8 +1014,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 27: out1[3] = __byte_perm (in0[0], in0[1], 0x4321); - out1[2] = __byte_perm ( 0, in0[0], 0x4321); + case 27: out1[3] = __byte_perm_S (in0[0], in0[1], 0x4321); + out1[2] = __byte_perm_S ( 0, in0[0], 0x4321); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1032,7 +1032,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 29: out1[3] = __byte_perm ( 0, in0[0], 0x6543); + case 29: out1[3] = __byte_perm_S ( 0, in0[0], 0x6543); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1041,7 +1041,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 30: out1[3] = __byte_perm ( 0, in0[0], 0x5432); + case 30: out1[3] = __byte_perm_S ( 0, in0[0], 0x5432); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1050,7 +1050,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 31: out1[3] = __byte_perm ( 0, in0[0], 0x4321); + case 31: out1[3] = __byte_perm_S ( 0, in0[0], 0x4321); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1074,32 +1074,32 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = in0[1]; out0[0] = in0[0]; break; - case 1: out1[3] = amd_bytealign (in1[3], in1[2], 3); - out1[2] = amd_bytealign (in1[2], in1[1], 3); - out1[1] = amd_bytealign (in1[1], in1[0], 3); - out1[0] = amd_bytealign (in1[0], in0[3], 3); - out0[3] = amd_bytealign (in0[3], in0[2], 3); - out0[2] = amd_bytealign (in0[2], in0[1], 3); - out0[1] = amd_bytealign (in0[1], in0[0], 3); - out0[0] = amd_bytealign (in0[0], 0, 3); + case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3); + out1[2] = amd_bytealign_S (in1[2], in1[1], 3); + out1[1] = amd_bytealign_S (in1[1], in1[0], 3); + out1[0] = amd_bytealign_S (in1[0], in0[3], 3); + out0[3] = amd_bytealign_S (in0[3], in0[2], 3); + out0[2] = amd_bytealign_S (in0[2], in0[1], 3); + out0[1] = amd_bytealign_S (in0[1], in0[0], 3); + out0[0] = amd_bytealign_S (in0[0], 0, 3); break; - case 2: out1[3] = amd_bytealign (in1[3], in1[2], 2); - out1[2] = amd_bytealign (in1[2], in1[1], 2); - out1[1] = amd_bytealign (in1[1], in1[0], 2); - out1[0] = amd_bytealign (in1[0], in0[3], 2); - out0[3] = amd_bytealign (in0[3], in0[2], 2); - out0[2] = amd_bytealign (in0[2], in0[1], 2); - out0[1] = amd_bytealign (in0[1], in0[0], 2); - out0[0] = amd_bytealign (in0[0], 0, 2); + case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2); + out1[2] = amd_bytealign_S (in1[2], in1[1], 2); + out1[1] = amd_bytealign_S (in1[1], in1[0], 2); + out1[0] = amd_bytealign_S (in1[0], in0[3], 2); + out0[3] = amd_bytealign_S (in0[3], in0[2], 2); + out0[2] = amd_bytealign_S (in0[2], in0[1], 2); + out0[1] = amd_bytealign_S (in0[1], in0[0], 2); + out0[0] = amd_bytealign_S (in0[0], 0, 2); break; - case 3: out1[3] = amd_bytealign (in1[3], in1[2], 1); - out1[2] = amd_bytealign (in1[2], in1[1], 1); - out1[1] = amd_bytealign (in1[1], in1[0], 1); - out1[0] = amd_bytealign (in1[0], in0[3], 1); - out0[3] = amd_bytealign (in0[3], in0[2], 1); - out0[2] = amd_bytealign (in0[2], in0[1], 1); - out0[1] = amd_bytealign (in0[1], in0[0], 1); - out0[0] = amd_bytealign (in0[0], 0, 1); + case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1); + out1[2] = amd_bytealign_S (in1[2], in1[1], 1); + out1[1] = amd_bytealign_S (in1[1], in1[0], 1); + out1[0] = amd_bytealign_S (in1[0], in0[3], 1); + out0[3] = amd_bytealign_S (in0[3], in0[2], 1); + out0[2] = amd_bytealign_S (in0[2], in0[1], 1); + out0[1] = amd_bytealign_S (in0[1], in0[0], 1); + out0[0] = amd_bytealign_S (in0[0], 0, 1); break; case 4: out1[3] = in1[2]; out1[2] = in1[1]; @@ -1110,31 +1110,31 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = in0[0]; out0[0] = 0; break; - case 5: out1[3] = amd_bytealign (in1[2], in1[1], 3); - out1[2] = amd_bytealign (in1[1], in1[0], 3); - out1[1] = amd_bytealign (in1[0], in0[3], 3); - out1[0] = amd_bytealign (in0[3], in0[2], 3); - out0[3] = amd_bytealign (in0[2], in0[1], 3); - out0[2] = amd_bytealign (in0[1], in0[0], 3); - out0[1] = amd_bytealign (in0[0], 0, 3); + case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3); + out1[2] = amd_bytealign_S (in1[1], in1[0], 3); + out1[1] = amd_bytealign_S (in1[0], in0[3], 3); + out1[0] = amd_bytealign_S (in0[3], in0[2], 3); + out0[3] = amd_bytealign_S (in0[2], in0[1], 3); + out0[2] = amd_bytealign_S (in0[1], in0[0], 3); + out0[1] = amd_bytealign_S (in0[0], 0, 3); out0[0] = 0; break; - case 6: out1[3] = amd_bytealign (in1[2], in1[1], 2); - out1[2] = amd_bytealign (in1[1], in1[0], 2); - out1[1] = amd_bytealign (in1[0], in0[3], 2); - out1[0] = amd_bytealign (in0[3], in0[2], 2); - out0[3] = amd_bytealign (in0[2], in0[1], 2); - out0[2] = amd_bytealign (in0[1], in0[0], 2); - out0[1] = amd_bytealign (in0[0], 0, 2); + case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2); + out1[2] = amd_bytealign_S (in1[1], in1[0], 2); + out1[1] = amd_bytealign_S (in1[0], in0[3], 2); + out1[0] = amd_bytealign_S (in0[3], in0[2], 2); + out0[3] = amd_bytealign_S (in0[2], in0[1], 2); + out0[2] = amd_bytealign_S (in0[1], in0[0], 2); + out0[1] = amd_bytealign_S (in0[0], 0, 2); out0[0] = 0; break; - case 7: out1[3] = amd_bytealign (in1[2], in1[1], 1); - out1[2] = amd_bytealign (in1[1], in1[0], 1); - out1[1] = amd_bytealign (in1[0], in0[3], 1); - out1[0] = amd_bytealign (in0[3], in0[2], 1); - out0[3] = amd_bytealign (in0[2], in0[1], 1); - out0[2] = amd_bytealign (in0[1], in0[0], 1); - out0[1] = amd_bytealign (in0[0], 0, 1); + case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1); + out1[2] = amd_bytealign_S (in1[1], in1[0], 1); + out1[1] = amd_bytealign_S (in1[0], in0[3], 1); + out1[0] = amd_bytealign_S (in0[3], in0[2], 1); + out0[3] = amd_bytealign_S (in0[2], in0[1], 1); + out0[2] = amd_bytealign_S (in0[1], in0[0], 1); + out0[1] = amd_bytealign_S (in0[0], 0, 1); out0[0] = 0; break; case 8: out1[3] = in1[1]; @@ -1146,30 +1146,30 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 9: out1[3] = amd_bytealign (in1[1], in1[0], 3); - out1[2] = amd_bytealign (in1[0], in0[3], 3); - out1[1] = amd_bytealign (in0[3], in0[2], 3); - out1[0] = amd_bytealign (in0[2], in0[1], 3); - out0[3] = amd_bytealign (in0[1], in0[0], 3); - out0[2] = amd_bytealign (in0[0], 0, 3); + case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3); + out1[2] = amd_bytealign_S (in1[0], in0[3], 3); + out1[1] = amd_bytealign_S (in0[3], in0[2], 3); + out1[0] = amd_bytealign_S (in0[2], in0[1], 3); + out0[3] = amd_bytealign_S (in0[1], in0[0], 3); + out0[2] = amd_bytealign_S (in0[0], 0, 3); out0[1] = 0; out0[0] = 0; break; - case 10: out1[3] = amd_bytealign (in1[1], in1[0], 2); - out1[2] = amd_bytealign (in1[0], in0[3], 2); - out1[1] = amd_bytealign (in0[3], in0[2], 2); - out1[0] = amd_bytealign (in0[2], in0[1], 2); - out0[3] = amd_bytealign (in0[1], in0[0], 2); - out0[2] = amd_bytealign (in0[0], 0, 2); + case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2); + out1[2] = amd_bytealign_S (in1[0], in0[3], 2); + out1[1] = amd_bytealign_S (in0[3], in0[2], 2); + out1[0] = amd_bytealign_S (in0[2], in0[1], 2); + out0[3] = amd_bytealign_S (in0[1], in0[0], 2); + out0[2] = amd_bytealign_S (in0[0], 0, 2); out0[1] = 0; out0[0] = 0; break; - case 11: out1[3] = amd_bytealign (in1[1], in1[0], 1); - out1[2] = amd_bytealign (in1[0], in0[3], 1); - out1[1] = amd_bytealign (in0[3], in0[2], 1); - out1[0] = amd_bytealign (in0[2], in0[1], 1); - out0[3] = amd_bytealign (in0[1], in0[0], 1); - out0[2] = amd_bytealign (in0[0], 0, 1); + case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1); + out1[2] = amd_bytealign_S (in1[0], in0[3], 1); + out1[1] = amd_bytealign_S (in0[3], in0[2], 1); + out1[0] = amd_bytealign_S (in0[2], in0[1], 1); + out0[3] = amd_bytealign_S (in0[1], in0[0], 1); + out0[2] = amd_bytealign_S (in0[0], 0, 1); out0[1] = 0; out0[0] = 0; break; @@ -1182,29 +1182,29 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 13: out1[3] = amd_bytealign (in1[0], in0[3], 3); - out1[2] = amd_bytealign (in0[3], in0[2], 3); - out1[1] = amd_bytealign (in0[2], in0[1], 3); - out1[0] = amd_bytealign (in0[1], in0[0], 3); - out0[3] = amd_bytealign (in0[0], 0, 3); + case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3); + out1[2] = amd_bytealign_S (in0[3], in0[2], 3); + out1[1] = amd_bytealign_S (in0[2], in0[1], 3); + out1[0] = amd_bytealign_S (in0[1], in0[0], 3); + out0[3] = amd_bytealign_S (in0[0], 0, 3); out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 14: out1[3] = amd_bytealign (in1[0], in0[3], 2); - out1[2] = amd_bytealign (in0[3], in0[2], 2); - out1[1] = amd_bytealign (in0[2], in0[1], 2); - out1[0] = amd_bytealign (in0[1], in0[0], 2); - out0[3] = amd_bytealign (in0[0], 0, 2); + case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2); + out1[2] = amd_bytealign_S (in0[3], in0[2], 2); + out1[1] = amd_bytealign_S (in0[2], in0[1], 2); + out1[0] = amd_bytealign_S (in0[1], in0[0], 2); + out0[3] = amd_bytealign_S (in0[0], 0, 2); out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 15: out1[3] = amd_bytealign (in1[0], in0[3], 1); - out1[2] = amd_bytealign (in0[3], in0[2], 1); - out1[1] = amd_bytealign (in0[2], in0[1], 1); - out1[0] = amd_bytealign (in0[1], in0[0], 1); - out0[3] = amd_bytealign (in0[0], 0, 1); + case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1); + out1[2] = amd_bytealign_S (in0[3], in0[2], 1); + out1[1] = amd_bytealign_S (in0[2], in0[1], 1); + out1[0] = amd_bytealign_S (in0[1], in0[0], 1); + out0[3] = amd_bytealign_S (in0[0], 0, 1); out0[2] = 0; out0[1] = 0; out0[0] = 0; @@ -1218,28 +1218,28 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 17: out1[3] = amd_bytealign (in0[3], in0[2], 3); - out1[2] = amd_bytealign (in0[2], in0[1], 3); - out1[1] = amd_bytealign (in0[1], in0[0], 3); - out1[0] = amd_bytealign (in0[0], 0, 3); + case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3); + out1[2] = amd_bytealign_S (in0[2], in0[1], 3); + out1[1] = amd_bytealign_S (in0[1], in0[0], 3); + out1[0] = amd_bytealign_S (in0[0], 0, 3); out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 18: out1[3] = amd_bytealign (in0[3], in0[2], 2); - out1[2] = amd_bytealign (in0[2], in0[1], 2); - out1[1] = amd_bytealign (in0[1], in0[0], 2); - out1[0] = amd_bytealign (in0[0], 0, 2); + case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2); + out1[2] = amd_bytealign_S (in0[2], in0[1], 2); + out1[1] = amd_bytealign_S (in0[1], in0[0], 2); + out1[0] = amd_bytealign_S (in0[0], 0, 2); out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 19: out1[3] = amd_bytealign (in0[3], in0[2], 1); - out1[2] = amd_bytealign (in0[2], in0[1], 1); - out1[1] = amd_bytealign (in0[1], in0[0], 1); - out1[0] = amd_bytealign (in0[0], 0, 1); + case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1); + out1[2] = amd_bytealign_S (in0[2], in0[1], 1); + out1[1] = amd_bytealign_S (in0[1], in0[0], 1); + out1[0] = amd_bytealign_S (in0[0], 0, 1); out0[3] = 0; out0[2] = 0; out0[1] = 0; @@ -1254,27 +1254,27 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 21: out1[3] = amd_bytealign (in0[2], in0[1], 3); - out1[2] = amd_bytealign (in0[1], in0[0], 3); - out1[1] = amd_bytealign (in0[0], 0, 3); + case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3); + out1[2] = amd_bytealign_S (in0[1], in0[0], 3); + out1[1] = amd_bytealign_S (in0[0], 0, 3); out1[0] = 0; out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 22: out1[3] = amd_bytealign (in0[2], in0[1], 2); - out1[2] = amd_bytealign (in0[1], in0[0], 2); - out1[1] = amd_bytealign (in0[0], 0, 2); + case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2); + out1[2] = amd_bytealign_S (in0[1], in0[0], 2); + out1[1] = amd_bytealign_S (in0[0], 0, 2); out1[0] = 0; out0[3] = 0; out0[2] = 0; out0[1] = 0; out0[0] = 0; break; - case 23: out1[3] = amd_bytealign (in0[2], in0[1], 1); - out1[2] = amd_bytealign (in0[1], in0[0], 1); - out1[1] = amd_bytealign (in0[0], 0, 1); + case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1); + out1[2] = amd_bytealign_S (in0[1], in0[0], 1); + out1[1] = amd_bytealign_S (in0[0], 0, 1); out1[0] = 0; out0[3] = 0; out0[2] = 0; @@ -1290,8 +1290,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 25: out1[3] = amd_bytealign (in0[1], in0[0], 3); - out1[2] = amd_bytealign (in0[0], 0, 3); + case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3); + out1[2] = amd_bytealign_S (in0[0], 0, 3); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1299,8 +1299,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 26: out1[3] = amd_bytealign (in0[1], in0[0], 2); - out1[2] = amd_bytealign (in0[0], 0, 2); + case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2); + out1[2] = amd_bytealign_S (in0[0], 0, 2); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1308,8 +1308,8 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 27: out1[3] = amd_bytealign (in0[1], in0[0], 1); - out1[2] = amd_bytealign (in0[0], 0, 1); + case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1); + out1[2] = amd_bytealign_S (in0[0], 0, 1); out1[1] = 0; out1[0] = 0; out0[3] = 0; @@ -1326,7 +1326,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 29: out1[3] = amd_bytealign (in0[0], 0, 3); + case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1335,7 +1335,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 30: out1[3] = amd_bytealign (in0[0], 0, 2); + case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1344,7 +1344,7 @@ static void rshift_block_N (const u32 in0[4], const u32 in1[4], u32 out0[4], u32 out0[1] = 0; out0[0] = 0; break; - case 31: out1[3] = amd_bytealign (in0[0], 0, 1); + case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1); out1[2] = 0; out1[1] = 0; out1[0] = 0; @@ -1422,36 +1422,36 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 1: - dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x6540); - dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543); - dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543); - dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x6543); - dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x6543); - dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x6543); - dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x6543); + dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x6540); + dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); + dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); + dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); + dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); + dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543); + dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x6543); break; case 2: - dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x5410); - dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432); - dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432); - dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x5432); - dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x5432); - dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x5432); - dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x5432); + dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x5410); + dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); + dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); + dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); + dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); + dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432); + dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x5432); break; case 3: - dst0[0] = __byte_perm (src_l0[0], src_r0[0], 0x4210); - dst0[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst0[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321); - dst0[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321); - dst1[0] = __byte_perm (src_r0[3], src_r1[0], 0x4321); - dst1[1] = __byte_perm (src_r1[0], src_r1[1], 0x4321); - dst1[2] = __byte_perm (src_r1[1], src_r1[2], 0x4321); - dst1[3] = __byte_perm (src_r1[2], src_r1[3], 0x4321); + dst0[0] = __byte_perm_S (src_l0[0], src_r0[0], 0x4210); + dst0[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst0[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); + dst0[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); + dst1[0] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); + dst1[1] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); + dst1[2] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321); + dst1[3] = __byte_perm_S (src_r1[2], src_r1[3], 0x4321); break; case 4: @@ -1465,33 +1465,33 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 5: - dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x6540); - dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543); - dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x6543); - dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x6543); - dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x6543); - dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x6543); + dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x6540); + dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); + dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); + dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); + dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); + dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x6543); break; case 6: - dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x5410); - dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432); - dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x5432); - dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x5432); - dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x5432); - dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x5432); + dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x5410); + dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); + dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); + dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); + dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); + dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x5432); break; case 7: - dst0[1] = __byte_perm (src_l0[1], src_r0[0], 0x4210); - dst0[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst0[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321); - dst1[0] = __byte_perm (src_r0[2], src_r0[3], 0x4321); - dst1[1] = __byte_perm (src_r0[3], src_r1[0], 0x4321); - dst1[2] = __byte_perm (src_r1[0], src_r1[1], 0x4321); - dst1[3] = __byte_perm (src_r1[1], src_r1[2], 0x4321); + dst0[1] = __byte_perm_S (src_l0[1], src_r0[0], 0x4210); + dst0[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst0[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); + dst1[0] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); + dst1[1] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); + dst1[2] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); + dst1[3] = __byte_perm_S (src_r1[1], src_r1[2], 0x4321); break; case 8: @@ -1504,30 +1504,30 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 9: - dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x6540); - dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x6543); - dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x6543); - dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x6543); - dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x6543); + dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x6540); + dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); + dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); + dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); + dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x6543); break; case 10: - dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x5410); - dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x5432); - dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x5432); - dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x5432); - dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x5432); + dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x5410); + dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); + dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); + dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); + dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x5432); break; case 11: - dst0[2] = __byte_perm (src_l0[2], src_r0[0], 0x4210); - dst0[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst1[0] = __byte_perm (src_r0[1], src_r0[2], 0x4321); - dst1[1] = __byte_perm (src_r0[2], src_r0[3], 0x4321); - dst1[2] = __byte_perm (src_r0[3], src_r1[0], 0x4321); - dst1[3] = __byte_perm (src_r1[0], src_r1[1], 0x4321); + dst0[2] = __byte_perm_S (src_l0[2], src_r0[0], 0x4210); + dst0[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst1[0] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); + dst1[1] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); + dst1[2] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); + dst1[3] = __byte_perm_S (src_r1[0], src_r1[1], 0x4321); break; case 12: @@ -1539,27 +1539,27 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 13: - dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x6540); - dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x6543); - dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x6543); - dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x6543); + dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x6540); + dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); + dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); + dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x6543); break; case 14: - dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x5410); - dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x5432); - dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x5432); - dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x5432); + dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x5410); + dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); + dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); + dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x5432); break; case 15: - dst0[3] = __byte_perm (src_l0[3], src_r0[0], 0x4210); - dst1[0] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst1[1] = __byte_perm (src_r0[1], src_r0[2], 0x4321); - dst1[2] = __byte_perm (src_r0[2], src_r0[3], 0x4321); - dst1[3] = __byte_perm (src_r0[3], src_r1[0], 0x4321); + dst0[3] = __byte_perm_S (src_l0[3], src_r0[0], 0x4210); + dst1[0] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst1[1] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); + dst1[2] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); + dst1[3] = __byte_perm_S (src_r0[3], src_r1[0], 0x4321); break; case 16: @@ -1570,24 +1570,24 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 17: - dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x6540); - dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x6543); - dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x6543); + dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x6540); + dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); + dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x6543); break; case 18: - dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x5410); - dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x5432); - dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x5432); + dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x5410); + dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); + dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x5432); break; case 19: - dst1[0] = __byte_perm (src_l1[0], src_r0[0], 0x4210); - dst1[1] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst1[2] = __byte_perm (src_r0[1], src_r0[2], 0x4321); - dst1[3] = __byte_perm (src_r0[2], src_r0[3], 0x4321); + dst1[0] = __byte_perm_S (src_l1[0], src_r0[0], 0x4210); + dst1[1] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst1[2] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); + dst1[3] = __byte_perm_S (src_r0[2], src_r0[3], 0x4321); break; case 20: @@ -1597,21 +1597,21 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 21: - dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x6540); - dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x6543); - dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x6543); + dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x6540); + dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); + dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x6543); break; case 22: - dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x5410); - dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x5432); - dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x5432); + dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x5410); + dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); + dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x5432); break; case 23: - dst1[1] = __byte_perm (src_l1[1], src_r0[0], 0x4210); - dst1[2] = __byte_perm (src_r0[0], src_r0[1], 0x4321); - dst1[3] = __byte_perm (src_r0[1], src_r0[2], 0x4321); + dst1[1] = __byte_perm_S (src_l1[1], src_r0[0], 0x4210); + dst1[2] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); + dst1[3] = __byte_perm_S (src_r0[1], src_r0[2], 0x4321); break; case 24: @@ -1620,18 +1620,18 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 25: - dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x6540); - dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x6543); + dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x6540); + dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x6543); break; case 26: - dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x5410); - dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x5432); + dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x5410); + dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x5432); break; case 27: - dst1[2] = __byte_perm (src_l1[2], src_r0[0], 0x4210); - dst1[3] = __byte_perm (src_r0[0], src_r0[1], 0x4321); + dst1[2] = __byte_perm_S (src_l1[2], src_r0[0], 0x4210); + dst1[3] = __byte_perm_S (src_r0[0], src_r0[1], 0x4321); break; case 28: @@ -1639,15 +1639,15 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 break; case 29: - dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x6540); + dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x6540); break; case 30: - dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x5410); + dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x5410); break; case 31: - dst1[3] = __byte_perm (src_l1[3], src_r0[0], 0x4210); + dst1[3] = __byte_perm_S (src_l1[3], src_r0[0], 0x4210); break; } #endif @@ -1668,15 +1668,15 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst1[3] = src_r0[0]; break; case 27: - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst1[2] = src_l1[2] | src_r0[0] << 24; break; case 26: - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst1[2] = src_l1[2] | src_r0[0] << 16; break; case 25: - dst1[3] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst1[2] = src_l1[2] | src_r0[0] << 8; break; case 24: @@ -1684,18 +1684,18 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst1[2] = src_r0[0]; break; case 23: - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst1[1] = src_l1[1] | src_r0[0] << 24; break; case 22: - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst1[1] = src_l1[1] | src_r0[0] << 16; break; case 21: - dst1[3] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[2] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst1[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst1[1] = src_l1[1] | src_r0[0] << 8; break; case 20: @@ -1704,21 +1704,21 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst1[1] = src_r0[0]; break; case 19: - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1); + dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst1[0] = src_l1[0] | src_r0[0] << 24; break; case 18: - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2); + dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst1[0] = src_l1[0] | src_r0[0] << 16; break; case 17: - dst1[3] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[2] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[1] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3); + dst1[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst1[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst1[0] = src_l1[0] | src_r0[0] << 8; break; case 16: @@ -1728,24 +1728,24 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst1[0] = src_r0[0]; break; case 15: - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 1); + dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 1); + dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst0[3] = src_l0[3] | src_r0[0] << 24; break; case 14: - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 2); + dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 2); + dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst0[3] = src_l0[3] | src_r0[0] << 16; break; case 13: - dst1[3] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[2] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[1] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst1[0] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r1[0], src_r0[3], 3); + dst1[2] = amd_bytealign_S (src_r0[3], src_r0[2], 3); + dst1[1] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst1[0] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst0[3] = src_l0[3] | src_r0[0] << 8; break; case 12: @@ -1756,27 +1756,27 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst0[3] = src_r0[0]; break; case 11: - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 1); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 1); + dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 1); + dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 1); + dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst0[2] = src_l0[2] | src_r0[0] << 24; break; case 10: - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 2); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 2); + dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 2); + dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 2); + dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst0[2] = src_l0[2] | src_r0[0] << 16; break; case 9: - dst1[3] = amd_bytealign (src_r1[1], src_r1[0], 3); - dst1[2] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[1] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst1[0] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst0[3] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r1[1], src_r1[0], 3); + dst1[2] = amd_bytealign_S (src_r1[0], src_r0[3], 3); + dst1[1] = amd_bytealign_S (src_r0[3], src_r0[2], 3); + dst1[0] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst0[3] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst0[2] = src_l0[2] | src_r0[0] << 8; break; case 8: @@ -1788,30 +1788,30 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst0[2] = src_r0[0]; break; case 7: - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 1); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 1); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 1); + dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 1); + dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 1); + dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 1); + dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst0[1] = src_l0[1] | src_r0[0] << 24; break; case 6: - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 2); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 2); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 2); + dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 2); + dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 2); + dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 2); + dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst0[1] = src_l0[1] | src_r0[0] << 16; break; case 5: - dst1[3] = amd_bytealign (src_r1[2], src_r1[1], 3); - dst1[2] = amd_bytealign (src_r1[1], src_r1[0], 3); - dst1[1] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst1[0] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst0[3] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst0[2] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r1[2], src_r1[1], 3); + dst1[2] = amd_bytealign_S (src_r1[1], src_r1[0], 3); + dst1[1] = amd_bytealign_S (src_r1[0], src_r0[3], 3); + dst1[0] = amd_bytealign_S (src_r0[3], src_r0[2], 3); + dst0[3] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst0[2] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst0[1] = src_l0[1] | src_r0[0] << 8; break; case 4: @@ -1824,33 +1824,33 @@ static void append_block8 (const u32 offset, u32 dst0[4], u32 dst1[4], const u32 dst0[1] = src_r0[0]; break; case 3: - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 1); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 1); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 1); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 1); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 1); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 1); - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 1); + dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 1); + dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 1); + dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 1); + dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 1); + dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 1); + dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 1); + dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 1); dst0[0] = src_l0[0] | src_r0[0] << 24; break; case 2: - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 2); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 2); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 2); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 2); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 2); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 2); - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 2); + dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 2); + dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 2); + dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 2); + dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 2); + dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 2); + dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 2); + dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 2); dst0[0] = src_l0[0] | src_r0[0] << 16; break; case 1: - dst1[3] = amd_bytealign (src_r1[3], src_r1[2], 3); - dst1[2] = amd_bytealign (src_r1[2], src_r1[1], 3); - dst1[1] = amd_bytealign (src_r1[1], src_r1[0], 3); - dst1[0] = amd_bytealign (src_r1[0], src_r0[3], 3); - dst0[3] = amd_bytealign (src_r0[3], src_r0[2], 3); - dst0[2] = amd_bytealign (src_r0[2], src_r0[1], 3); - dst0[1] = amd_bytealign (src_r0[1], src_r0[0], 3); + dst1[3] = amd_bytealign_S (src_r1[3], src_r1[2], 3); + dst1[2] = amd_bytealign_S (src_r1[2], src_r1[1], 3); + dst1[1] = amd_bytealign_S (src_r1[1], src_r1[0], 3); + dst1[0] = amd_bytealign_S (src_r1[0], src_r0[3], 3); + dst0[3] = amd_bytealign_S (src_r0[3], src_r0[2], 3); + dst0[2] = amd_bytealign_S (src_r0[2], src_r0[1], 3); + dst0[1] = amd_bytealign_S (src_r0[1], src_r0[0], 3); dst0[0] = src_l0[0] | src_r0[0] << 8; break; case 0: @@ -1883,14 +1883,14 @@ static void reverse_block (u32 in0[4], u32 in1[4], u32 out0[4], u32 out1[4], con tib41[2] = out0[1]; tib41[3] = out0[0]; - out0[0] = swap32 (tib40[0]); - out0[1] = swap32 (tib40[1]); - out0[2] = swap32 (tib40[2]); - out0[3] = swap32 (tib40[3]); - out1[0] = swap32 (tib41[0]); - out1[1] = swap32 (tib41[1]); - out1[2] = swap32 (tib41[2]); - out1[3] = swap32 (tib41[3]); + out0[0] = swap32_S (tib40[0]); + out0[1] = swap32_S (tib40[1]); + out0[2] = swap32_S (tib40[2]); + out0[3] = swap32_S (tib40[3]); + out1[0] = swap32_S (tib41[0]); + out1[1] = swap32_S (tib41[1]); + out1[2] = swap32_S (tib41[2]); + out1[3] = swap32_S (tib41[3]); } static u32 rule_op_mangle_lrest (const u32 p0, const u32 p1, u32 buf0[4], u32 buf1[4], const u32 in_len) @@ -2447,69 +2447,69 @@ static u32 rule_op_mangle_replace (const u32 p0, const u32 p1, u32 buf0[4], u32 { switch (i) { - case 0: if ((__byte_perm (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7650); + case 0: if ((__byte_perm_S (buf0[0], 0, 0x6540)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7650); break; - case 1: if ((__byte_perm (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7604); + case 1: if ((__byte_perm_S (buf0[0], 0, 0x6541)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7604); break; - case 2: if ((__byte_perm (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x7054); + case 2: if ((__byte_perm_S (buf0[0], 0, 0x6542)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x7054); break; - case 3: if ((__byte_perm (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm (p1, buf0[0], 0x0654); + case 3: if ((__byte_perm_S (buf0[0], 0, 0x6543)) == p0) buf0[0] = __byte_perm_S (p1, buf0[0], 0x0654); break; - case 4: if ((__byte_perm (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7650); + case 4: if ((__byte_perm_S (buf0[1], 0, 0x6540)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7650); break; - case 5: if ((__byte_perm (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7604); + case 5: if ((__byte_perm_S (buf0[1], 0, 0x6541)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7604); break; - case 6: if ((__byte_perm (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x7054); + case 6: if ((__byte_perm_S (buf0[1], 0, 0x6542)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x7054); break; - case 7: if ((__byte_perm (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm (p1, buf0[1], 0x0654); + case 7: if ((__byte_perm_S (buf0[1], 0, 0x6543)) == p0) buf0[1] = __byte_perm_S (p1, buf0[1], 0x0654); break; - case 8: if ((__byte_perm (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7650); + case 8: if ((__byte_perm_S (buf0[2], 0, 0x6540)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7650); break; - case 9: if ((__byte_perm (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7604); + case 9: if ((__byte_perm_S (buf0[2], 0, 0x6541)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7604); break; - case 10: if ((__byte_perm (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x7054); + case 10: if ((__byte_perm_S (buf0[2], 0, 0x6542)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x7054); break; - case 11: if ((__byte_perm (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm (p1, buf0[2], 0x0654); + case 11: if ((__byte_perm_S (buf0[2], 0, 0x6543)) == p0) buf0[2] = __byte_perm_S (p1, buf0[2], 0x0654); break; - case 12: if ((__byte_perm (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7650); + case 12: if ((__byte_perm_S (buf0[3], 0, 0x6540)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7650); break; - case 13: if ((__byte_perm (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7604); + case 13: if ((__byte_perm_S (buf0[3], 0, 0x6541)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7604); break; - case 14: if ((__byte_perm (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x7054); + case 14: if ((__byte_perm_S (buf0[3], 0, 0x6542)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x7054); break; - case 15: if ((__byte_perm (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm (p1, buf0[3], 0x0654); + case 15: if ((__byte_perm_S (buf0[3], 0, 0x6543)) == p0) buf0[3] = __byte_perm_S (p1, buf0[3], 0x0654); break; - case 16: if ((__byte_perm (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7650); + case 16: if ((__byte_perm_S (buf1[0], 0, 0x6540)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7650); break; - case 17: if ((__byte_perm (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7604); + case 17: if ((__byte_perm_S (buf1[0], 0, 0x6541)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7604); break; - case 18: if ((__byte_perm (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x7054); + case 18: if ((__byte_perm_S (buf1[0], 0, 0x6542)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x7054); break; - case 19: if ((__byte_perm (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm (p1, buf1[0], 0x0654); + case 19: if ((__byte_perm_S (buf1[0], 0, 0x6543)) == p0) buf1[0] = __byte_perm_S (p1, buf1[0], 0x0654); break; - case 20: if ((__byte_perm (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7650); + case 20: if ((__byte_perm_S (buf1[1], 0, 0x6540)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7650); break; - case 21: if ((__byte_perm (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7604); + case 21: if ((__byte_perm_S (buf1[1], 0, 0x6541)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7604); break; - case 22: if ((__byte_perm (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x7054); + case 22: if ((__byte_perm_S (buf1[1], 0, 0x6542)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x7054); break; - case 23: if ((__byte_perm (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm (p1, buf1[1], 0x0654); + case 23: if ((__byte_perm_S (buf1[1], 0, 0x6543)) == p0) buf1[1] = __byte_perm_S (p1, buf1[1], 0x0654); break; - case 24: if ((__byte_perm (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7650); + case 24: if ((__byte_perm_S (buf1[2], 0, 0x6540)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7650); break; - case 25: if ((__byte_perm (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7604); + case 25: if ((__byte_perm_S (buf1[2], 0, 0x6541)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7604); break; - case 26: if ((__byte_perm (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x7054); + case 26: if ((__byte_perm_S (buf1[2], 0, 0x6542)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x7054); break; - case 27: if ((__byte_perm (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm (p1, buf1[2], 0x0654); + case 27: if ((__byte_perm_S (buf1[2], 0, 0x6543)) == p0) buf1[2] = __byte_perm_S (p1, buf1[2], 0x0654); break; - case 28: if ((__byte_perm (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7650); + case 28: if ((__byte_perm_S (buf1[3], 0, 0x6540)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7650); break; - case 29: if ((__byte_perm (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7604); + case 29: if ((__byte_perm_S (buf1[3], 0, 0x6541)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7604); break; - case 30: if ((__byte_perm (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x7054); + case 30: if ((__byte_perm_S (buf1[3], 0, 0x6542)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x7054); break; - case 31: if ((__byte_perm (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm (p1, buf1[3], 0x0654); + case 31: if ((__byte_perm_S (buf1[3], 0, 0x6543)) == p0) buf1[3] = __byte_perm_S (p1, buf1[3], 0x0654); break; } } @@ -2562,170 +2562,170 @@ static u32 rule_op_mangle_dupechar_first (const u32 p0, const u32 p1, u32 buf0[4 { case 1: buf0[0] |= tmp; break; - case 2: buf0[0] |= __byte_perm (tmp, 0, 0x5400); + case 2: buf0[0] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 3: buf0[0] |= __byte_perm (tmp, 0, 0x4000); + case 3: buf0[0] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 4: buf0[0] |= __byte_perm (tmp, 0, 0x0000); + case 4: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 5: buf0[0] |= __byte_perm (tmp, 0, 0x0000); + case 5: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); buf0[1] |= tmp; break; - case 6: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x5400); + case 6: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 7: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x4000); + case 7: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 8: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); + case 8: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 9: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); + case 9: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); buf0[2] |= tmp; break; - case 10: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x5400); + case 10: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 11: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x4000); + case 11: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 12: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); + case 12: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 13: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); + case 13: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); buf0[3] |= tmp; break; - case 14: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x5400); + case 14: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 15: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x4000); + case 15: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 16: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); + case 16: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 17: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); + case 17: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); buf1[0] |= tmp; break; - case 18: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x5400); + case 18: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 19: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x4000); + case 19: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 20: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); + case 20: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 21: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); + case 21: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); buf1[1] |= tmp; break; - case 22: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x5400); + case 22: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 23: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x4000); + case 23: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 24: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); + case 24: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 25: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); + case 25: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); buf1[2] |= tmp; break; - case 26: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x5400); + case 26: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 27: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x4000); + case 27: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x4000); break; - case 28: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x0000); + case 28: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); break; - case 29: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x0000); + case 29: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); buf1[3] |= tmp; break; - case 30: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x0000); - buf1[3] |= __byte_perm (tmp, 0, 0x5400); + case 30: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[3] |= __byte_perm_S (tmp, 0, 0x5400); break; - case 31: buf0[0] |= __byte_perm (tmp, 0, 0x0000); - buf0[1] |= __byte_perm (tmp, 0, 0x0000); - buf0[2] |= __byte_perm (tmp, 0, 0x0000); - buf0[3] |= __byte_perm (tmp, 0, 0x0000); - buf1[0] |= __byte_perm (tmp, 0, 0x0000); - buf1[1] |= __byte_perm (tmp, 0, 0x0000); - buf1[2] |= __byte_perm (tmp, 0, 0x0000); - buf1[3] |= __byte_perm (tmp, 0, 0x4000); + case 31: buf0[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf0[3] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[0] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[1] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[2] |= __byte_perm_S (tmp, 0, 0x0000); + buf1[3] |= __byte_perm_S (tmp, 0, 0x4000); break; } #endif @@ -2954,14 +2954,14 @@ static u32 rule_op_mangle_dupechar_all (const u32 p0, const u32 p1, u32 buf0[4], u32 tib41[4]; #ifdef IS_NV - tib40[0] = __byte_perm (buf0[0], 0, 0x1100); - tib40[1] = __byte_perm (buf0[0], 0, 0x3322); - tib40[2] = __byte_perm (buf0[1], 0, 0x1100); - tib40[3] = __byte_perm (buf0[1], 0, 0x3322); - tib41[0] = __byte_perm (buf0[2], 0, 0x1100); - tib41[1] = __byte_perm (buf0[2], 0, 0x3322); - tib41[2] = __byte_perm (buf0[3], 0, 0x1100); - tib41[3] = __byte_perm (buf0[3], 0, 0x3322); + tib40[0] = __byte_perm_S (buf0[0], 0, 0x1100); + tib40[1] = __byte_perm_S (buf0[0], 0, 0x3322); + tib40[2] = __byte_perm_S (buf0[1], 0, 0x1100); + tib40[3] = __byte_perm_S (buf0[1], 0, 0x3322); + tib41[0] = __byte_perm_S (buf0[2], 0, 0x1100); + tib41[1] = __byte_perm_S (buf0[2], 0, 0x3322); + tib41[2] = __byte_perm_S (buf0[3], 0, 0x1100); + tib41[3] = __byte_perm_S (buf0[3], 0, 0x3322); buf0[0] = tib40[0]; buf0[1] = tib40[1]; @@ -3003,7 +3003,7 @@ static u32 rule_op_mangle_switch_first (const u32 p0, const u32 p1, u32 buf0[4], if (in_len < 2) return (in_len); #ifdef IS_NV - buf0[0] = __byte_perm (buf0[0], 0, 0x3201); + buf0[0] = __byte_perm_S (buf0[0], 0, 0x3201); #endif #if defined IS_AMD || defined IS_GENERIC @@ -3020,79 +3020,79 @@ static u32 rule_op_mangle_switch_last (const u32 p0, const u32 p1, u32 buf0[4], #ifdef IS_NV switch (in_len) { - case 2: buf0[0] = __byte_perm (buf0[0], 0, 0x5401); + case 2: buf0[0] = __byte_perm_S (buf0[0], 0, 0x5401); break; - case 3: buf0[0] = __byte_perm (buf0[0], 0, 0x4120); + case 3: buf0[0] = __byte_perm_S (buf0[0], 0, 0x4120); break; - case 4: buf0[0] = __byte_perm (buf0[0], 0, 0x2310); + case 4: buf0[0] = __byte_perm_S (buf0[0], 0, 0x2310); break; - case 5: buf0[1] = __byte_perm (buf0[1], buf0[0], 0x7210); - buf0[0] = __byte_perm (buf0[0], buf0[1], 0x4210); - buf0[1] = __byte_perm (buf0[1], 0, 0x6543); + case 5: buf0[1] = __byte_perm_S (buf0[1], buf0[0], 0x7210); + buf0[0] = __byte_perm_S (buf0[0], buf0[1], 0x4210); + buf0[1] = __byte_perm_S (buf0[1], 0, 0x6543); break; - case 6: buf0[1] = __byte_perm (buf0[1], 0, 0x5401); + case 6: buf0[1] = __byte_perm_S (buf0[1], 0, 0x5401); break; - case 7: buf0[1] = __byte_perm (buf0[1], 0, 0x4120); + case 7: buf0[1] = __byte_perm_S (buf0[1], 0, 0x4120); break; - case 8: buf0[1] = __byte_perm (buf0[1], 0, 0x2310); + case 8: buf0[1] = __byte_perm_S (buf0[1], 0, 0x2310); break; - case 9: buf0[2] = __byte_perm (buf0[2], buf0[1], 0x7210); - buf0[1] = __byte_perm (buf0[1], buf0[2], 0x4210); - buf0[2] = __byte_perm (buf0[2], 0, 0x6543); + case 9: buf0[2] = __byte_perm_S (buf0[2], buf0[1], 0x7210); + buf0[1] = __byte_perm_S (buf0[1], buf0[2], 0x4210); + buf0[2] = __byte_perm_S (buf0[2], 0, 0x6543); break; - case 10: buf0[2] = __byte_perm (buf0[2], 0, 0x5401); + case 10: buf0[2] = __byte_perm_S (buf0[2], 0, 0x5401); break; - case 11: buf0[2] = __byte_perm (buf0[2], 0, 0x4120); + case 11: buf0[2] = __byte_perm_S (buf0[2], 0, 0x4120); break; - case 12: buf0[2] = __byte_perm (buf0[2], 0, 0x2310); + case 12: buf0[2] = __byte_perm_S (buf0[2], 0, 0x2310); break; - case 13: buf0[3] = __byte_perm (buf0[3], buf0[2], 0x7210); - buf0[2] = __byte_perm (buf0[2], buf0[3], 0x4210); - buf0[3] = __byte_perm (buf0[3], 0, 0x6543); + case 13: buf0[3] = __byte_perm_S (buf0[3], buf0[2], 0x7210); + buf0[2] = __byte_perm_S (buf0[2], buf0[3], 0x4210); + buf0[3] = __byte_perm_S (buf0[3], 0, 0x6543); break; - case 14: buf0[3] = __byte_perm (buf0[3], 0, 0x5401); + case 14: buf0[3] = __byte_perm_S (buf0[3], 0, 0x5401); break; - case 15: buf0[3] = __byte_perm (buf0[3], 0, 0x4120); + case 15: buf0[3] = __byte_perm_S (buf0[3], 0, 0x4120); break; - case 16: buf0[3] = __byte_perm (buf0[3], 0, 0x2310); + case 16: buf0[3] = __byte_perm_S (buf0[3], 0, 0x2310); break; - case 17: buf1[0] = __byte_perm (buf1[0], buf0[3], 0x7210); - buf0[3] = __byte_perm (buf0[3], buf1[0], 0x4210); - buf1[0] = __byte_perm (buf1[0], 0, 0x6543); + case 17: buf1[0] = __byte_perm_S (buf1[0], buf0[3], 0x7210); + buf0[3] = __byte_perm_S (buf0[3], buf1[0], 0x4210); + buf1[0] = __byte_perm_S (buf1[0], 0, 0x6543); break; - case 18: buf1[0] = __byte_perm (buf1[0], 0, 0x5401); + case 18: buf1[0] = __byte_perm_S (buf1[0], 0, 0x5401); break; - case 19: buf1[0] = __byte_perm (buf1[0], 0, 0x4120); + case 19: buf1[0] = __byte_perm_S (buf1[0], 0, 0x4120); break; - case 20: buf1[0] = __byte_perm (buf1[0], 0, 0x2310); + case 20: buf1[0] = __byte_perm_S (buf1[0], 0, 0x2310); break; - case 21: buf1[1] = __byte_perm (buf1[1], buf1[0], 0x7210); - buf1[0] = __byte_perm (buf1[0], buf1[1], 0x4210); - buf1[1] = __byte_perm (buf1[1], 0, 0x6543); + case 21: buf1[1] = __byte_perm_S (buf1[1], buf1[0], 0x7210); + buf1[0] = __byte_perm_S (buf1[0], buf1[1], 0x4210); + buf1[1] = __byte_perm_S (buf1[1], 0, 0x6543); break; - case 22: buf1[1] = __byte_perm (buf1[1], 0, 0x5401); + case 22: buf1[1] = __byte_perm_S (buf1[1], 0, 0x5401); break; - case 23: buf1[1] = __byte_perm (buf1[1], 0, 0x4120); + case 23: buf1[1] = __byte_perm_S (buf1[1], 0, 0x4120); break; - case 24: buf1[1] = __byte_perm (buf1[1], 0, 0x2310); + case 24: buf1[1] = __byte_perm_S (buf1[1], 0, 0x2310); break; - case 25: buf1[2] = __byte_perm (buf1[2], buf1[1], 0x7210); - buf1[1] = __byte_perm (buf1[1], buf1[2], 0x4210); - buf1[2] = __byte_perm (buf1[2], 0, 0x6543); + case 25: buf1[2] = __byte_perm_S (buf1[2], buf1[1], 0x7210); + buf1[1] = __byte_perm_S (buf1[1], buf1[2], 0x4210); + buf1[2] = __byte_perm_S (buf1[2], 0, 0x6543); break; - case 26: buf1[2] = __byte_perm (buf1[2], 0, 0x5401); + case 26: buf1[2] = __byte_perm_S (buf1[2], 0, 0x5401); break; - case 27: buf1[2] = __byte_perm (buf1[2], 0, 0x4120); + case 27: buf1[2] = __byte_perm_S (buf1[2], 0, 0x4120); break; - case 28: buf1[2] = __byte_perm (buf1[2], 0, 0x2310); + case 28: buf1[2] = __byte_perm_S (buf1[2], 0, 0x2310); break; - case 29: buf1[3] = __byte_perm (buf1[3], buf1[2], 0x7210); - buf1[2] = __byte_perm (buf1[2], buf1[3], 0x4210); - buf1[3] = __byte_perm (buf1[3], 0, 0x6543); + case 29: buf1[3] = __byte_perm_S (buf1[3], buf1[2], 0x7210); + buf1[2] = __byte_perm_S (buf1[2], buf1[3], 0x4210); + buf1[3] = __byte_perm_S (buf1[3], 0, 0x6543); break; - case 30: buf1[3] = __byte_perm (buf1[3], 0, 0x5401); + case 30: buf1[3] = __byte_perm_S (buf1[3], 0, 0x5401); break; - case 31: buf1[3] = __byte_perm (buf1[3], 0, 0x4120); + case 31: buf1[3] = __byte_perm_S (buf1[3], 0, 0x4120); break; } #endif @@ -3191,237 +3191,237 @@ static u32 rule_op_mangle_switch_at (const u32 p0, const u32 p1, u32 buf0[4], u3 #ifdef IS_NV switch (p0) { - case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540); + case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540); break; - case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541); + case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541); break; - case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542); + case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542); break; - case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543); + case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543); break; - case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540); + case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540); break; - case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541); + case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541); break; - case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542); + case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542); break; - case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543); + case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543); break; - case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540); + case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540); break; - case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541); + case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541); break; - case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542); + case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542); break; - case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543); + case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543); break; - case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540); + case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540); break; - case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541); + case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541); break; - case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542); + case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542); break; - case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543); + case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543); break; - case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540); + case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540); break; - case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541); + case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541); break; - case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542); + case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542); break; - case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543); + case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543); break; - case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540); + case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540); break; - case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541); + case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541); break; - case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542); + case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542); break; - case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543); + case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543); break; - case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540); + case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540); break; - case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541); + case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541); break; - case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542); + case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542); break; - case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543); + case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543); break; - case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540); + case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540); break; - case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541); + case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541); break; - case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542); + case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542); break; - case 31: tmp0 = __byte_perm (buf1[3], 0, 0x6543); + case 31: tmp0 = __byte_perm_S (buf1[3], 0, 0x6543); break; } switch (p1) { - case 0: tmp1 = __byte_perm (buf0[0], 0, 0x6540); - buf0[0] = __byte_perm (tmp0, buf0[0], 0x7650); + case 0: tmp1 = __byte_perm_S (buf0[0], 0, 0x6540); + buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7650); break; - case 1: tmp1 = __byte_perm (buf0[0], 0, 0x6541); - buf0[0] = __byte_perm (tmp0, buf0[0], 0x7604); + case 1: tmp1 = __byte_perm_S (buf0[0], 0, 0x6541); + buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7604); break; - case 2: tmp1 = __byte_perm (buf0[0], 0, 0x6542); - buf0[0] = __byte_perm (tmp0, buf0[0], 0x7054); + case 2: tmp1 = __byte_perm_S (buf0[0], 0, 0x6542); + buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x7054); break; - case 3: tmp1 = __byte_perm (buf0[0], 0, 0x6543); - buf0[0] = __byte_perm (tmp0, buf0[0], 0x0654); + case 3: tmp1 = __byte_perm_S (buf0[0], 0, 0x6543); + buf0[0] = __byte_perm_S (tmp0, buf0[0], 0x0654); break; - case 4: tmp1 = __byte_perm (buf0[1], 0, 0x6540); - buf0[1] = __byte_perm (tmp0, buf0[1], 0x7650); + case 4: tmp1 = __byte_perm_S (buf0[1], 0, 0x6540); + buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7650); break; - case 5: tmp1 = __byte_perm (buf0[1], 0, 0x6541); - buf0[1] = __byte_perm (tmp0, buf0[1], 0x7604); + case 5: tmp1 = __byte_perm_S (buf0[1], 0, 0x6541); + buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7604); break; - case 6: tmp1 = __byte_perm (buf0[1], 0, 0x6542); - buf0[1] = __byte_perm (tmp0, buf0[1], 0x7054); + case 6: tmp1 = __byte_perm_S (buf0[1], 0, 0x6542); + buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x7054); break; - case 7: tmp1 = __byte_perm (buf0[1], 0, 0x6543); - buf0[1] = __byte_perm (tmp0, buf0[1], 0x0654); + case 7: tmp1 = __byte_perm_S (buf0[1], 0, 0x6543); + buf0[1] = __byte_perm_S (tmp0, buf0[1], 0x0654); break; - case 8: tmp1 = __byte_perm (buf0[2], 0, 0x6540); - buf0[2] = __byte_perm (tmp0, buf0[2], 0x7650); + case 8: tmp1 = __byte_perm_S (buf0[2], 0, 0x6540); + buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7650); break; - case 9: tmp1 = __byte_perm (buf0[2], 0, 0x6541); - buf0[2] = __byte_perm (tmp0, buf0[2], 0x7604); + case 9: tmp1 = __byte_perm_S (buf0[2], 0, 0x6541); + buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7604); break; - case 10: tmp1 = __byte_perm (buf0[2], 0, 0x6542); - buf0[2] = __byte_perm (tmp0, buf0[2], 0x7054); + case 10: tmp1 = __byte_perm_S (buf0[2], 0, 0x6542); + buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x7054); break; - case 11: tmp1 = __byte_perm (buf0[2], 0, 0x6543); - buf0[2] = __byte_perm (tmp0, buf0[2], 0x0654); + case 11: tmp1 = __byte_perm_S (buf0[2], 0, 0x6543); + buf0[2] = __byte_perm_S (tmp0, buf0[2], 0x0654); break; - case 12: tmp1 = __byte_perm (buf0[3], 0, 0x6540); - buf0[3] = __byte_perm (tmp0, buf0[3], 0x7650); + case 12: tmp1 = __byte_perm_S (buf0[3], 0, 0x6540); + buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7650); break; - case 13: tmp1 = __byte_perm (buf0[3], 0, 0x6541); - buf0[3] = __byte_perm (tmp0, buf0[3], 0x7604); + case 13: tmp1 = __byte_perm_S (buf0[3], 0, 0x6541); + buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7604); break; - case 14: tmp1 = __byte_perm (buf0[3], 0, 0x6542); - buf0[3] = __byte_perm (tmp0, buf0[3], 0x7054); + case 14: tmp1 = __byte_perm_S (buf0[3], 0, 0x6542); + buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x7054); break; - case 15: tmp1 = __byte_perm (buf0[3], 0, 0x6543); - buf0[3] = __byte_perm (tmp0, buf0[3], 0x0654); + case 15: tmp1 = __byte_perm_S (buf0[3], 0, 0x6543); + buf0[3] = __byte_perm_S (tmp0, buf0[3], 0x0654); break; - case 16: tmp1 = __byte_perm (buf1[0], 0, 0x6540); - buf1[0] = __byte_perm (tmp0, buf1[0], 0x7650); + case 16: tmp1 = __byte_perm_S (buf1[0], 0, 0x6540); + buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7650); break; - case 17: tmp1 = __byte_perm (buf1[0], 0, 0x6541); - buf1[0] = __byte_perm (tmp0, buf1[0], 0x7604); + case 17: tmp1 = __byte_perm_S (buf1[0], 0, 0x6541); + buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7604); break; - case 18: tmp1 = __byte_perm (buf1[0], 0, 0x6542); - buf1[0] = __byte_perm (tmp0, buf1[0], 0x7054); + case 18: tmp1 = __byte_perm_S (buf1[0], 0, 0x6542); + buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x7054); break; - case 19: tmp1 = __byte_perm (buf1[0], 0, 0x6543); - buf1[0] = __byte_perm (tmp0, buf1[0], 0x0654); + case 19: tmp1 = __byte_perm_S (buf1[0], 0, 0x6543); + buf1[0] = __byte_perm_S (tmp0, buf1[0], 0x0654); break; - case 20: tmp1 = __byte_perm (buf1[1], 0, 0x6540); - buf1[1] = __byte_perm (tmp0, buf1[1], 0x7650); + case 20: tmp1 = __byte_perm_S (buf1[1], 0, 0x6540); + buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7650); break; - case 21: tmp1 = __byte_perm (buf1[1], 0, 0x6541); - buf1[1] = __byte_perm (tmp0, buf1[1], 0x7604); + case 21: tmp1 = __byte_perm_S (buf1[1], 0, 0x6541); + buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7604); break; - case 22: tmp1 = __byte_perm (buf1[1], 0, 0x6542); - buf1[1] = __byte_perm (tmp0, buf1[1], 0x7054); + case 22: tmp1 = __byte_perm_S (buf1[1], 0, 0x6542); + buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x7054); break; - case 23: tmp1 = __byte_perm (buf1[1], 0, 0x6543); - buf1[1] = __byte_perm (tmp0, buf1[1], 0x0654); + case 23: tmp1 = __byte_perm_S (buf1[1], 0, 0x6543); + buf1[1] = __byte_perm_S (tmp0, buf1[1], 0x0654); break; - case 24: tmp1 = __byte_perm (buf1[2], 0, 0x6540); - buf1[2] = __byte_perm (tmp0, buf1[2], 0x7650); + case 24: tmp1 = __byte_perm_S (buf1[2], 0, 0x6540); + buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7650); break; - case 25: tmp1 = __byte_perm (buf1[2], 0, 0x6541); - buf1[2] = __byte_perm (tmp0, buf1[2], 0x7604); + case 25: tmp1 = __byte_perm_S (buf1[2], 0, 0x6541); + buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7604); break; - case 26: tmp1 = __byte_perm (buf1[2], 0, 0x6542); - buf1[2] = __byte_perm (tmp0, buf1[2], 0x7054); + case 26: tmp1 = __byte_perm_S (buf1[2], 0, 0x6542); + buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x7054); break; - case 27: tmp1 = __byte_perm (buf1[2], 0, 0x6543); - buf1[2] = __byte_perm (tmp0, buf1[2], 0x0654); + case 27: tmp1 = __byte_perm_S (buf1[2], 0, 0x6543); + buf1[2] = __byte_perm_S (tmp0, buf1[2], 0x0654); break; - case 28: tmp1 = __byte_perm (buf1[3], 0, 0x6540); - buf1[3] = __byte_perm (tmp0, buf1[3], 0x7650); + case 28: tmp1 = __byte_perm_S (buf1[3], 0, 0x6540); + buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7650); break; - case 29: tmp1 = __byte_perm (buf1[3], 0, 0x6541); - buf1[3] = __byte_perm (tmp0, buf1[3], 0x7604); + case 29: tmp1 = __byte_perm_S (buf1[3], 0, 0x6541); + buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7604); break; - case 30: tmp1 = __byte_perm (buf1[3], 0, 0x6542); - buf1[3] = __byte_perm (tmp0, buf1[3], 0x7054); + case 30: tmp1 = __byte_perm_S (buf1[3], 0, 0x6542); + buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x7054); break; - case 31: tmp1 = __byte_perm (buf1[3], 0, 0x6543); - buf1[3] = __byte_perm (tmp0, buf1[3], 0x0654); + case 31: tmp1 = __byte_perm_S (buf1[3], 0, 0x6543); + buf1[3] = __byte_perm_S (tmp0, buf1[3], 0x0654); break; } switch (p0) { - case 0: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7650); + case 0: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7650); break; - case 1: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7604); + case 1: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7604); break; - case 2: buf0[0] = __byte_perm (tmp1, buf0[0], 0x7054); + case 2: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x7054); break; - case 3: buf0[0] = __byte_perm (tmp1, buf0[0], 0x0654); + case 3: buf0[0] = __byte_perm_S (tmp1, buf0[0], 0x0654); break; - case 4: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7650); + case 4: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7650); break; - case 5: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7604); + case 5: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7604); break; - case 6: buf0[1] = __byte_perm (tmp1, buf0[1], 0x7054); + case 6: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x7054); break; - case 7: buf0[1] = __byte_perm (tmp1, buf0[1], 0x0654); + case 7: buf0[1] = __byte_perm_S (tmp1, buf0[1], 0x0654); break; - case 8: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7650); + case 8: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7650); break; - case 9: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7604); + case 9: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7604); break; - case 10: buf0[2] = __byte_perm (tmp1, buf0[2], 0x7054); + case 10: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x7054); break; - case 11: buf0[2] = __byte_perm (tmp1, buf0[2], 0x0654); + case 11: buf0[2] = __byte_perm_S (tmp1, buf0[2], 0x0654); break; - case 12: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7650); + case 12: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7650); break; - case 13: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7604); + case 13: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7604); break; - case 14: buf0[3] = __byte_perm (tmp1, buf0[3], 0x7054); + case 14: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x7054); break; - case 15: buf0[3] = __byte_perm (tmp1, buf0[3], 0x0654); + case 15: buf0[3] = __byte_perm_S (tmp1, buf0[3], 0x0654); break; - case 16: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7650); + case 16: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7650); break; - case 17: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7604); + case 17: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7604); break; - case 18: buf1[0] = __byte_perm (tmp1, buf1[0], 0x7054); + case 18: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x7054); break; - case 19: buf1[0] = __byte_perm (tmp1, buf1[0], 0x0654); + case 19: buf1[0] = __byte_perm_S (tmp1, buf1[0], 0x0654); break; - case 20: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7650); + case 20: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7650); break; - case 21: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7604); + case 21: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7604); break; - case 22: buf1[1] = __byte_perm (tmp1, buf1[1], 0x7054); + case 22: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x7054); break; - case 23: buf1[1] = __byte_perm (tmp1, buf1[1], 0x0654); + case 23: buf1[1] = __byte_perm_S (tmp1, buf1[1], 0x0654); break; - case 24: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7650); + case 24: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7650); break; - case 25: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7604); + case 25: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7604); break; - case 26: buf1[2] = __byte_perm (tmp1, buf1[2], 0x7054); + case 26: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x7054); break; - case 27: buf1[2] = __byte_perm (tmp1, buf1[2], 0x0654); + case 27: buf1[2] = __byte_perm_S (tmp1, buf1[2], 0x0654); break; - case 28: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7650); + case 28: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7650); break; - case 29: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7604); + case 29: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7604); break; - case 30: buf1[3] = __byte_perm (tmp1, buf1[3], 0x7054); + case 30: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x7054); break; - case 31: buf1[3] = __byte_perm (tmp1, buf1[3], 0x0654); + case 31: buf1[3] = __byte_perm_S (tmp1, buf1[3], 0x0654); break; } #endif @@ -3903,67 +3903,67 @@ static u32 rule_op_mangle_title (const u32 p0, const u32 p1, u32 buf0[4], u32 bu switch (i) { - case 0: tmp0 = __byte_perm (buf0[0], 0, 0x6540); + case 0: tmp0 = __byte_perm_S (buf0[0], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf0[0])); break; - case 1: tmp0 = __byte_perm (buf0[0], 0, 0x6541); + case 1: tmp0 = __byte_perm_S (buf0[0], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf0[0])); break; - case 2: tmp0 = __byte_perm (buf0[0], 0, 0x6542); + case 2: tmp0 = __byte_perm_S (buf0[0], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf0[0])); break; - case 3: tmp0 = __byte_perm (buf0[0], 0, 0x6543); + case 3: tmp0 = __byte_perm_S (buf0[0], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf0[1])); break; - case 4: tmp0 = __byte_perm (buf0[1], 0, 0x6540); + case 4: tmp0 = __byte_perm_S (buf0[1], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf0[1])); break; - case 5: tmp0 = __byte_perm (buf0[1], 0, 0x6541); + case 5: tmp0 = __byte_perm_S (buf0[1], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf0[1])); break; - case 6: tmp0 = __byte_perm (buf0[1], 0, 0x6542); + case 6: tmp0 = __byte_perm_S (buf0[1], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf0[1])); break; - case 7: tmp0 = __byte_perm (buf0[1], 0, 0x6543); + case 7: tmp0 = __byte_perm_S (buf0[1], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf0[2])); break; - case 8: tmp0 = __byte_perm (buf0[2], 0, 0x6540); + case 8: tmp0 = __byte_perm_S (buf0[2], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf0[2])); break; - case 9: tmp0 = __byte_perm (buf0[2], 0, 0x6541); + case 9: tmp0 = __byte_perm_S (buf0[2], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf0[2])); break; - case 10: tmp0 = __byte_perm (buf0[2], 0, 0x6542); + case 10: tmp0 = __byte_perm_S (buf0[2], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf0[2])); break; - case 11: tmp0 = __byte_perm (buf0[2], 0, 0x6543); + case 11: tmp0 = __byte_perm_S (buf0[2], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf0[3])); break; - case 12: tmp0 = __byte_perm (buf0[3], 0, 0x6540); + case 12: tmp0 = __byte_perm_S (buf0[3], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf0[3])); break; - case 13: tmp0 = __byte_perm (buf0[3], 0, 0x6541); + case 13: tmp0 = __byte_perm_S (buf0[3], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf0[3])); break; - case 14: tmp0 = __byte_perm (buf0[3], 0, 0x6542); + case 14: tmp0 = __byte_perm_S (buf0[3], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf0[3])); break; - case 15: tmp0 = __byte_perm (buf0[3], 0, 0x6543); + case 15: tmp0 = __byte_perm_S (buf0[3], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf1[0])); break; - case 16: tmp0 = __byte_perm (buf1[0], 0, 0x6540); + case 16: tmp0 = __byte_perm_S (buf1[0], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf1[0])); break; - case 17: tmp0 = __byte_perm (buf1[0], 0, 0x6541); + case 17: tmp0 = __byte_perm_S (buf1[0], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf1[0])); break; - case 18: tmp0 = __byte_perm (buf1[0], 0, 0x6542); + case 18: tmp0 = __byte_perm_S (buf1[0], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf1[0])); break; - case 19: tmp0 = __byte_perm (buf1[0], 0, 0x6543); + case 19: tmp0 = __byte_perm_S (buf1[0], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf1[1])); break; - case 20: tmp0 = __byte_perm (buf1[1], 0, 0x6540); + case 20: tmp0 = __byte_perm_S (buf1[1], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf1[1])); break; - case 21: tmp0 = __byte_perm (buf1[1], 0, 0x6541); + case 21: tmp0 = __byte_perm_S (buf1[1], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf1[1])); break; - case 22: tmp0 = __byte_perm (buf1[1], 0, 0x6542); + case 22: tmp0 = __byte_perm_S (buf1[1], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf1[1])); break; - case 23: tmp0 = __byte_perm (buf1[1], 0, 0x6543); + case 23: tmp0 = __byte_perm_S (buf1[1], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf1[2])); break; - case 24: tmp0 = __byte_perm (buf1[2], 0, 0x6540); + case 24: tmp0 = __byte_perm_S (buf1[2], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf1[2])); break; - case 25: tmp0 = __byte_perm (buf1[2], 0, 0x6541); + case 25: tmp0 = __byte_perm_S (buf1[2], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf1[2])); break; - case 26: tmp0 = __byte_perm (buf1[2], 0, 0x6542); + case 26: tmp0 = __byte_perm_S (buf1[2], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf1[2])); break; - case 27: tmp0 = __byte_perm (buf1[2], 0, 0x6543); + case 27: tmp0 = __byte_perm_S (buf1[2], 0, 0x6543); tmp1 = ~(0x00000020 & generate_cmask (buf1[3])); break; - case 28: tmp0 = __byte_perm (buf1[3], 0, 0x6540); + case 28: tmp0 = __byte_perm_S (buf1[3], 0, 0x6540); tmp1 = ~(0x00002000 & generate_cmask (buf1[3])); break; - case 29: tmp0 = __byte_perm (buf1[3], 0, 0x6541); + case 29: tmp0 = __byte_perm_S (buf1[3], 0, 0x6541); tmp1 = ~(0x00200000 & generate_cmask (buf1[3])); break; - case 30: tmp0 = __byte_perm (buf1[3], 0, 0x6542); + case 30: tmp0 = __byte_perm_S (buf1[3], 0, 0x6542); tmp1 = ~(0x20000000 & generate_cmask (buf1[3])); break; }