mirror of
https://github.com/hashcat/hashcat.git
synced 2025-01-10 07:40:58 +00:00
Rename some hashcat specific OpenCL functions to avoid conflicts with existing OpenCL functions from OpenCL runtime
This commit is contained in:
parent
02a2495349
commit
a43d3ad176
OpenCL
inc_common.cl inc_hash_functions.cl inc_rp_optimized.cl inc_types.cl inc_vendor.cl m00500-optimized.cl m01600-optimized.cl m03200-pure.cl m05800-optimized.cl m05800-pure.cl m06300-optimized.cl m07400-optimized.cl m09000-pure.cl m10700-optimized.cl m11600-pure.cl m13800_a0-optimized.cl m13800_a1-optimized.cl m13800_a3-optimized.cl
56416
OpenCL/inc_common.cl
56416
OpenCL/inc_common.cl
File diff suppressed because it is too large
Load Diff
@ -34,20 +34,20 @@
|
||||
#define MD4_STEP_S(f,a,b,c,d,x,K,s) \
|
||||
{ \
|
||||
a += K; \
|
||||
a = __add3_S (a, x, f (b, c, d)); \
|
||||
a = hc_add3_S (a, x, f (b, c, d)); \
|
||||
a = rotl32_S (a, s); \
|
||||
}
|
||||
|
||||
#define MD4_STEP(f,a,b,c,d,x,K,s) \
|
||||
{ \
|
||||
a += K; \
|
||||
a = __add3 (a, x, f (b, c, d)); \
|
||||
a = hc_add3 (a, x, f (b, c, d)); \
|
||||
a = rotl32 (a, s); \
|
||||
}
|
||||
|
||||
#define MD4_STEP0(f,a,b,c,d,K,s) \
|
||||
{ \
|
||||
a = __add3 (a, K, f (b, c, d)); \
|
||||
a = hc_add3 (a, K, f (b, c, d)); \
|
||||
a = rotl32 (a, s); \
|
||||
}
|
||||
|
||||
@ -92,7 +92,7 @@
|
||||
#define MD5_STEP_S(f,a,b,c,d,x,K,s) \
|
||||
{ \
|
||||
a += K; \
|
||||
a = __add3_S (a, x, f (b, c, d)); \
|
||||
a = hc_add3_S (a, x, f (b, c, d)); \
|
||||
a = rotl32_S (a, s); \
|
||||
a += b; \
|
||||
}
|
||||
@ -100,14 +100,14 @@
|
||||
#define MD5_STEP(f,a,b,c,d,x,K,s) \
|
||||
{ \
|
||||
a += K; \
|
||||
a = __add3 (a, x, f (b, c, d)); \
|
||||
a = hc_add3 (a, x, f (b, c, d)); \
|
||||
a = rotl32 (a, s); \
|
||||
a += b; \
|
||||
}
|
||||
|
||||
#define MD5_STEP0(f,a,b,c,d,K,s) \
|
||||
{ \
|
||||
a = __add3 (a, K, f (b, c, d)); \
|
||||
a = hc_add3 (a, K, f (b, c, d)); \
|
||||
a = rotl32 (a, s); \
|
||||
a += b; \
|
||||
}
|
||||
@ -139,7 +139,7 @@
|
||||
#define SHA1_STEP_S(f,a,b,c,d,e,x) \
|
||||
{ \
|
||||
e += K; \
|
||||
e = __add3_S (e, x, f (b, c, d)); \
|
||||
e = hc_add3_S (e, x, f (b, c, d)); \
|
||||
e += rotl32_S (a, 5u); \
|
||||
b = rotl32_S (b, 30u); \
|
||||
}
|
||||
@ -147,7 +147,7 @@
|
||||
#define SHA1_STEP(f,a,b,c,d,e,x) \
|
||||
{ \
|
||||
e += K; \
|
||||
e = __add3 (e, x, f (b, c, d)); \
|
||||
e = hc_add3 (e, x, f (b, c, d)); \
|
||||
e += rotl32 (a, 5u); \
|
||||
b = rotl32 (b, 30u); \
|
||||
}
|
||||
@ -155,7 +155,7 @@
|
||||
/*
|
||||
#define SHA1_STEP0(f,a,b,c,d,e,x) \
|
||||
{ \
|
||||
e = __add3 (e, K, f (b, c, d)); \
|
||||
e = hc_add3 (e, K, f (b, c, d)); \
|
||||
e += rotl32 (a, 5u); \
|
||||
b = rotl32 (b, 30u); \
|
||||
}
|
||||
@ -163,7 +163,7 @@
|
||||
|
||||
#define SHA1_STEPX(f,a,b,c,d,e,x) \
|
||||
{ \
|
||||
e = __add3 (e, x, f (b, c, d)); \
|
||||
e = hc_add3 (e, x, f (b, c, d)); \
|
||||
e += rotl32 (a, 5u); \
|
||||
b = rotl32 (b, 30u); \
|
||||
}
|
||||
@ -203,20 +203,20 @@
|
||||
|
||||
#define SHA256_STEP_S(F0,F1,a,b,c,d,e,f,g,h,x,K) \
|
||||
{ \
|
||||
h = __add3_S (h, K, x); \
|
||||
h = __add3_S (h, SHA256_S3_S (e), F1 (e,f,g)); \
|
||||
h = hc_add3_S (h, K, x); \
|
||||
h = hc_add3_S (h, SHA256_S3_S (e), F1 (e,f,g)); \
|
||||
d += h; \
|
||||
h = __add3_S (h, SHA256_S2_S (a), F0 (a,b,c)); \
|
||||
h = hc_add3_S (h, SHA256_S2_S (a), F0 (a,b,c)); \
|
||||
}
|
||||
|
||||
#define SHA256_EXPAND_S(x,y,z,w) (SHA256_S1_S (x) + y + SHA256_S0_S (z) + w)
|
||||
|
||||
#define SHA256_STEP(F0,F1,a,b,c,d,e,f,g,h,x,K) \
|
||||
{ \
|
||||
h = __add3 (h, K, x); \
|
||||
h = __add3 (h, SHA256_S3 (e), F1 (e,f,g)); \
|
||||
h = hc_add3 (h, K, x); \
|
||||
h = hc_add3 (h, SHA256_S3 (e), F1 (e,f,g)); \
|
||||
d += h; \
|
||||
h = __add3 (h, SHA256_S2 (a), F0 (a,b,c)); \
|
||||
h = hc_add3 (h, SHA256_S2 (a), F0 (a,b,c)); \
|
||||
}
|
||||
|
||||
#define SHA256_EXPAND(x,y,z,w) (SHA256_S1 (x) + y + SHA256_S0 (z) + w)
|
||||
|
@ -136,26 +136,26 @@ void truncate_left (u32 *buf0, u32 *buf1, const u32 offset)
|
||||
|
||||
void lshift_block (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1)
|
||||
{
|
||||
out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[3] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
out0[0] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[3] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
}
|
||||
|
||||
void rshift_block (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1)
|
||||
{
|
||||
out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[0] = amd_bytealign_S (in0[0], 0, 3);
|
||||
out1[3] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[0] = hc_bytealign_S (in0[0], 0, 3);
|
||||
}
|
||||
|
||||
void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const u32 num)
|
||||
@ -171,32 +171,32 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = in1[2];
|
||||
out1[3] = in1[3];
|
||||
break;
|
||||
case 1: out0[0] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[3] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 1: out0[0] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[3] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
break;
|
||||
case 2: out0[0] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[1] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[2] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[3] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[0] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[1] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[2] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[3] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 2: out0[0] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[1] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[2] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[3] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[0] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[1] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[2] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[3] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
break;
|
||||
case 3: out0[0] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[1] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[2] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[3] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[0] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[1] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[2] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[3] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 3: out0[0] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[1] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[2] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[3] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[0] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[1] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[2] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[3] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
break;
|
||||
case 4: out0[0] = in0[1];
|
||||
out0[1] = in0[2];
|
||||
@ -207,31 +207,31 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = in1[3];
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 5: out0[0] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[1] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[2] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[3] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[0] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[1] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[2] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 5: out0[0] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[1] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[2] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[3] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[0] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[1] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[2] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 6: out0[0] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[1] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[2] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[3] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[0] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[1] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[2] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 6: out0[0] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[1] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[2] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[3] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[0] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[1] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[2] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 7: out0[0] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 7: out0[0] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 8: out0[0] = in0[2];
|
||||
@ -243,30 +243,30 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 9: out0[0] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[1] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[2] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[3] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[0] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[1] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 9: out0[0] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[1] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[2] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[3] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[0] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[1] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 10: out0[0] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[1] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[2] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[3] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[0] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[1] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 10: out0[0] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[1] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[2] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[3] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[0] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[1] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 11: out0[0] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[1] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[2] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[3] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[0] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[1] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 11: out0[0] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[1] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[2] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[3] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[0] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[1] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
@ -279,29 +279,29 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 13: out0[0] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[1] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[2] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[3] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[0] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 13: out0[0] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[1] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[2] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[3] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[0] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 14: out0[0] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[1] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[2] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[3] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[0] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 14: out0[0] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[1] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[2] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[3] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[0] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 15: out0[0] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[1] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[2] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[3] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[0] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 15: out0[0] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[1] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[2] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[3] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[0] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
@ -315,28 +315,28 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 17: out0[0] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[1] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[2] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[3] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 17: out0[0] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out0[1] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[2] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[3] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 18: out0[0] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[1] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[2] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[3] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 18: out0[0] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out0[1] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[2] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[3] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 19: out0[0] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[1] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[2] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[3] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 19: out0[0] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out0[1] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[2] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[3] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
@ -351,27 +351,27 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 21: out0[0] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[1] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[2] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 21: out0[0] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out0[1] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[2] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 22: out0[0] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[1] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[2] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 22: out0[0] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out0[1] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[2] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 23: out0[0] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[1] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[2] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 23: out0[0] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out0[1] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[2] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
out1[1] = 0;
|
||||
@ -387,8 +387,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 25: out0[0] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[1] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 25: out0[0] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out0[1] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
@ -396,8 +396,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 26: out0[0] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[1] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 26: out0[0] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out0[1] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
@ -405,8 +405,8 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 27: out0[0] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[1] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 27: out0[0] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out0[1] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
out1[0] = 0;
|
||||
@ -423,7 +423,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 29: out0[0] = amd_bytealign_S ( 0, in1[3], 1);
|
||||
case 29: out0[0] = hc_bytealign_S ( 0, in1[3], 1);
|
||||
out0[1] = 0;
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
@ -432,7 +432,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 30: out0[0] = amd_bytealign_S ( 0, in1[3], 2);
|
||||
case 30: out0[0] = hc_bytealign_S ( 0, in1[3], 2);
|
||||
out0[1] = 0;
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
@ -441,7 +441,7 @@ void lshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out1[2] = 0;
|
||||
out1[3] = 0;
|
||||
break;
|
||||
case 31: out0[0] = amd_bytealign_S ( 0, in1[3], 3);
|
||||
case 31: out0[0] = hc_bytealign_S ( 0, in1[3], 3);
|
||||
out0[1] = 0;
|
||||
out0[2] = 0;
|
||||
out0[3] = 0;
|
||||
@ -466,32 +466,32 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = in0[1];
|
||||
out0[0] = in0[0];
|
||||
break;
|
||||
case 1: out1[3] = amd_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[0] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 1: out1[3] = hc_bytealign_S (in1[3], in1[2], 3);
|
||||
out1[2] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[1] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[0] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out0[3] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[2] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[1] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[0] = hc_bytealign_S (in0[0], 0, 3);
|
||||
break;
|
||||
case 2: out1[3] = amd_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[2] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[1] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[0] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[3] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[2] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[1] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[0] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 2: out1[3] = hc_bytealign_S (in1[3], in1[2], 2);
|
||||
out1[2] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[1] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[0] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out0[3] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[2] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[1] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[0] = hc_bytealign_S (in0[0], 0, 2);
|
||||
break;
|
||||
case 3: out1[3] = amd_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[2] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[1] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[0] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[3] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[2] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[1] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[0] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 3: out1[3] = hc_bytealign_S (in1[3], in1[2], 1);
|
||||
out1[2] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[1] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[0] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out0[3] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[2] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[1] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[0] = hc_bytealign_S (in0[0], 0, 1);
|
||||
break;
|
||||
case 4: out1[3] = in1[2];
|
||||
out1[2] = in1[1];
|
||||
@ -502,31 +502,31 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = in0[0];
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 5: out1[3] = amd_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[2] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[1] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[0] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[3] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[2] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[1] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 5: out1[3] = hc_bytealign_S (in1[2], in1[1], 3);
|
||||
out1[2] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[1] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[0] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out0[3] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[2] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[1] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 6: out1[3] = amd_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[2] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[1] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[0] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[3] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[2] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[1] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 6: out1[3] = hc_bytealign_S (in1[2], in1[1], 2);
|
||||
out1[2] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[1] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[0] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out0[3] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[2] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[1] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 7: out1[3] = amd_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 7: out1[3] = hc_bytealign_S (in1[2], in1[1], 1);
|
||||
out1[2] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[1] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[0] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out0[3] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[2] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[1] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 8: out1[3] = in1[1];
|
||||
@ -538,30 +538,30 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 9: out1[3] = amd_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[2] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[1] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[0] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[3] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[2] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 9: out1[3] = hc_bytealign_S (in1[1], in1[0], 3);
|
||||
out1[2] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[1] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[0] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out0[3] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[2] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 10: out1[3] = amd_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[2] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[1] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[0] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[3] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[2] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 10: out1[3] = hc_bytealign_S (in1[1], in1[0], 2);
|
||||
out1[2] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[1] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[0] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out0[3] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[2] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 11: out1[3] = amd_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[2] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[1] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[0] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[3] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[2] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 11: out1[3] = hc_bytealign_S (in1[1], in1[0], 1);
|
||||
out1[2] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[1] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[0] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out0[3] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[2] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
@ -574,29 +574,29 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 13: out1[3] = amd_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[2] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[1] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[0] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[3] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 13: out1[3] = hc_bytealign_S (in1[0], in0[3], 3);
|
||||
out1[2] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[1] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[0] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out0[3] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 14: out1[3] = amd_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[2] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[1] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[0] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[3] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 14: out1[3] = hc_bytealign_S (in1[0], in0[3], 2);
|
||||
out1[2] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[1] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[0] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out0[3] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 15: out1[3] = amd_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[2] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[1] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[0] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[3] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 15: out1[3] = hc_bytealign_S (in1[0], in0[3], 1);
|
||||
out1[2] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[1] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[0] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out0[3] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
@ -610,28 +610,28 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 17: out1[3] = amd_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[2] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[1] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[0] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 17: out1[3] = hc_bytealign_S (in0[3], in0[2], 3);
|
||||
out1[2] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[1] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[0] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 18: out1[3] = amd_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[2] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[1] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[0] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 18: out1[3] = hc_bytealign_S (in0[3], in0[2], 2);
|
||||
out1[2] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[1] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[0] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 19: out1[3] = amd_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[2] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[1] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[0] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 19: out1[3] = hc_bytealign_S (in0[3], in0[2], 1);
|
||||
out1[2] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[1] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[0] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
@ -646,27 +646,27 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 21: out1[3] = amd_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[2] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[1] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 21: out1[3] = hc_bytealign_S (in0[2], in0[1], 3);
|
||||
out1[2] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[1] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 22: out1[3] = amd_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[2] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[1] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 22: out1[3] = hc_bytealign_S (in0[2], in0[1], 2);
|
||||
out1[2] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[1] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 23: out1[3] = amd_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[2] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[1] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 23: out1[3] = hc_bytealign_S (in0[2], in0[1], 1);
|
||||
out1[2] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[1] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
out0[2] = 0;
|
||||
@ -682,8 +682,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 25: out1[3] = amd_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[2] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 25: out1[3] = hc_bytealign_S (in0[1], in0[0], 3);
|
||||
out1[2] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
@ -691,8 +691,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 26: out1[3] = amd_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[2] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 26: out1[3] = hc_bytealign_S (in0[1], in0[0], 2);
|
||||
out1[2] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
@ -700,8 +700,8 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 27: out1[3] = amd_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[2] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 27: out1[3] = hc_bytealign_S (in0[1], in0[0], 1);
|
||||
out1[2] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
out0[3] = 0;
|
||||
@ -718,7 +718,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 29: out1[3] = amd_bytealign_S (in0[0], 0, 3);
|
||||
case 29: out1[3] = hc_bytealign_S (in0[0], 0, 3);
|
||||
out1[2] = 0;
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
@ -727,7 +727,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 30: out1[3] = amd_bytealign_S (in0[0], 0, 2);
|
||||
case 30: out1[3] = hc_bytealign_S (in0[0], 0, 2);
|
||||
out1[2] = 0;
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
@ -736,7 +736,7 @@ void rshift_block_N (const u32 *in0, const u32 *in1, u32 *out0, u32 *out1, const
|
||||
out0[1] = 0;
|
||||
out0[0] = 0;
|
||||
break;
|
||||
case 31: out1[3] = amd_bytealign_S (in0[0], 0, 1);
|
||||
case 31: out1[3] = hc_bytealign_S (in0[0], 0, 1);
|
||||
out1[2] = 0;
|
||||
out1[1] = 0;
|
||||
out1[0] = 0;
|
||||
@ -803,44 +803,44 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
s7 = amd_bytealign_S (src_r12, src_r13, offset);
|
||||
s6 = amd_bytealign_S (src_r11, src_r12, offset);
|
||||
s5 = amd_bytealign_S (src_r10, src_r11, offset);
|
||||
s4 = amd_bytealign_S (src_r03, src_r10, offset);
|
||||
s3 = amd_bytealign_S (src_r02, src_r03, offset);
|
||||
s2 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s1 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s0 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r12, src_r13, offset);
|
||||
s6 = hc_bytealign_S (src_r11, src_r12, offset);
|
||||
s5 = hc_bytealign_S (src_r10, src_r11, offset);
|
||||
s4 = hc_bytealign_S (src_r03, src_r10, offset);
|
||||
s3 = hc_bytealign_S (src_r02, src_r03, offset);
|
||||
s2 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s1 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s0 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
s7 = amd_bytealign_S (src_r11, src_r12, offset);
|
||||
s6 = amd_bytealign_S (src_r10, src_r11, offset);
|
||||
s5 = amd_bytealign_S (src_r03, src_r10, offset);
|
||||
s4 = amd_bytealign_S (src_r02, src_r03, offset);
|
||||
s3 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s2 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s1 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r11, src_r12, offset);
|
||||
s6 = hc_bytealign_S (src_r10, src_r11, offset);
|
||||
s5 = hc_bytealign_S (src_r03, src_r10, offset);
|
||||
s4 = hc_bytealign_S (src_r02, src_r03, offset);
|
||||
s3 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s2 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s1 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
s7 = amd_bytealign_S (src_r10, src_r11, offset);
|
||||
s6 = amd_bytealign_S (src_r03, src_r10, offset);
|
||||
s5 = amd_bytealign_S (src_r02, src_r03, offset);
|
||||
s4 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s3 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s2 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r10, src_r11, offset);
|
||||
s6 = hc_bytealign_S (src_r03, src_r10, offset);
|
||||
s5 = hc_bytealign_S (src_r02, src_r03, offset);
|
||||
s4 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s3 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s2 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
s7 = amd_bytealign_S (src_r03, src_r10, offset);
|
||||
s6 = amd_bytealign_S (src_r02, src_r03, offset);
|
||||
s5 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s4 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s3 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r03, src_r10, offset);
|
||||
s6 = hc_bytealign_S (src_r02, src_r03, offset);
|
||||
s5 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s4 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s3 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
@ -848,10 +848,10 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 4:
|
||||
s7 = amd_bytealign_S (src_r02, src_r03, offset);
|
||||
s6 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s5 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s4 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r02, src_r03, offset);
|
||||
s6 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s5 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s4 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
@ -859,9 +859,9 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 5:
|
||||
s7 = amd_bytealign_S (src_r01, src_r02, offset);
|
||||
s6 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s5 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r01, src_r02, offset);
|
||||
s6 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s5 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
@ -870,8 +870,8 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 6:
|
||||
s7 = amd_bytealign_S (src_r00, src_r01, offset);
|
||||
s6 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S (src_r00, src_r01, offset);
|
||||
s6 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
@ -881,7 +881,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 7:
|
||||
s7 = amd_bytealign_S ( 0, src_r00, offset);
|
||||
s7 = hc_bytealign_S ( 0, src_r00, offset);
|
||||
s6 = 0;
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
@ -928,44 +928,44 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
switch (offset_switch)
|
||||
{
|
||||
case 0:
|
||||
s7 = __byte_perm_S (src_r12, src_r13, selector);
|
||||
s6 = __byte_perm_S (src_r11, src_r12, selector);
|
||||
s5 = __byte_perm_S (src_r10, src_r11, selector);
|
||||
s4 = __byte_perm_S (src_r03, src_r10, selector);
|
||||
s3 = __byte_perm_S (src_r02, src_r03, selector);
|
||||
s2 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s1 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s0 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r12, src_r13, selector);
|
||||
s6 = hc_byte_perm_S (src_r11, src_r12, selector);
|
||||
s5 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s4 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s3 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s2 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s1 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s0 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
s7 = __byte_perm_S (src_r11, src_r12, selector);
|
||||
s6 = __byte_perm_S (src_r10, src_r11, selector);
|
||||
s5 = __byte_perm_S (src_r03, src_r10, selector);
|
||||
s4 = __byte_perm_S (src_r02, src_r03, selector);
|
||||
s3 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s2 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s1 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r11, src_r12, selector);
|
||||
s6 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s5 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s4 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s3 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s2 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s1 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
s7 = __byte_perm_S (src_r10, src_r11, selector);
|
||||
s6 = __byte_perm_S (src_r03, src_r10, selector);
|
||||
s5 = __byte_perm_S (src_r02, src_r03, selector);
|
||||
s4 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s3 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s2 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r10, src_r11, selector);
|
||||
s6 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s5 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s4 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s3 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s2 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
s7 = __byte_perm_S (src_r03, src_r10, selector);
|
||||
s6 = __byte_perm_S (src_r02, src_r03, selector);
|
||||
s5 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s4 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s3 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r03, src_r10, selector);
|
||||
s6 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s5 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s4 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s3 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
s0 = 0;
|
||||
@ -973,10 +973,10 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 4:
|
||||
s7 = __byte_perm_S (src_r02, src_r03, selector);
|
||||
s6 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s5 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s4 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r02, src_r03, selector);
|
||||
s6 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s5 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s4 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
s1 = 0;
|
||||
@ -984,9 +984,9 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 5:
|
||||
s7 = __byte_perm_S (src_r01, src_r02, selector);
|
||||
s6 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s5 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r01, src_r02, selector);
|
||||
s6 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s5 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
s2 = 0;
|
||||
@ -995,8 +995,8 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 6:
|
||||
s7 = __byte_perm_S (src_r00, src_r01, selector);
|
||||
s6 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S (src_r00, src_r01, selector);
|
||||
s6 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
s3 = 0;
|
||||
@ -1006,7 +1006,7 @@ void append_block8 (const u32 offset, u32 *buf0, u32 *buf1, const u32 *src_l0, c
|
||||
break;
|
||||
|
||||
case 7:
|
||||
s7 = __byte_perm_S ( 0, src_r00, selector);
|
||||
s7 = hc_byte_perm_S ( 0, src_r00, selector);
|
||||
s6 = 0;
|
||||
s5 = 0;
|
||||
s4 = 0;
|
||||
@ -1681,10 +1681,10 @@ u32 search_on_register (const u32 in, const u32 p0)
|
||||
{
|
||||
u32 r = 0;
|
||||
|
||||
if (__bfe_S (in, 0, 8) == p0) r |= 1;
|
||||
if (__bfe_S (in, 8, 8) == p0) r |= 2;
|
||||
if (__bfe_S (in, 16, 8) == p0) r |= 4;
|
||||
if (__bfe_S (in, 24, 8) == p0) r |= 8;
|
||||
if (hc_bfe_S (in, 0, 8) == p0) r |= 1;
|
||||
if (hc_bfe_S (in, 8, 8) == p0) r |= 2;
|
||||
if (hc_bfe_S (in, 16, 8) == p0) r |= 4;
|
||||
if (hc_bfe_S (in, 24, 8) == p0) r |= 8;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -351,23 +351,28 @@ DECLSPEC u64x rotl64 (const u64x a, const u32 n)
|
||||
return rotr64 (a, 64 - n);
|
||||
}
|
||||
|
||||
DECLSPEC u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||
/**
 * Vector wrapper that forwards its three arguments, unchanged and in the
 * same order, to amd_bfe and returns the result.
 *
 * NOTE(review): amd_bfe is presumably the AMD media-ops bit-field extract
 * (value a, bit offset b, bit width c) - confirm against the vendor
 * extension documentation.
 */
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
{
  const u32x extracted = amd_bfe (a, b, c);

  return extracted;
}
|
||||
|
||||
DECLSPEC u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Scalar wrapper that forwards its three arguments, unchanged and in the
 * same order, to amd_bfe and returns the result.
 *
 * NOTE(review): amd_bfe is presumably the AMD media-ops bit-field extract
 * (value a, bit offset b, bit width c) - confirm against the vendor
 * extension documentation.
 */
DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
{
  const u32 extracted = amd_bfe (a, b, c);

  return extracted;
}
|
||||
|
||||
DECLSPEC u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Vector wrapper that forwards its three arguments, unchanged and in the
 * same order, to amd_bytealign and returns the result.
 *
 * The return type is u32x, matching the u32x arguments: a scalar u32
 * return type would not be able to carry a vector result.
 *
 * NOTE(review): amd_bytealign is presumably the AMD media-ops byte-align
 * built-in (concatenate a:b and shift right by c bytes) - confirm against
 * the vendor extension documentation.
 */
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32x c)
{
  return amd_bytealign (a, b, c);
}
|
||||
|
||||
/**
 * Scalar wrapper that forwards its three arguments, unchanged and in the
 * same order, to amd_bytealign and returns the result.
 *
 * NOTE(review): amd_bytealign is presumably the AMD media-ops byte-align
 * built-in (concatenate a:b and shift right by c bytes) - confirm against
 * the vendor extension documentation.
 */
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u32 aligned = amd_bytealign (a, b, c);

  return aligned;
}
|
||||
|
||||
#if AMD_GCN >= 3
|
||||
DECLSPEC u32x __byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
@ -420,7 +425,7 @@ DECLSPEC u32x __byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
@ -431,7 +436,7 @@ DECLSPEC u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
#endif
|
||||
|
||||
#if AMD_GCN >= 5
|
||||
DECLSPEC u32x __add3 (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
@ -484,7 +489,7 @@ DECLSPEC u32x __add3 (const u32x a, const u32x b, const u32x c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 __add3_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
@ -493,12 +498,12 @@ DECLSPEC u32 __add3_S (const u32 a, const u32 b, const u32 c)
|
||||
return r;
|
||||
}
|
||||
#else
|
||||
DECLSPEC u32x __add3 (const u32x a, const u32x b, const u32x c)
|
||||
/**
 * Three-operand addition on vector words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
{
  const u32x ab = a + b;

  return ab + c;
}
|
||||
|
||||
DECLSPEC u32 __add3_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Three-operand addition on scalar words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c)
{
  const u32 ab = a + b;

  return ab + c;
}
|
||||
@ -741,7 +746,7 @@ DECLSPEC u64x rotl64 (const u64x a, const u32 n)
|
||||
return rotate (a, (u64x) n);
|
||||
}
|
||||
|
||||
DECLSPEC u32x __byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
@ -780,7 +785,7 @@ DECLSPEC u32x __byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
@ -789,7 +794,7 @@ DECLSPEC u32 __byte_perm_S (const u32 a, const u32 b, const u32 c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
@ -828,7 +833,7 @@ DECLSPEC u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
@ -837,7 +842,7 @@ DECLSPEC u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
@ -877,14 +882,14 @@ DECLSPEC u32x amd_bytealign (const u32x a, const u32x b, const u32x c)
|
||||
|
||||
#else
|
||||
|
||||
r = __byte_perm (b, a, ((u32x) (0x76543210) >> ((c & 3) * 4)) & 0xffff);
|
||||
r = hc_byte_perm (b, a, ((u32x) (0x76543210) >> ((c & 3) * 4)) & 0xffff);
|
||||
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
@ -894,19 +899,19 @@ DECLSPEC u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
|
||||
#else
|
||||
|
||||
r = __byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
|
||||
r = hc_byte_perm_S (b, a, (0x76543210 >> ((c & 3) * 4)) & 0xffff);
|
||||
|
||||
#endif
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32x __add3 (const u32x a, const u32x b, const u32x c)
|
||||
/**
 * Three-operand addition on vector words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
{
  const u32x ab = a + b;

  return ab + c;
}
|
||||
|
||||
DECLSPEC u32 __add3_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Three-operand addition on scalar words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c)
{
  const u32 ab = a + b;

  return ab + c;
}
|
||||
@ -984,7 +989,7 @@ DECLSPEC u64x rotl64 (const u64x a, const u32 n)
|
||||
return rotate (a, (u64x) n);
|
||||
}
|
||||
|
||||
DECLSPEC u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_bfe (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
#define BIT(x) ((u32x) (1u) << (x))
|
||||
#define BIT_MASK(x) (BIT (x) - 1)
|
||||
@ -997,7 +1002,7 @@ DECLSPEC u32x __bfe (const u32x a, const u32x b, const u32x c)
|
||||
#undef BFE
|
||||
}
|
||||
|
||||
DECLSPEC u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
#define BIT(x) (1u << (x))
|
||||
#define BIT_MASK(x) (BIT (x) - 1)
|
||||
@ -1010,7 +1015,7 @@ DECLSPEC u32 __bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
#undef BFE
|
||||
}
|
||||
|
||||
DECLSPEC u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
|
||||
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32 c)
|
||||
{
|
||||
#if VECT_SIZE == 1
|
||||
const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
|
||||
@ -1043,19 +1048,19 @@ DECLSPEC u32x amd_bytealign (const u32x a, const u32x b, const u32 c)
|
||||
#endif
|
||||
}
|
||||
|
||||
DECLSPEC u32 amd_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Scalar byte-align built from a 64-bit shift.
 *
 * Places a in the upper 32 bits and b in the lower 32 bits of a 64-bit
 * value, shifts that value right by (c & 3) bytes, and returns the low
 * 32 bits of the result. Only the two low bits of c are used.
 */
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
{
  const u32 shift_bits = (c & 3) * 8;

  const u64 upper = ((u64) a) << 32;
  const u64 lower = (u64) b;

  const u64 joined = upper | lower;

  return (u32) (joined >> shift_bits);
}
|
||||
|
||||
DECLSPEC u32x __add3 (const u32x a, const u32x b, const u32x c)
|
||||
/**
 * Three-operand addition on vector words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
{
  const u32x ab = a + b;

  return ab + c;
}
|
||||
|
||||
DECLSPEC u32 __add3_S (const u32 a, const u32 b, const u32 c)
|
||||
/**
 * Three-operand addition on scalar words: returns a + b + c, summed
 * left to right with the plain + operator.
 */
DECLSPEC u32 hc_add3_S (const u32 a, const u32 b, const u32 c)
{
  const u32 ab = a + b;

  return ab + c;
}
|
||||
|
@ -101,9 +101,13 @@
|
||||
*/
|
||||
|
||||
#ifdef IS_AMD
|
||||
#if defined(cl_amd_media_ops)
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
|
||||
#endif
|
||||
#if defined(cl_amd_media_ops2)
|
||||
#pragma OPENCL EXTENSION cl_amd_media_ops2 : enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Unrolling is generally enabled, for all device types and hash modes
|
||||
|
@ -35,11 +35,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -56,11 +56,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -149,11 +149,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -171,11 +171,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, in4, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -259,9 +259,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -274,9 +274,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
@ -34,11 +34,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -55,11 +55,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -148,11 +148,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -170,11 +170,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, in4, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -258,9 +258,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -273,9 +273,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
@ -307,10 +307,10 @@ __constant u32a c_sbox3[256] =
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[__bfe ((L), 24, 8)]; \
|
||||
tmp += S1[__bfe ((L), 16, 8)]; \
|
||||
tmp ^= S2[__bfe ((L), 8, 8)]; \
|
||||
tmp += S3[__bfe ((L), 0, 8)]; \
|
||||
tmp = S0[hc_bfe ((L), 24, 8)]; \
|
||||
tmp += S1[hc_bfe ((L), 16, 8)]; \
|
||||
tmp ^= S2[hc_bfe ((L), 8, 8)]; \
|
||||
tmp += S3[hc_bfe ((L), 0, 8)]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
|
@ -2123,12 +2123,12 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = swap32_S (append[4]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp5 = amd_bytealign (in4, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
tmp5 = hc_bytealign (in4, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -2147,12 +2147,12 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, in4, selector);
|
||||
tmp5 = __byte_perm (in4, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
tmp5 = hc_byte_perm (in4, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
@ -2123,12 +2123,12 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = swap32_S (append[4]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp5 = amd_bytealign (in4, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
tmp5 = hc_bytealign (in4, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -2147,12 +2147,12 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp3 = __byte_perm (in3, in4, selector);
|
||||
tmp4 = __byte_perm (in4, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp3 = hc_byte_perm (in3, in4, selector);
|
||||
tmp4 = hc_byte_perm (in4, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
@ -31,11 +31,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -52,11 +52,11 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -145,11 +145,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -167,11 +167,11 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, in4, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
@ -255,9 +255,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -270,9 +270,9 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, 0, selector);
|
||||
#endif
|
||||
|
||||
const u32 div = offset / 4;
|
||||
|
@ -102,11 +102,11 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -123,11 +123,11 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
@ -243,11 +243,11 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -264,11 +264,11 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
u32 carry[4] = { 0, 0, 0, 0 };
|
||||
@ -410,11 +410,11 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, 0, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -431,11 +431,11 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, 0, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, 0, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
@ -560,11 +560,11 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
|
||||
tmp0 = amd_bytealign ( 0, in0, offset);
|
||||
tmp1 = amd_bytealign (in0, in1, offset);
|
||||
tmp2 = amd_bytealign (in1, in2, offset);
|
||||
tmp3 = amd_bytealign (in2, in3, offset);
|
||||
tmp4 = amd_bytealign (in3, in4, offset);
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
@ -582,11 +582,11 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = __byte_perm ( 0, in0, selector);
|
||||
tmp1 = __byte_perm (in0, in1, selector);
|
||||
tmp2 = __byte_perm (in1, in2, selector);
|
||||
tmp3 = __byte_perm (in2, in3, selector);
|
||||
tmp4 = __byte_perm (in3, in4, selector);
|
||||
tmp0 = hc_byte_perm ( 0, in0, selector);
|
||||
tmp1 = hc_byte_perm (in0, in1, selector);
|
||||
tmp2 = hc_byte_perm (in1, in2, selector);
|
||||
tmp3 = hc_byte_perm (in2, in3, selector);
|
||||
tmp4 = hc_byte_perm (in3, in4, selector);
|
||||
#endif
|
||||
|
||||
switch (offset / 4)
|
||||
|
@ -316,10 +316,10 @@ __constant u32a c_pbox[18] =
|
||||
{ \
|
||||
u32 tmp; \
|
||||
\
|
||||
tmp = S0[__bfe_S ((L), 24, 8)]; \
|
||||
tmp += S1[__bfe_S ((L), 16, 8)]; \
|
||||
tmp ^= S2[__bfe_S ((L), 8, 8)]; \
|
||||
tmp += S3[__bfe_S ((L), 0, 8)]; \
|
||||
tmp = S0[hc_bfe_S ((L), 24, 8)]; \
|
||||
tmp += S1[hc_bfe_S ((L), 16, 8)]; \
|
||||
tmp ^= S2[hc_bfe_S ((L), 8, 8)]; \
|
||||
tmp += S3[hc_bfe_S ((L), 0, 8)]; \
|
||||
\
|
||||
(R) ^= tmp ^ P[(N)]; \
|
||||
}
|
||||
|
@ -200,11 +200,11 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| amd_bytealign (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = amd_bytealign (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = amd_bytealign (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = amd_bytealign (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = amd_bytealign ( 0, sc[i - 1], pm4);
|
||||
| hc_bytealign (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign ( 0, sc[i - 1], pm4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -212,11 +212,11 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
||||
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| __byte_perm ( 0, bl[0], selector);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = __byte_perm (bl[i - 1], bl[i], selector);
|
||||
sc[idx++] = __byte_perm (bl[i - 1], sc[0], selector);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = __byte_perm (sc[i - 1], sc[i], selector);
|
||||
sc[idx++] = __byte_perm (sc[i - 1], 0, selector);
|
||||
| hc_byte_perm ( 0, bl[0], selector);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_byte_perm (bl[i - 1], bl[i], selector);
|
||||
sc[idx++] = hc_byte_perm (bl[i - 1], sc[0], selector);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_byte_perm (sc[i - 1], sc[i], selector);
|
||||
sc[idx++] = hc_byte_perm (sc[i - 1], 0, selector);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -229,19 +229,19 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
|
||||
const u32 od = m / 4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
pt[0] = amd_bytealign (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = amd_bytealign (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = amd_bytealign (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = amd_bytealign (sc[od + 4], sc[od + 3], om);
|
||||
pt[0] = hc_bytealign (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = hc_bytealign (sc[od + 4], sc[od + 3], om);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
int selector = (0x76543210 >> (om * 4)) & 0xffff;
|
||||
|
||||
pt[0] = __byte_perm (sc[od + 0], sc[od + 1], selector);
|
||||
pt[1] = __byte_perm (sc[od + 1], sc[od + 2], selector);
|
||||
pt[2] = __byte_perm (sc[od + 2], sc[od + 3], selector);
|
||||
pt[3] = __byte_perm (sc[od + 3], sc[od + 4], selector);
|
||||
pt[0] = hc_byte_perm (sc[od + 0], sc[od + 1], selector);
|
||||
pt[1] = hc_byte_perm (sc[od + 1], sc[od + 2], selector);
|
||||
pt[2] = hc_byte_perm (sc[od + 2], sc[od + 3], selector);
|
||||
pt[3] = hc_byte_perm (sc[od + 3], sc[od + 4], selector);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -23,13 +23,13 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
#ifdef IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
|
||||
tmp0 = __byte_perm (append, 0, selector);
|
||||
tmp1 = __byte_perm (0, append, selector);
|
||||
tmp0 = hc_byte_perm (append, 0, selector);
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp0 = amd_bytealign (0, append, func_len);
|
||||
tmp1 = amd_bytealign (append, 0, func_len);
|
||||
tmp0 = hc_bytealign (0, append, func_len);
|
||||
tmp1 = hc_bytealign (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
u32 carry = 0;
|
||||
|
@ -45,45 +45,45 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = amd_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = amd_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = amd_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = amd_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = amd_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = amd_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = amd_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = amd_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
|
||||
tmp00 = __byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = __byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = __byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = __byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = __byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = __byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = __byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = __byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = __byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = __byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = __byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = __byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = __byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = __byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = __byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = __byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = __byte_perm ( 0, carry[15], selector);
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
|
@ -43,45 +43,45 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = amd_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = amd_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = amd_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = amd_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = amd_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = amd_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = amd_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = amd_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
|
||||
tmp00 = __byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = __byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = __byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = __byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = __byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = __byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = __byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = __byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = __byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = __byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = __byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = __byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = __byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = __byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = __byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = __byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = __byte_perm ( 0, carry[15], selector);
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
|
@ -42,45 +42,45 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = amd_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = amd_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = amd_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = amd_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = amd_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = amd_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = amd_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = amd_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = amd_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = amd_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = amd_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = amd_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = amd_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = amd_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = amd_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = amd_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = amd_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
const int selector = (0x76543210 >> ((offset & 3) * 4)) & 0xffff;
|
||||
|
||||
tmp00 = __byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = __byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = __byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = __byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = __byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = __byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = __byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = __byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = __byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = __byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = __byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = __byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = __byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = __byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = __byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = __byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = __byte_perm ( 0, carry[15], selector);
|
||||
tmp00 = hc_byte_perm (carry[ 0], 0, selector);
|
||||
tmp01 = hc_byte_perm (carry[ 1], carry[ 0], selector);
|
||||
tmp02 = hc_byte_perm (carry[ 2], carry[ 1], selector);
|
||||
tmp03 = hc_byte_perm (carry[ 3], carry[ 2], selector);
|
||||
tmp04 = hc_byte_perm (carry[ 4], carry[ 3], selector);
|
||||
tmp05 = hc_byte_perm (carry[ 5], carry[ 4], selector);
|
||||
tmp06 = hc_byte_perm (carry[ 6], carry[ 5], selector);
|
||||
tmp07 = hc_byte_perm (carry[ 7], carry[ 6], selector);
|
||||
tmp08 = hc_byte_perm (carry[ 8], carry[ 7], selector);
|
||||
tmp09 = hc_byte_perm (carry[ 9], carry[ 8], selector);
|
||||
tmp10 = hc_byte_perm (carry[10], carry[ 9], selector);
|
||||
tmp11 = hc_byte_perm (carry[11], carry[10], selector);
|
||||
tmp12 = hc_byte_perm (carry[12], carry[11], selector);
|
||||
tmp13 = hc_byte_perm (carry[13], carry[12], selector);
|
||||
tmp14 = hc_byte_perm (carry[14], carry[13], selector);
|
||||
tmp15 = hc_byte_perm (carry[15], carry[14], selector);
|
||||
tmp16 = hc_byte_perm ( 0, carry[15], selector);
|
||||
#endif
|
||||
|
||||
carry[ 0] = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user