mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-23 00:28:11 +00:00
Optimize some switch_buffer_* functions for generic OpenCL devices (CPU, various OSX, ...)
This commit is contained in:
parent
23917455ef
commit
e1fe3e755b
16608
OpenCL/inc_common.cl
16608
OpenCL/inc_common.cl
File diff suppressed because it is too large
Load Diff
@ -361,16 +361,46 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
return amd_bfe (a, b, c);
|
||||
}
|
||||
|
||||
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32x c)
|
||||
DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const u32 c)
|
||||
{
|
||||
return amd_bytealign (a, b, c);
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
return amd_bytealign (a, b, c);
|
||||
}
|
||||
|
||||
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32 c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a >> 24) | (b << 8); break;
|
||||
case 2: r = (a >> 16) | (b << 16); break;
|
||||
case 3: r = (a >> 8) | (b << 24); break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a >> 24) | (b << 8); break;
|
||||
case 2: r = (a >> 16) | (b << 16); break;
|
||||
case 3: r = (a >> 8) | (b << 24); break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
#if HAS_VPERM
|
||||
DECLSPEC u32x hc_byte_perm (const u32x a, const u32x b, const u32x c)
|
||||
{
|
||||
@ -1089,44 +1119,64 @@ DECLSPEC u32 hc_bfe_S (const u32 a, const u32 b, const u32 c)
|
||||
#undef BFE
|
||||
}
|
||||
|
||||
DECLSPEC u32x hc_bytealign_be (const u32x a, const u32x b, const u32 c)
|
||||
{
|
||||
u32x r;
|
||||
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a << 24) | (b >> 8); break;
|
||||
case 2: r = (a << 16) | (b >> 16); break;
|
||||
case 3: r = (a << 8) | (b >> 24); break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_bytealign_be_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
u32 r;
|
||||
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a << 24) | (b >> 8); break;
|
||||
case 2: r = (a << 16) | (b >> 16); break;
|
||||
case 3: r = (a << 8) | (b >> 24); break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32x hc_bytealign (const u32x a, const u32x b, const u32 c)
|
||||
{
|
||||
#if VECT_SIZE == 1
|
||||
const u64x tmp = ((((u64x) (a)) << 32) | ((u64x) (b))) >> ((c & 3) * 8);
|
||||
u32x r;
|
||||
|
||||
return (u32x) (tmp);
|
||||
#endif
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a >> 24) | (b << 8); break;
|
||||
case 2: r = (a >> 16) | (b << 16); break;
|
||||
case 3: r = (a >> 8) | (b << 24); break;
|
||||
}
|
||||
|
||||
#if VECT_SIZE == 2
|
||||
const u64x tmp = ((((u64x) (a.s0, a.s1)) << 32) | ((u64x) (b.s0, b.s1))) >> ((c & 3) * 8);
|
||||
|
||||
return (u32x) (tmp.s0, tmp.s1);
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 4
|
||||
const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3))) >> ((c & 3) * 8);
|
||||
|
||||
return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3);
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 8
|
||||
const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7))) >> ((c & 3) * 8);
|
||||
|
||||
return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7);
|
||||
#endif
|
||||
|
||||
#if VECT_SIZE == 16
|
||||
const u64x tmp = ((((u64x) (a.s0, a.s1, a.s2, a.s3, a.s4, a.s5, a.s6, a.s7, a.s8, a.s9, a.sa, a.sb, a.sc, a.sd, a.se, a.sf)) << 32) | ((u64x) (b.s0, b.s1, b.s2, b.s3, b.s4, b.s5, b.s6, b.s7, b.s8, b.s9, b.sa, b.sb, b.sc, b.sd, b.se, b.sf))) >> ((c & 3) * 8);
|
||||
|
||||
return (u32x) (tmp.s0, tmp.s1, tmp.s2, tmp.s3, tmp.s4, tmp.s5, tmp.s6, tmp.s7, tmp.s8, tmp.s9, tmp.sa, tmp.sb, tmp.sc, tmp.sd, tmp.se, tmp.sf);
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32 hc_bytealign_S (const u32 a, const u32 b, const u32 c)
|
||||
{
|
||||
const u64 tmp = ((((u64) a) << 32) | ((u64) b)) >> ((c & 3) * 8);
|
||||
u32 r;
|
||||
|
||||
return (u32) (tmp);
|
||||
switch (c & 3)
|
||||
{
|
||||
case 0: r = b; break;
|
||||
case 1: r = (a >> 24) | (b << 8); break;
|
||||
case 2: r = (a >> 16) | (b << 16); break;
|
||||
case 3: r = (a >> 8) | (b << 24); break;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
DECLSPEC u32x hc_add3 (const u32x a, const u32x b, const u32x c)
|
||||
|
@ -30,22 +30,16 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -143,23 +137,17 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -256,16 +244,12 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -29,22 +29,16 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -142,23 +136,17 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -255,16 +243,12 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -2117,11 +2117,11 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = swap32_S (append[4]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = append[4];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
@ -2129,13 +2129,6 @@ DECLSPEC void append_salt (u32 *w0, u32 *w1, u32 *w2, const u32 *append, const u
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
tmp5 = hc_bytealign (in4, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
tmp5 = swap32_S (tmp5);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -26,22 +26,16 @@ DECLSPEC void memcat16 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, cons
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -139,23 +133,17 @@ DECLSPEC void memcat16_x80 (u32 *block0, u32 *block1, u32 *block2, u32 *block3,
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -252,16 +240,12 @@ DECLSPEC void memcat8 (u32 *block0, u32 *block1, u32 *block2, u32 *block3, const
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -97,22 +97,16 @@ DECLSPEC u32 memcat16 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -238,22 +232,16 @@ DECLSPEC u32 memcat16c (u32 *block, const u32 offset, const u32 *append, const u
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -405,22 +393,16 @@ DECLSPEC u32 memcat20 (u32 *block, const u32 offset, const u32 *append, const u3
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, 0, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -554,23 +536,17 @@ DECLSPEC u32 memcat20_x80 (u32 *block, const u32 offset, const u32 *append, cons
|
||||
const int offset_minus_4 = 4 - offset_mod_4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
u32 in0 = swap32_S (append[0]);
|
||||
u32 in1 = swap32_S (append[1]);
|
||||
u32 in2 = swap32_S (append[2]);
|
||||
u32 in3 = swap32_S (append[3]);
|
||||
u32 in4 = 0x80000000;
|
||||
u32 in0 = append[0];
|
||||
u32 in1 = append[1];
|
||||
u32 in2 = append[2];
|
||||
u32 in3 = append[3];
|
||||
u32 in4 = 0x80;
|
||||
|
||||
tmp0 = hc_bytealign ( 0, in0, offset);
|
||||
tmp1 = hc_bytealign (in0, in1, offset);
|
||||
tmp2 = hc_bytealign (in1, in2, offset);
|
||||
tmp3 = hc_bytealign (in2, in3, offset);
|
||||
tmp4 = hc_bytealign (in3, in4, offset);
|
||||
|
||||
tmp0 = swap32_S (tmp0);
|
||||
tmp1 = swap32_S (tmp1);
|
||||
tmp2 = swap32_S (tmp2);
|
||||
tmp3 = swap32_S (tmp3);
|
||||
tmp4 = swap32_S (tmp4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -200,11 +200,11 @@ DECLSPEC void make_sc (u32 *sc, const u32 *pw, const u32 pw_len, const u32 *bl,
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
for (i = 0; i < pd; i++) sc[idx++] = pw[i];
|
||||
sc[idx++] = pw[i]
|
||||
| hc_bytealign (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign ( 0, sc[i - 1], pm4);
|
||||
| hc_bytealign_be (bl[0], 0, pm4);
|
||||
for (i = 1; i < bd; i++) sc[idx++] = hc_bytealign_be (bl[i], bl[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be (sc[0], bl[i - 1], pm4);
|
||||
for (i = 1; i < 4; i++) sc[idx++] = hc_bytealign_be (sc[i], sc[i - 1], pm4);
|
||||
sc[idx++] = hc_bytealign_be ( 0, sc[i - 1], pm4);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
@ -229,10 +229,10 @@ DECLSPEC void make_pt_with_offset (u32 *pt, const u32 offset, const u32 *sc, con
|
||||
const u32 od = m / 4;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
pt[0] = hc_bytealign (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = hc_bytealign (sc[od + 4], sc[od + 3], om);
|
||||
pt[0] = hc_bytealign_be (sc[od + 1], sc[od + 0], om);
|
||||
pt[1] = hc_bytealign_be (sc[od + 2], sc[od + 1], om);
|
||||
pt[2] = hc_bytealign_be (sc[od + 3], sc[od + 2], om);
|
||||
pt[3] = hc_bytealign_be (sc[od + 4], sc[od + 3], om);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -20,6 +20,11 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
u32 tmp0;
|
||||
u32 tmp1;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign_be (0, append, func_len);
|
||||
tmp1 = hc_bytealign_be (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
const int selector = (0x76543210 >> ((func_len & 3) * 4)) & 0xffff;
|
||||
|
||||
@ -27,11 +32,6 @@ DECLSPEC void memcat8c_be (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 len, co
|
||||
tmp1 = hc_byte_perm (0, append, selector);
|
||||
#endif
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp0 = hc_bytealign (0, append, func_len);
|
||||
tmp1 = hc_bytealign (append, 0, func_len);
|
||||
#endif
|
||||
|
||||
u32 carry = 0;
|
||||
|
||||
switch (div)
|
||||
|
@ -45,23 +45,23 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign_be (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign_be (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign_be (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign_be (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign_be (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign_be (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign_be (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign_be (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign_be (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign_be (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign_be (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -43,23 +43,23 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign_be (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign_be (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign_be (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign_be (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign_be (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign_be (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign_be (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign_be (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign_be (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign_be (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign_be (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
@ -42,23 +42,23 @@ DECLSPEC void memcat64c_be (u32x *block, const u32 offset, u32x *carry)
|
||||
u32x tmp16;
|
||||
|
||||
#if defined IS_AMD || defined IS_GENERIC
|
||||
tmp00 = hc_bytealign ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign (carry[15], 0, offset);
|
||||
tmp00 = hc_bytealign_be ( 0, carry[ 0], offset);
|
||||
tmp01 = hc_bytealign_be (carry[ 0], carry[ 1], offset);
|
||||
tmp02 = hc_bytealign_be (carry[ 1], carry[ 2], offset);
|
||||
tmp03 = hc_bytealign_be (carry[ 2], carry[ 3], offset);
|
||||
tmp04 = hc_bytealign_be (carry[ 3], carry[ 4], offset);
|
||||
tmp05 = hc_bytealign_be (carry[ 4], carry[ 5], offset);
|
||||
tmp06 = hc_bytealign_be (carry[ 5], carry[ 6], offset);
|
||||
tmp07 = hc_bytealign_be (carry[ 6], carry[ 7], offset);
|
||||
tmp08 = hc_bytealign_be (carry[ 7], carry[ 8], offset);
|
||||
tmp09 = hc_bytealign_be (carry[ 8], carry[ 9], offset);
|
||||
tmp10 = hc_bytealign_be (carry[ 9], carry[10], offset);
|
||||
tmp11 = hc_bytealign_be (carry[10], carry[11], offset);
|
||||
tmp12 = hc_bytealign_be (carry[11], carry[12], offset);
|
||||
tmp13 = hc_bytealign_be (carry[12], carry[13], offset);
|
||||
tmp14 = hc_bytealign_be (carry[13], carry[14], offset);
|
||||
tmp15 = hc_bytealign_be (carry[14], carry[15], offset);
|
||||
tmp16 = hc_bytealign_be (carry[15], 0, offset);
|
||||
#endif
|
||||
|
||||
#ifdef IS_NV
|
||||
|
Loading…
Reference in New Issue
Block a user