Fix some issue with offset_minus_4

pull/1312/head
jsteube 7 years ago
parent c8a645dc16
commit a53d9e09de

@ -2939,11 +2939,11 @@ void append_0x80_1x16 (u32x w[16], const u32 offset)
void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
@ -3469,8 +3469,6 @@ void switch_buffer_by_offset_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4],
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)
@ -3802,7 +3800,7 @@ void switch_buffer_by_offset_carry_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x
{
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
switch (offset / 4)
{
@ -6184,11 +6182,11 @@ void switch_buffer_by_offset_carry_be (u32x w0[4], u32x w1[4], u32x w2[4], u32x
void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x w4[4], u32x w5[4], u32x w6[4], u32x w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
@ -7226,8 +7224,6 @@ void switch_buffer_by_offset_8x4_le (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)
@ -15044,11 +15040,11 @@ void undo_utf16le_S (const u32 in1[4], const u32 in2[4], u32 out[4])
void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
@ -15574,8 +15570,6 @@ void switch_buffer_by_offset_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], c
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)
@ -15907,7 +15901,7 @@ void switch_buffer_by_offset_carry_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3
{
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
switch (offset / 4)
{
@ -18289,11 +18283,11 @@ void switch_buffer_by_offset_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3
void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], u32 w4[4], u32 w5[4], u32 w6[4], u32 w7[4], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
@ -19331,8 +19325,6 @@ void switch_buffer_by_offset_8x4_le_S (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)
@ -25596,11 +25588,11 @@ void switch_buffer_by_offset_8x4_carry_be_S (u32 w0[4], u32 w1[4], u32 w2[4], u3
void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
switch (offset / 4)
{
case 0:
@ -32294,8 +32286,6 @@ void switch_buffer_by_offset_1x64_le_S (u32 w[64], const u32 offset)
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
switch (offset / 4)

@ -756,11 +756,11 @@ void append_block1 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_r0
void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0[4], const u32 src_l1[4], const u32 src_r0[4], const u32 src_r1[4])
{
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
u32 s0 = 0;
u32 s1 = 0;
u32 s2 = 0;
@ -895,8 +895,6 @@ void append_block8 (const u32 offset, u32 buf0[4], u32 buf1[4], const u32 src_l0
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset % 4);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
u32 s0 = 0;

@ -114,7 +114,7 @@ void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32
digest[3] += d;
}
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -122,19 +122,18 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -142,13 +141,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp3 = tmp4;
tmp4 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -156,10 +151,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -226,7 +220,7 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
}
}
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -234,19 +228,18 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -254,13 +247,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp3 = tmp4;
tmp4 = 0x80;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -268,10 +257,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0x80, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -338,44 +326,38 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
}
}
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[2])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign ( 0, append[1], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{

@ -113,7 +113,7 @@ void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32
digest[3] += d;
}
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -121,19 +121,18 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -141,13 +140,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp3 = tmp4;
tmp4 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -155,10 +150,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -225,7 +219,7 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
}
}
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -233,19 +227,18 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -253,13 +246,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp3 = tmp4;
tmp4 = 0x80;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -267,10 +256,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0x80, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -337,44 +325,38 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
}
}
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[2])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign ( 0, append[1], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{

@ -2111,10 +2111,11 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
u32 tmp4;
u32 tmp5;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (offset & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
@ -2122,9 +2123,7 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp4 = amd_bytealign (append[4], append[3], offset_minus_4);
tmp5 = amd_bytealign ( 0, append[4], offset_minus_4);
const u32 mod = offset & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -2133,13 +2132,9 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp4 = tmp5;
tmp5 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -2148,7 +2143,6 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], append[4], selector);
tmp5 = __byte_perm (append[4], 0, selector);
#endif
const u32 div = offset / 4;

@ -2112,10 +2112,11 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
u32 tmp4;
u32 tmp5;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (offset & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
@ -2123,9 +2124,7 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp4 = amd_bytealign (append[4], append[3], offset_minus_4);
tmp5 = amd_bytealign ( 0, append[4], offset_minus_4);
const u32 mod = offset & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -2134,13 +2133,9 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp4 = tmp5;
tmp5 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (offset & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -2149,7 +2144,6 @@ void append_salt (u32 w0[4], u32 w1[4], u32 w2[4], const u32 append[5], const u3
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], append[4], selector);
tmp5 = __byte_perm (append[4], 0, selector);
#endif
const u32 div = offset / 4;

@ -110,7 +110,7 @@ void md5_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32
digest[3] += d;
}
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -118,19 +118,18 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -138,13 +137,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp3 = tmp4;
tmp4 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -152,10 +147,9 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -222,7 +216,7 @@ void memcat16 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const
}
}
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[4])
void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[4])
{
u32 tmp0;
u32 tmp1;
@ -230,19 +224,18 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -250,13 +243,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp3 = tmp4;
tmp4 = 0x80;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -264,10 +253,9 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
tmp2 = __byte_perm (append[1], append[2], selector);
tmp3 = __byte_perm (append[2], append[3], selector);
tmp4 = __byte_perm (append[3], 0x80, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{
@ -334,44 +322,38 @@ void memcat16_x80 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], c
}
}
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 block_len, const u32 append[2])
void memcat8 (u32 block0[4], u32 block1[4], u32 block2[4], u32 block3[4], const u32 offset, const u32 append[2])
{
u32 tmp0;
u32 tmp1;
u32 tmp2;
#if defined IS_AMD || defined IS_GENERIC
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - (block_len & 3);
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign ( 0, append[1], offset_minus_4);
const u32 mod = block_len & 3;
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
tmp2 = 0;
}
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
tmp1 = __byte_perm (append[0], append[1], selector);
tmp2 = __byte_perm (append[1], 0, selector);
#endif
const u32 div = block_len / 4;
const u32 div = offset / 4;
switch (div)
{

@ -190,27 +190,26 @@ void bswap8 (u32 block[16])
block[ 7] = swap32 (block[ 7]);
}
u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len)
u32 memcat16 (u32 block[16], const u32 offset, const u32 append[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -221,8 +220,6 @@ u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -232,7 +229,7 @@ u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32
tmp4 = __byte_perm (append[3], 0, selector);
#endif
switch (div)
switch (offset / 4)
{
case 0: block[ 0] |= tmp0;
block[ 1] = tmp1;
@ -322,32 +319,31 @@ u32 memcat16 (u32 block[16], const u32 block_len, const u32 append[4], const u32
break;
}
u32 new_len = block_len + append_len;
u32 new_len = offset + append_len;
return new_len;
}
u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u32 append_len, u32 digest[8])
u32 memcat16c (u32 block[16], const u32 offset, const u32 append[4], const u32 append_len, u32 digest[8])
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -358,8 +354,6 @@ u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u3
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -371,7 +365,7 @@ u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u3
u32 carry[4] = { 0, 0, 0, 0 };
switch (div)
switch (offset / 4)
{
case 0: block[ 0] |= tmp0;
block[ 1] = tmp1;
@ -471,7 +465,7 @@ u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u3
break;
}
u32 new_len = block_len + append_len;
u32 new_len = offset + append_len;
if (new_len >= 64)
{
@ -490,27 +484,26 @@ u32 memcat16c (u32 block[16], const u32 block_len, const u32 append[4], const u3
return new_len;
}
u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)
u32 memcat20 (u32 block[20], const u32 offset, const u32 append[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0, append[3], offset_minus_4);
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -521,8 +514,6 @@ u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -532,7 +523,7 @@ u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32
tmp4 = __byte_perm (append[3], 0, selector);
#endif
switch (div)
switch (offset / 4)
{
case 0: block[ 0] |= tmp0;
block[ 1] = tmp1;
@ -632,30 +623,29 @@ u32 memcat20 (u32 block[20], const u32 block_len, const u32 append[4], const u32
break;
}
return block_len + append_len;
return offset + append_len;
}
u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const u32 append_len)
u32 memcat20_x80 (u32 block[20], const u32 offset, const u32 append[4], const u32 append_len)
{
const u32 mod = block_len & 3;
const u32 div = block_len / 4;
u32 tmp0;
u32 tmp1;
u32 tmp2;
u32 tmp3;
u32 tmp4;
#if defined IS_AMD || defined IS_GENERIC
const int offset_minus_4 = 4 - block_len;
const int offset_mod_4 = offset & 3;
const int offset_minus_4 = 4 - offset_mod_4;
#if defined IS_AMD || defined IS_GENERIC
tmp0 = amd_bytealign (append[0], 0, offset_minus_4);
tmp1 = amd_bytealign (append[1], append[0], offset_minus_4);
tmp2 = amd_bytealign (append[2], append[1], offset_minus_4);
tmp3 = amd_bytealign (append[3], append[2], offset_minus_4);
tmp4 = amd_bytealign ( 0x80, append[3], offset_minus_4);
if (mod == 0)
if (offset_mod_4 == 0)
{
tmp0 = tmp1;
tmp1 = tmp2;
@ -666,8 +656,6 @@ u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const
#endif
#ifdef IS_NV
const int offset_minus_4 = 4 - (block_len & 3);
const int selector = (0x76543210 >> (offset_minus_4 * 4)) & 0xffff;
tmp0 = __byte_perm ( 0, append[0], selector);
@ -677,7 +665,7 @@ u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const
tmp4 = __byte_perm (append[3], 0x80, selector);
#endif
switch (div)
switch (offset / 4)
{
case 0: block[ 0] |= tmp0;
block[ 1] = tmp1;
@ -777,7 +765,7 @@ u32 memcat20_x80 (u32 block[20], const u32 block_len, const u32 append[4], const
break;
}
return block_len + append_len;
return offset + append_len;
}
__kernel void m07400_init (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global sha256crypt_tmp_t *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max)

Loading…
Cancel
Save