mirror of
https://github.com/hashcat/hashcat.git
synced 2024-11-25 17:38:23 +00:00
Removed branches in replace_dot_by_len() function of optimized -m 8300 kernels
This commit is contained in:
parent
69bdd5012c
commit
c0753f361c
@ -16,49 +16,99 @@
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
const u32 replace_dots (u32 *w, const u32 idx, const u32 old_len, const u32 pw_len)
|
||||
DECLSPEC u64 u32_to_u64 (const u32 in)
|
||||
{
|
||||
const u32 min_len = idx << 4; // 2 ^ 4 = 16 for each u32 w[4]
|
||||
const u64 t0 = (u64) ((in >> 0) & 0xff);
|
||||
const u64 t1 = (u64) ((in >> 8) & 0xff);
|
||||
const u64 t2 = (u64) ((in >> 16) & 0xff);
|
||||
const u64 t3 = (u64) ((in >> 24) & 0xff);
|
||||
|
||||
if (pw_len <= min_len) return 0;
|
||||
const u64 out = (t0 << 0)
|
||||
| (t1 << 16)
|
||||
| (t2 << 32)
|
||||
| (t3 << 48);
|
||||
|
||||
const u32 max_len = pw_len - min_len - 1;
|
||||
|
||||
const u32 start_pos = (max_len < 15) ? max_len : 15;
|
||||
|
||||
u32 cur_len = old_len;
|
||||
|
||||
for (int pos = (int) start_pos; pos >= 0; pos--)
|
||||
{
|
||||
const u32 div = pos / 4;
|
||||
const u32 mod = pos & 3;
|
||||
const u32 sht = mod << 3;
|
||||
|
||||
if (((w[div] >> sht) & 0xff) == 0x2e) // '.'
|
||||
{
|
||||
w[div] += (cur_len - 0x2e) << sht;
|
||||
|
||||
cur_len = 0;
|
||||
return out;
|
||||
}
|
||||
else
|
||||
|
||||
DECLSPEC u32 u64_to_u32 (const u64 in)
|
||||
{
|
||||
cur_len++;
|
||||
}
|
||||
const u32 t0 = (u32) ((in >> 0) & 0xff);
|
||||
const u32 t1 = (u32) ((in >> 16) & 0xff);
|
||||
const u32 t2 = (u32) ((in >> 32) & 0xff);
|
||||
const u32 t3 = (u32) ((in >> 48) & 0xff);
|
||||
|
||||
const u32 out = (t0 << 0)
|
||||
| (t1 << 8)
|
||||
| (t2 << 16)
|
||||
| (t3 << 24);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Rewrites one 32-bit word so that every byte equal to 0x2e ('.') is replaced
// by a running byte count, without branching on the byte values.
//
// The bytes are visited from bits 31..24 down to bits 7..0. A byte that is
// 0x2e receives the count held at that point and resets the count to zero;
// any other byte is kept and increments the count (modulo 256).
//
// input   - word to scan
// output  - receives input with its 0x2e bytes replaced by counts
// cur_len - count carried in from the previously processed word; only its low
//           8 bits are used
//
// Returns the count after the lowest byte, in the range 0..255.
DECLSPEC int replace_u32_le (const u32 input, u32 *output, int cur_len)
{
  // One 16-bit lane per input byte: the empty upper half of each lane gives
  // the +1 below somewhere to carry into without touching a neighbour.
  const u64 lanes = u32_to_u64 (input);

  // A lane holding 0x2e becomes 0x00, then 0xff; adding 1 carries into bit 8
  // of exactly those lanes. Every other lane keeps bit 8 clear.
  u64 dots = lanes;

  dots ^= 0x002e002e002e002e;
  dots ^= 0x00ff00ff00ff00ff;
  dots += 0x0001000100010001;
  dots &= 0x0100010001000100;

  // Smear bit 8 over bits 8..15 (0x0100 -> 0xff00), then move the result
  // down to bits 0..7 of the lane.
  dots |= dots << 1;
  dots |= dots << 2;
  dots |= dots << 4;
  dots >>= 8;

  // Back in 32 bits: 0xff in every byte that held 0x2e, 0x00 elsewhere.
  const u32 is_dot  = u64_to_u32 (dots);
  const u32 not_dot = ~is_dot;

  // Per-byte "keep counting" masks: all ones for a regular byte, zero for a
  // 0x2e byte, so ANDing with them resets the count at each 0x2e.
  const u32 keep_b3 = not_dot & 0xff000000;
  const u32 keep_b2 = not_dot & 0x00ff0000;
  const u32 keep_b1 = not_dot & 0x0000ff00;
  const u32 keep_b0 = not_dot & 0x000000ff;

  u32 counts = 0;

  // The count travels in the byte position currently being examined. The
  // mask and the shift stay separate statements so the shift is applied to
  // the signed value, as the step-by-step form requires.
  int len = cur_len << 24;

  counts |= len;
  len = (len + 0x01000000) & keep_b3;
  len >>= 8;

  counts |= len;
  len = (len + 0x00010000) & keep_b2;
  len >>= 8;

  counts |= len;
  len = (len + 0x00000100) & keep_b1;
  len >>= 8;

  counts |= len;
  len = (len + 0x00000001) & keep_b0;

  // Regular bytes come from input, 0x2e bytes from the recorded counts.
  *output = (input & not_dot) | (counts & is_dot);

  return len;
}
|
||||
|
||||
const u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
DECLSPEC u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
{
|
||||
u32 cur_len = 0;
|
||||
|
||||
// loop over w3...w0 (4 * 16 = 64 bytes):
|
||||
|
||||
cur_len = replace_dots (w3, 3, cur_len, pw_len);
|
||||
cur_len = replace_dots (w2, 2, cur_len, pw_len);
|
||||
cur_len = replace_dots (w1, 1, cur_len, pw_len);
|
||||
cur_len = replace_dots (w0, 0, cur_len, pw_len);
|
||||
int cur_len = 0 - (64 - pw_len); // number of padding bytes relative to buffer size
|
||||
|
||||
cur_len = replace_u32_le (w3[3], &w3[3], cur_len);
|
||||
cur_len = replace_u32_le (w3[2], &w3[2], cur_len);
|
||||
cur_len = replace_u32_le (w3[1], &w3[1], cur_len);
|
||||
cur_len = replace_u32_le (w3[0], &w3[0], cur_len);
|
||||
cur_len = replace_u32_le (w2[3], &w2[3], cur_len);
|
||||
cur_len = replace_u32_le (w2[2], &w2[2], cur_len);
|
||||
cur_len = replace_u32_le (w2[1], &w2[1], cur_len);
|
||||
cur_len = replace_u32_le (w2[0], &w2[0], cur_len);
|
||||
cur_len = replace_u32_le (w1[3], &w1[3], cur_len);
|
||||
cur_len = replace_u32_le (w1[2], &w1[2], cur_len);
|
||||
cur_len = replace_u32_le (w1[1], &w1[1], cur_len);
|
||||
cur_len = replace_u32_le (w1[0], &w1[0], cur_len);
|
||||
cur_len = replace_u32_le (w0[3], &w0[3], cur_len);
|
||||
cur_len = replace_u32_le (w0[2], &w0[2], cur_len);
|
||||
cur_len = replace_u32_le (w0[1], &w0[1], cur_len);
|
||||
cur_len = replace_u32_le (w0[0], &w0[0], cur_len);
|
||||
|
||||
return cur_len;
|
||||
}
|
||||
|
@ -14,49 +14,99 @@
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
const u32 replace_dots (u32 *w, const u32 idx, const u32 old_len, const u32 pw_len)
|
||||
DECLSPEC u64 u32_to_u64 (const u32 in)
|
||||
{
|
||||
const u32 min_len = idx << 4; // 2 ^ 4 = 16 for each u32 w[4]
|
||||
const u64 t0 = (u64) ((in >> 0) & 0xff);
|
||||
const u64 t1 = (u64) ((in >> 8) & 0xff);
|
||||
const u64 t2 = (u64) ((in >> 16) & 0xff);
|
||||
const u64 t3 = (u64) ((in >> 24) & 0xff);
|
||||
|
||||
if (pw_len <= min_len) return 0;
|
||||
const u64 out = (t0 << 0)
|
||||
| (t1 << 16)
|
||||
| (t2 << 32)
|
||||
| (t3 << 48);
|
||||
|
||||
const u32 max_len = pw_len - min_len - 1;
|
||||
|
||||
const u32 start_pos = (max_len < 15) ? max_len : 15;
|
||||
|
||||
u32 cur_len = old_len;
|
||||
|
||||
for (int pos = (int) start_pos; pos >= 0; pos--)
|
||||
{
|
||||
const u32 div = pos / 4;
|
||||
const u32 mod = pos & 3;
|
||||
const u32 sht = mod << 3;
|
||||
|
||||
if (((w[div] >> sht) & 0xff) == 0x2e) // '.'
|
||||
{
|
||||
w[div] += (cur_len - 0x2e) << sht;
|
||||
|
||||
cur_len = 0;
|
||||
return out;
|
||||
}
|
||||
else
|
||||
|
||||
DECLSPEC u32 u64_to_u32 (const u64 in)
|
||||
{
|
||||
cur_len++;
|
||||
}
|
||||
const u32 t0 = (u32) ((in >> 0) & 0xff);
|
||||
const u32 t1 = (u32) ((in >> 16) & 0xff);
|
||||
const u32 t2 = (u32) ((in >> 32) & 0xff);
|
||||
const u32 t3 = (u32) ((in >> 48) & 0xff);
|
||||
|
||||
const u32 out = (t0 << 0)
|
||||
| (t1 << 8)
|
||||
| (t2 << 16)
|
||||
| (t3 << 24);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Rewrites one 32-bit word so that every byte equal to 0x2e ('.') is replaced
// by a running byte count, without branching on the byte values.
//
// The bytes are visited from bits 31..24 down to bits 7..0. A byte that is
// 0x2e receives the count held at that point and resets the count to zero;
// any other byte is kept and increments the count (modulo 256).
//
// input   - word to scan
// output  - receives input with its 0x2e bytes replaced by counts
// cur_len - count carried in from the previously processed word; only its low
//           8 bits are used
//
// Returns the count after the lowest byte, in the range 0..255.
DECLSPEC int replace_u32_le (const u32 input, u32 *output, int cur_len)
{
  // One 16-bit lane per input byte: the empty upper half of each lane gives
  // the +1 below somewhere to carry into without touching a neighbour.
  const u64 lanes = u32_to_u64 (input);

  // A lane holding 0x2e becomes 0x00, then 0xff; adding 1 carries into bit 8
  // of exactly those lanes. Every other lane keeps bit 8 clear.
  u64 dots = lanes;

  dots ^= 0x002e002e002e002e;
  dots ^= 0x00ff00ff00ff00ff;
  dots += 0x0001000100010001;
  dots &= 0x0100010001000100;

  // Smear bit 8 over bits 8..15 (0x0100 -> 0xff00), then move the result
  // down to bits 0..7 of the lane.
  dots |= dots << 1;
  dots |= dots << 2;
  dots |= dots << 4;
  dots >>= 8;

  // Back in 32 bits: 0xff in every byte that held 0x2e, 0x00 elsewhere.
  const u32 is_dot  = u64_to_u32 (dots);
  const u32 not_dot = ~is_dot;

  // Per-byte "keep counting" masks: all ones for a regular byte, zero for a
  // 0x2e byte, so ANDing with them resets the count at each 0x2e.
  const u32 keep_b3 = not_dot & 0xff000000;
  const u32 keep_b2 = not_dot & 0x00ff0000;
  const u32 keep_b1 = not_dot & 0x0000ff00;
  const u32 keep_b0 = not_dot & 0x000000ff;

  u32 counts = 0;

  // The count travels in the byte position currently being examined. The
  // mask and the shift stay separate statements so the shift is applied to
  // the signed value, as the step-by-step form requires.
  int len = cur_len << 24;

  counts |= len;
  len = (len + 0x01000000) & keep_b3;
  len >>= 8;

  counts |= len;
  len = (len + 0x00010000) & keep_b2;
  len >>= 8;

  counts |= len;
  len = (len + 0x00000100) & keep_b1;
  len >>= 8;

  counts |= len;
  len = (len + 0x00000001) & keep_b0;

  // Regular bytes come from input, 0x2e bytes from the recorded counts.
  *output = (input & not_dot) | (counts & is_dot);

  return len;
}
|
||||
|
||||
const u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
DECLSPEC u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
{
|
||||
u32 cur_len = 0;
|
||||
|
||||
// loop over w3...w0 (4 * 16 = 64 bytes):
|
||||
|
||||
cur_len = replace_dots (w3, 3, cur_len, pw_len);
|
||||
cur_len = replace_dots (w2, 2, cur_len, pw_len);
|
||||
cur_len = replace_dots (w1, 1, cur_len, pw_len);
|
||||
cur_len = replace_dots (w0, 0, cur_len, pw_len);
|
||||
int cur_len = 0 - (64 - pw_len); // number of padding bytes relative to buffer size
|
||||
|
||||
cur_len = replace_u32_le (w3[3], &w3[3], cur_len);
|
||||
cur_len = replace_u32_le (w3[2], &w3[2], cur_len);
|
||||
cur_len = replace_u32_le (w3[1], &w3[1], cur_len);
|
||||
cur_len = replace_u32_le (w3[0], &w3[0], cur_len);
|
||||
cur_len = replace_u32_le (w2[3], &w2[3], cur_len);
|
||||
cur_len = replace_u32_le (w2[2], &w2[2], cur_len);
|
||||
cur_len = replace_u32_le (w2[1], &w2[1], cur_len);
|
||||
cur_len = replace_u32_le (w2[0], &w2[0], cur_len);
|
||||
cur_len = replace_u32_le (w1[3], &w1[3], cur_len);
|
||||
cur_len = replace_u32_le (w1[2], &w1[2], cur_len);
|
||||
cur_len = replace_u32_le (w1[1], &w1[1], cur_len);
|
||||
cur_len = replace_u32_le (w1[0], &w1[0], cur_len);
|
||||
cur_len = replace_u32_le (w0[3], &w0[3], cur_len);
|
||||
cur_len = replace_u32_le (w0[2], &w0[2], cur_len);
|
||||
cur_len = replace_u32_le (w0[1], &w0[1], cur_len);
|
||||
cur_len = replace_u32_le (w0[0], &w0[0], cur_len);
|
||||
|
||||
return cur_len;
|
||||
}
|
||||
|
@ -14,49 +14,98 @@
|
||||
#include "inc_hash_sha1.cl"
|
||||
#endif
|
||||
|
||||
const u32 replace_dots (u32 *w, const u32 idx, const u32 old_len, const u32 pw_len)
|
||||
DECLSPEC u64 u32_to_u64 (const u32 in)
|
||||
{
|
||||
const u32 min_len = idx << 4; // 2 ^ 4 = 16 for each u32 w[4]
|
||||
const u64 t0 = (u64) ((in >> 24) & 0xff);
|
||||
const u64 t1 = (u64) ((in >> 16) & 0xff);
|
||||
const u64 t2 = (u64) ((in >> 8) & 0xff);
|
||||
const u64 t3 = (u64) ((in >> 0) & 0xff);
|
||||
|
||||
if (pw_len <= min_len) return 0;
|
||||
const u64 out = (t0 << 48)
|
||||
| (t1 << 32)
|
||||
| (t2 << 16)
|
||||
| (t3 << 0);
|
||||
|
||||
const u32 max_len = pw_len - min_len - 1;
|
||||
|
||||
const u32 start_pos = (max_len < 15) ? max_len : 15;
|
||||
|
||||
u32 cur_len = old_len;
|
||||
|
||||
for (int pos = (int) start_pos; pos >= 0; pos--)
|
||||
{
|
||||
const u32 div = pos / 4;
|
||||
const u32 mod = pos & 3;
|
||||
const u32 sht = (3 - mod) << 3;
|
||||
|
||||
if (((w[div] >> sht) & 0xff) == 0x2e) // '.'
|
||||
{
|
||||
w[div] += (cur_len - 0x2e) << sht;
|
||||
|
||||
cur_len = 0;
|
||||
return out;
|
||||
}
|
||||
else
|
||||
|
||||
DECLSPEC u32 u64_to_u32 (const u64 in)
|
||||
{
|
||||
cur_len++;
|
||||
}
|
||||
const u32 t0 = (u32) ((in >> 48) & 0xff);
|
||||
const u32 t1 = (u32) ((in >> 32) & 0xff);
|
||||
const u32 t2 = (u32) ((in >> 16) & 0xff);
|
||||
const u32 t3 = (u32) ((in >> 0) & 0xff);
|
||||
|
||||
const u32 out = (t0 << 24)
|
||||
| (t1 << 16)
|
||||
| (t2 << 8)
|
||||
| (t3 << 0);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
// Rewrites one 32-bit word so that every byte equal to 0x2e ('.') is replaced
// by a running byte count, without branching on the byte values.
//
// The bytes are visited from bits 7..0 up to bits 31..24. A byte that is 0x2e
// receives the low 8 bits of the count held at that point and resets the
// count to zero; any other byte is kept and increments the count.
//
// input   - word to scan
// output  - receives input with its 0x2e bytes replaced by counts
// cur_len - count carried in from the previously processed word; may be
//           negative (only its low 8 bits are ever written into the word)
//
// Returns the count after the highest byte. The final shift is applied to a
// signed int, so a count with bit 7 set comes back sign-extended (negative)
// wherever >> on a negative int is an arithmetic shift.
DECLSPEC int replace_u32_be (const u32 input, u32 *output, int cur_len)
{
  // expand each byte into its own 16-bit lane so the carry produced below
  // lands in bit 8 of that lane instead of spilling into the next byte

  u64 input64 = u32_to_u64 (input);

  u64 m64 = input64;

  m64 ^= 0x002e002e002e002e; // convert 0x2e to 0x00
  m64 ^= 0x00ff00ff00ff00ff; // convert 0x00 to 0xff (jit will optimize this to one instruction)
  m64 += 0x0001000100010001; // only 0xff can set 9th bit
  m64 &= 0x0100010001000100; // only 9th bit survives

  m64 |= m64 << 1; // converts 0x0100 to 0xff00
  m64 |= m64 << 2;
  m64 |= m64 << 4;

  m64 >>= 8; // back to original positions (in 64 bit)

  // m: 0xff in every byte that held 0x2e, 0x00 elsewhere

  u32 m = u64_to_u32 (m64);

  u32 r = 0;

  const u32 mn = ~m;

  // per-byte "keep counting" masks: zero for a 0x2e byte, which resets the
  // count when it is ANDed in

  const u32 r0 = mn & 0xff000000;
  const u32 r1 = mn & 0x00ff0000;
  const u32 r2 = mn & 0x0000ff00;
  const u32 r3 = mn & 0x000000ff;

  // Only the low byte of the incoming count belongs in r. Without the mask a
  // negative cur_len would set bytes 1..3 of r to 0xff, and a 0x2e found in
  // one of those bytes would be replaced by 0xff instead of its count. The
  // carried count itself is left untouched, so the return value is unchanged.

  r |= cur_len & 0x000000ff; cur_len = (cur_len + 0x00000001) & r3; cur_len <<= 8;
  r |= cur_len;              cur_len = (cur_len + 0x00000100) & r2; cur_len <<= 8;
  r |= cur_len;              cur_len = (cur_len + 0x00010000) & r1; cur_len <<= 8;
  r |= cur_len;              cur_len = (cur_len + 0x01000000) & r0; cur_len >>= 24;

  // regular bytes come from input, 0x2e bytes from the recorded counts

  *output = (input & mn) | (r & m);

  return cur_len;
}
|
||||
|
||||
const u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
DECLSPEC u32 replace_dot_by_len (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len)
|
||||
{
|
||||
u32 cur_len = 0;
|
||||
|
||||
// loop over w3...w0 (4 * 16 = 64 bytes):
|
||||
|
||||
cur_len = replace_dots (w3, 3, cur_len, pw_len);
|
||||
cur_len = replace_dots (w2, 2, cur_len, pw_len);
|
||||
cur_len = replace_dots (w1, 1, cur_len, pw_len);
|
||||
cur_len = replace_dots (w0, 0, cur_len, pw_len);
|
||||
int cur_len = 0 - (64 - pw_len); // number of padding bytes relative to buffer size
|
||||
|
||||
cur_len = replace_u32_be (w3[3], &w3[3], cur_len);
|
||||
cur_len = replace_u32_be (w3[2], &w3[2], cur_len);
|
||||
cur_len = replace_u32_be (w3[1], &w3[1], cur_len);
|
||||
cur_len = replace_u32_be (w3[0], &w3[0], cur_len);
|
||||
cur_len = replace_u32_be (w2[3], &w2[3], cur_len);
|
||||
cur_len = replace_u32_be (w2[2], &w2[2], cur_len);
|
||||
cur_len = replace_u32_be (w2[1], &w2[1], cur_len);
|
||||
cur_len = replace_u32_be (w2[0], &w2[0], cur_len);
|
||||
cur_len = replace_u32_be (w1[3], &w1[3], cur_len);
|
||||
cur_len = replace_u32_be (w1[2], &w1[2], cur_len);
|
||||
cur_len = replace_u32_be (w1[1], &w1[1], cur_len);
|
||||
cur_len = replace_u32_be (w1[0], &w1[0], cur_len);
|
||||
cur_len = replace_u32_be (w0[3], &w0[3], cur_len);
|
||||
cur_len = replace_u32_be (w0[2], &w0[2], cur_len);
|
||||
cur_len = replace_u32_be (w0[1], &w0[1], cur_len);
|
||||
cur_len = replace_u32_be (w0[0], &w0[0], cur_len);
|
||||
|
||||
return cur_len;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user