1
0
mirror of https://github.com/hashcat/hashcat.git synced 2024-11-02 13:39:17 +00:00

Optimize performance on NVIDIA GTX

This commit is contained in:
unix-ninja 2018-10-22 13:27:35 -04:00
parent 55d56baaa5
commit 8c5c225d8f
3 changed files with 110 additions and 112 deletions

View File

@ -59,29 +59,30 @@ __kernel void m18100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
sha1_hmac_final (&ctx);
// calculate the offset using the least significant 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
@ -152,29 +153,30 @@ __kernel void m18100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
sha1_hmac_final (&ctx);
// calculate the offset using the least significant 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code

View File

@ -82,29 +82,30 @@ __kernel void m18100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha1_hmac_final (&ctx);
// calculate the offset using the least significant 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
@ -200,29 +201,30 @@ __kernel void m18100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha1_hmac_final (&ctx);
// calculate the offset using the least significant 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code

View File

@ -68,39 +68,36 @@ __kernel void m18100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha1_hmac_final_vector (&ctx);
// calculate the offset using the least significant 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
const u32x r0 = ctx.opad.h[DGST_R0];
const u32x r1 = ctx.opad.h[DGST_R1];
const u32x r2 = ctx.opad.h[DGST_R2];
const u32x r3 = ctx.opad.h[DGST_R3];
COMPARE_M_SIMD (otp_code, 0, 0, 0);
}
}
@ -172,39 +169,36 @@ __kernel void m18100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
sha1_hmac_final_vector (&ctx);
// calculate the offset using the least 4 bits of the last byte of our hash
const u32x otp_offset = ctx.opad.h[4] & 0xf;
// initialize a buffer for the otp code
u32 otp_code = 0;
// grab 4 consecutive bytes of the hash, starting at offset
// on some systems, &3 is faster than %4, so we will use it in our switch()
switch (otp_offset & 3)
switch (ctx.opad.h[4] & 15)
{
case 1:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x00ffffff) << 8) | ((ctx.opad.h[otp_offset/4+1] & 0xff000000) >> 24);
break;
case 2:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x0000ffff) << 16) | ((ctx.opad.h[otp_offset/4+1] & 0xffff0000) >> 16);
break;
case 3:
otp_code = ((ctx.opad.h[otp_offset/4] & 0x000000ff) << 24) | ((ctx.opad.h[otp_offset/4+1] & 0xffffff00) >> 8);
break;
default:
otp_code = ctx.opad.h[otp_offset/4];
break;
case 0: otp_code = ctx.opad.h[0]; break;
case 1: otp_code = ctx.opad.h[0] << 8 | ctx.opad.h[1] >> 24; break;
case 2: otp_code = ctx.opad.h[0] << 16 | ctx.opad.h[1] >> 16; break;
case 3: otp_code = ctx.opad.h[0] << 24 | ctx.opad.h[1] >> 8; break;
case 4: otp_code = ctx.opad.h[1]; break;
case 5: otp_code = ctx.opad.h[1] << 8 | ctx.opad.h[2] >> 24; break;
case 6: otp_code = ctx.opad.h[1] << 16 | ctx.opad.h[2] >> 16; break;
case 7: otp_code = ctx.opad.h[1] << 24 | ctx.opad.h[2] >> 8; break;
case 8: otp_code = ctx.opad.h[2]; break;
case 9: otp_code = ctx.opad.h[2] << 8 | ctx.opad.h[3] >> 24; break;
case 10: otp_code = ctx.opad.h[2] << 16 | ctx.opad.h[3] >> 16; break;
case 11: otp_code = ctx.opad.h[2] << 24 | ctx.opad.h[3] >> 8; break;
case 12: otp_code = ctx.opad.h[3]; break;
case 13: otp_code = ctx.opad.h[3] << 8 | ctx.opad.h[4] >> 24; break;
case 14: otp_code = ctx.opad.h[3] << 16 | ctx.opad.h[4] >> 16; break;
case 15: otp_code = ctx.opad.h[3] << 24 | ctx.opad.h[4] >> 8; break;
}
// take only the lower 31 bits
otp_code &= 0x7fffffff;
// we want to generate only 6 digits of code
otp_code %= 1000000;
const u32x r0 = ctx.opad.h[DGST_R0];
const u32x r1 = ctx.opad.h[DGST_R1];
const u32x r2 = ctx.opad.h[DGST_R2];
const u32x r3 = ctx.opad.h[DGST_R3];
COMPARE_S_SIMD (otp_code, 0, 0, 0);
}
}