Manually unroll sha2 hashes

pull/2022/head
Jens Steube 5 years ago
parent 3ca3d1cc60
commit fa9d073f9a

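The change itself is mechanical. Every SHA-2 transform used to run rounds 16 and up in a loop, asking the compiler to unroll it via #pragma unroll whenever _unroll is defined. On the CUDA backend that hint is apparently not enough, so this commit adds an IS_CUDA branch that writes the remaining rounds out explicitly in blocks of 16 (three blocks for the 64-round SHA-224/256 transforms, four for the 80-round SHA-384/512 transforms) and keeps the original loop for the other backends. Below is a minimal, self-contained sketch of that control flow; the dummy ROUND_STEP macro is hypothetical and only stands in for the real ROUND_EXPAND ()/ROUND_STEP () pairs.

#include <stdio.h>

/* dummy round: stands in for 16 real SHA-2 rounds starting at index i */
#define ROUND_STEP(i) do { state += ((i) + 1) * 0x9e3779b9u; } while (0)

static unsigned int run_rounds (unsigned int state)
{
  ROUND_STEP (0);

#ifdef IS_CUDA
  /* manually unrolled, as in this commit */
  ROUND_STEP (16);
  ROUND_STEP (32);
  ROUND_STEP (48);
#else
  /* original path: loop, optionally unrolled by the compiler */
  #ifdef _unroll
  #pragma unroll
  #endif
  for (int i = 16; i < 64; i += 16)
  {
    ROUND_STEP (i);
  }
#endif

  return state;
}

int main (void)
{
  printf ("%08x\n", run_rounds (0));

  return 0;
}

Both branches do the same work; the IS_CUDA path just removes the loop so the CUDA compiler sees straight-line code.
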
@@ -104,6 +104,11 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
ROUND_STEP_S (0);
#ifdef IS_CUDA
ROUND_EXPAND_S (); ROUND_STEP_S (16);
ROUND_EXPAND_S (); ROUND_STEP_S (32);
ROUND_EXPAND_S (); ROUND_STEP_S (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -111,6 +116,7 @@ DECLSPEC void sha224_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
{
ROUND_EXPAND_S (); ROUND_STEP_S (i);
}
#endif
#undef ROUND_EXPAND_S
#undef ROUND_STEP_S

@@ -104,6 +104,11 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
ROUND_STEP_S (0);
#ifdef IS_CUDA
ROUND_EXPAND_S (); ROUND_STEP_S (16);
ROUND_EXPAND_S (); ROUND_STEP_S (32);
ROUND_EXPAND_S (); ROUND_STEP_S (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -111,6 +116,7 @@ DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
{
ROUND_EXPAND_S (); ROUND_STEP_S (i);
}
#endif
#undef ROUND_EXPAND_S
#undef ROUND_STEP_S

@@ -108,6 +108,12 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
ROUND_STEP_S (0);
#ifdef IS_CUDA
ROUND_EXPAND_S (); ROUND_STEP_S (16);
ROUND_EXPAND_S (); ROUND_STEP_S (32);
ROUND_EXPAND_S (); ROUND_STEP_S (48);
ROUND_EXPAND_S (); ROUND_STEP_S (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -115,6 +121,7 @@ DECLSPEC void sha384_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
{
ROUND_EXPAND_S (); ROUND_STEP_S (i);
}
#endif
#undef ROUND_EXPAND_S
#undef ROUND_STEP_S

@@ -108,6 +108,12 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
ROUND_STEP_S (0);
#ifdef IS_CUDA
ROUND_EXPAND_S (); ROUND_STEP_S (16);
ROUND_EXPAND_S (); ROUND_STEP_S (32);
ROUND_EXPAND_S (); ROUND_STEP_S (48);
ROUND_EXPAND_S (); ROUND_STEP_S (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -115,6 +121,7 @@ DECLSPEC void sha512_transform (const u32 *w0, const u32 *w1, const u32 *w2, con
{
ROUND_EXPAND_S (); ROUND_STEP_S (i);
}
#endif
#undef ROUND_EXPAND_S
#undef ROUND_STEP_S

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

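The vectorized (u32x) sha256_transform_m and sha256_transform_z kernels below get the same treatment. Note that sha256_transform_z takes no message words and repeats only ROUND_STEP_Z, presumably because its message block is fixed and no per-call expansion is needed. For orientation, one ROUND_EXPAND () corresponds to extending the SHA-256 message schedule by 16 words before the next block of rounds; the following standalone sketch shows that expansion with an array, whereas the real kernels keep the schedule in individual registers.

#include <stdint.h>
#include <stdio.h>

static uint32_t ror32 (uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

/* SHA-256 small sigma functions used by the message schedule */
static uint32_t sig0 (uint32_t x) { return ror32 (x,  7) ^ ror32 (x, 18) ^ (x >>  3); }
static uint32_t sig1 (uint32_t x) { return ror32 (x, 17) ^ ror32 (x, 19) ^ (x >> 10); }

/* expand words w[t..t+15] from the previous 16 words (t = 16, 32 or 48) */
static void round_expand (uint32_t *w, int t)
{
  for (int i = t; i < t + 16; i++)
  {
    w[i] = sig1 (w[i - 2]) + w[i - 7] + sig0 (w[i - 15]) + w[i - 16];
  }
}

int main (void)
{
  uint32_t w[64] = { 0 }; /* all-zero block: every expanded word stays constant */

  round_expand (w, 16);
  round_expand (w, 32);
  round_expand (w, 48);

  printf ("w[63] = %08x\n", w[63]);

  return 0;
}

With a fixed block, as in main () above, the whole schedule is a compile-time constant, which is what makes the expansion-free _z variant possible.
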
@@ -86,6 +86,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +98,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
digest[0] += a;
digest[1] += b;
@@ -137,6 +143,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
ROUND_STEP_Z (0);
#ifdef IS_CUDA
ROUND_STEP_Z (16);
ROUND_STEP_Z (32);
ROUND_STEP_Z (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -144,6 +155,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
{
ROUND_STEP_Z (i);
}
#endif
digest[0] += a;
digest[1] += b;

@@ -84,6 +84,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +96,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
digest[0] += a;
digest[1] += b;
@@ -135,6 +141,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
ROUND_STEP_Z (0);
#ifdef IS_CUDA
ROUND_STEP_Z (16);
ROUND_STEP_Z (32);
ROUND_STEP_Z (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -142,6 +153,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
{
ROUND_STEP_Z (i);
}
#endif
digest[0] += a;
digest[1] += b;

@@ -84,6 +84,11 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +96,7 @@ DECLSPEC void sha256_transform_m (u32x *digest, const u32x *w)
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
digest[0] += a;
digest[1] += b;
@@ -135,6 +141,11 @@ DECLSPEC void sha256_transform_z (u32x *digest)
ROUND_STEP_Z (0);
#ifdef IS_CUDA
ROUND_STEP_Z (16);
ROUND_STEP_Z (32);
ROUND_STEP_Z (48);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -142,6 +153,7 @@ DECLSPEC void sha256_transform_z (u32x *digest)
{
ROUND_STEP_Z (i);
}
#endif
digest[0] += a;
digest[1] += b;

@@ -86,6 +86,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha384_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -86,6 +86,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -93,6 +99,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

@@ -84,6 +84,12 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
ROUND_STEP (0);
#ifdef IS_CUDA
ROUND_EXPAND (); ROUND_STEP (16);
ROUND_EXPAND (); ROUND_STEP (32);
ROUND_EXPAND (); ROUND_STEP (48);
ROUND_EXPAND (); ROUND_STEP (64);
#else
#ifdef _unroll
#pragma unroll
#endif
@@ -91,6 +97,7 @@ DECLSPEC void sha512_transform_intern (const u32x *w0, const u32x *w1, const u32
{
ROUND_EXPAND (); ROUND_STEP (i);
}
#endif
/* rev
digest[0] += a;

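The remaining hunks are in src/autotune.c. try_run_preferred () and the preferred-thread-count search are commented out, a baseline timing is taken before the kernel_accel/kernel_loops doubling loop, and each doubled candidate is now kept only while its measured runtime is no worse than the previous one. A hedged sketch of that accept-while-improving idea follows; measure () is a hypothetical stand-in for try_run (hashcat_ctx, device_param, ...), and the toy cost model exists only so the search terminates. The real loop also derives kernel_loops_try and bounds-checks the candidates against device limits, which is omitted here.

#include <stdio.h>

/* toy stand-in for try_run (): runtime improves with more work per launch,
 * then degrades once the device is oversubscribed */
static double measure (unsigned int accel)
{
  return 64.0 / (double) accel + (double) accel * 0.02;
}

int main (void)
{
  unsigned int accel = 1;

  const unsigned int accel_orig = accel;

  double exec_msec_prev = measure (accel); /* baseline, as added in the diff */

  for (int i = 1; i < 16; i++)
  {
    const unsigned int accel_try = accel_orig * (1u << i);

    /* do a real test */
    const double exec_msec = measure (accel_try);

    /* slower than the previous best: stop searching */
    if (exec_msec_prev < exec_msec) break;

    exec_msec_prev = exec_msec;

    /* so far, so good! save */
    accel = accel_try;
  }

  printf ("picked accel=%u (%.3f msec)\n", accel, exec_msec_prev);

  return 0;
}
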
@@ -47,6 +47,7 @@ static double try_run (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_par
return exec_msec_prev;
}
/*
static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, const u32 kernel_accel, const u32 kernel_loops)
{
hashconfig_t *hashconfig = hashcat_ctx->hashconfig;
@@ -93,6 +94,7 @@ static double try_run_preferred (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *
return exec_msec_prev;
}
*/
static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param)
{
@@ -261,6 +263,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
const u32 kernel_accel_orig = kernel_accel;
const u32 kernel_loops_orig = kernel_loops;
double exec_msec_prev = try_run (hashcat_ctx, device_param, kernel_accel, kernel_loops);
for (int i = 1; i < STEPS_CNT; i++)
{
const u32 kernel_accel_try = kernel_accel_orig * (1u << i);
@@ -272,6 +276,16 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (kernel_loops_try > kernel_loops_max) continue;
if (kernel_loops_try < kernel_loops_min) break;
// do a real test
const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_try, kernel_loops_try);
if (exec_msec_prev < exec_msec) break;
exec_msec_prev = exec_msec;
// so far, so good! save
kernel_accel = kernel_accel_try;
kernel_loops = kernel_loops_try;
@@ -299,6 +313,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
// start finding best thread count is easier.
// it's either the preferred or the maximum thread count
/*
const u32 kernel_threads_min = device_param->kernel_threads_min;
const u32 kernel_threads_max = device_param->kernel_threads_max;
@@ -334,6 +349,7 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
}
}
}
*/
if (device_param->is_cuda == true)
{
