Autotune: Do a pre-autotune test run to find out if kernel runtime is above some TDR limit

HCtune File: Remove Apple-related GPU entries to work around Trap 6 error
pull/1392/head
jsteube 7 years ago
parent 5265803493
commit 73bba00286

@ -51,6 +51,7 @@
## Improvements
##
- Autotune: Do a pre-autotune test run to find out if kernel runtime is above some TDR limit
- Charset: Add additional DES charsets with corrected parity
- OpenCL Buffers: Do not allocate memory for amplifiers for fast hashes, it's simply not needed
- OpenCL Kernels: Improved rule engine performance by 6% for NVidia
@ -77,6 +78,7 @@
- HCstat File: Renamed file from hashcat.hcstat to hashcat.hcstat2 and add header because versions are incompatible
- HCstat File: Add code to read LZMA compressed hashcat.hcstat2
- HCstat File: Add hcstat2 support to enable masks of length up to 256, also adds a filetype header
- HCtune File: Remove Apple-related GPU entries to work around Trap 6 error
- OpenCL Kernels: Added code generator for most of the switch_* functions and replaced existing code
- OpenCL Kernels: Declared all include functions as static to reduce binary kernel cache size
- OpenCL Kernels: On AMD GPU, optimized kernels for use with AMD ROCm driver

@ -243,10 +243,6 @@ GeForce_GTX_980M ALIAS_nv_sm52_or_higher
Tegra_X1 ALIAS_nv_sm52_or_higher
Intel(R)_Iris(TM)_Graphics_540 ALIAS_Apple_Iris
Iris ALIAS_Apple_Iris
Iris_Pro ALIAS_Apple_Iris_Pro
#############
## ENTRIES ##
#############
@ -384,81 +380,6 @@ GeForce_GTX_TITAN 3 10100 2 A
GeForce_GTX_TITAN 3 11000 2 A A
GeForce_GTX_TITAN 3 11100 2 A A
ALIAS_Apple_Iris * * 1 A 2
ALIAS_Apple_Iris * 400 1 A 256
ALIAS_Apple_Iris * 500 1 8 256
ALIAS_Apple_Iris * 501 1 8 256
ALIAS_Apple_Iris * 1500 1 A 256
ALIAS_Apple_Iris * 1600 1 8 256
ALIAS_Apple_Iris * 1800 1 1 16
ALIAS_Apple_Iris * 2100 1 8 256
ALIAS_Apple_Iris * 2500 1 2 256
ALIAS_Apple_Iris * 3000 1 A 256
ALIAS_Apple_Iris * 5200 1 8 256
ALIAS_Apple_Iris * 5800 1 8 256
ALIAS_Apple_Iris * 6211 1 8 128
ALIAS_Apple_Iris * 6212 1 4 128
ALIAS_Apple_Iris * 6213 1 4 128
ALIAS_Apple_Iris * 6221 1 8 128
ALIAS_Apple_Iris * 6222 1 4 128
ALIAS_Apple_Iris * 6223 1 2 128
ALIAS_Apple_Iris * 6231 1 2 16
ALIAS_Apple_Iris * 6232 1 1 16
ALIAS_Apple_Iris * 6233 1 1 8
ALIAS_Apple_Iris * 6241 1 8 128
ALIAS_Apple_Iris * 6242 1 4 128
ALIAS_Apple_Iris * 6243 1 2 128
ALIAS_Apple_Iris * 6300 1 8 256
ALIAS_Apple_Iris * 6400 1 8 256
ALIAS_Apple_Iris * 6500 1 8 256
ALIAS_Apple_Iris * 6600 1 8 200
ALIAS_Apple_Iris * 6700 1 8 256
ALIAS_Apple_Iris * 6800 1 2 200
ALIAS_Apple_Iris * 7100 1 1 256
ALIAS_Apple_Iris * 7200 1 1 200
ALIAS_Apple_Iris * 7400 1 2 200
ALIAS_Apple_Iris * 7900 1 1 256
ALIAS_Apple_Iris * 8800 1 8 256
ALIAS_Apple_Iris * 8900 1 1 1
ALIAS_Apple_Iris * 9000 1 2 16
ALIAS_Apple_Iris * 9100 1 4 256
ALIAS_Apple_Iris * 9200 1 1 200
ALIAS_Apple_Iris * 9300 1 1 1
ALIAS_Apple_Iris * 9400 1 1 200
ALIAS_Apple_Iris * 9500 1 1 200
ALIAS_Apple_Iris * 9600 1 1 200
ALIAS_Apple_Iris * 10000 1 1 200
ALIAS_Apple_Iris * 10300 1 8 128
ALIAS_Apple_Iris * 10500 1 4 64
ALIAS_Apple_Iris * 10900 1 2 200
ALIAS_Apple_Iris * 11300 1 1 1
ALIAS_Apple_Iris * 11600 1 1 256
ALIAS_Apple_Iris * 11900 1 2 200
ALIAS_Apple_Iris * 12000 1 2 200
ALIAS_Apple_Iris * 12100 1 2 200
ALIAS_Apple_Iris * 12200 1 1 256
ALIAS_Apple_Iris * 12300 1 2 256
ALIAS_Apple_Iris * 12400 1 1 256
ALIAS_Apple_Iris * 12500 1 1 256
ALIAS_Apple_Iris * 12700 1 64 10
ALIAS_Apple_Iris * 12800 1 64 100
ALIAS_Apple_Iris * 12900 1 8 64
ALIAS_Apple_Iris * 13000 1 1 64
ALIAS_Apple_Iris * 13200 1 1 1
ALIAS_Apple_Iris * 14700 1 1 1
ALIAS_Apple_Iris * 14800 1 1 1
ALIAS_Apple_Iris * 15200 1 1 10
ALIAS_Apple_Iris_Pro * 5000 1 8 8
ALIAS_Apple_Iris_Pro * 6100 1 4 16
ALIAS_Apple_Iris_Pro * 6231 1 1 64
ALIAS_Apple_Iris_Pro * 6232 1 1 32
ALIAS_Apple_Iris_Pro * 6233 1 1 16
ALIAS_Apple_Iris_Pro * 8700 1 1 256
ALIAS_Apple_Iris_Pro * 13731 1 1 64
ALIAS_Apple_Iris_Pro * 13732 1 1 32
ALIAS_Apple_Iris_Pro * 13733 1 1 16
#######################
## EXTREME SLOW ALGOS #
#######################

@ -140,6 +140,31 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
if (CL_rc == -1) return -1;
}
// Do a pre-autotune test run to find out if kernel runtime is above some TDR limit
u32 kernel_loops_max_reduced = kernel_loops_max;
if (1)
{
const double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops_min);
if (exec_msec > 2000)
{
event_log_error (hashcat_ctx, "OpenCL kernel minimum runtime larger than default TDR");
return -1;
}
const u32 mm = kernel_loops_max / kernel_loops_min;
if ((exec_msec * mm) > target_msec)
{
const u32 loops_valid = target_msec / exec_msec;
kernel_loops_max_reduced = kernel_loops_min * loops_valid;
}
}
#define VERIFIER_CNT 1
// first find out highest kernel-loops that stays below target_msec
@ -148,6 +173,8 @@ static int autotune (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param
{
for (kernel_loops = kernel_loops_max; kernel_loops > kernel_loops_min; kernel_loops >>= 1)
{
if (kernel_loops > kernel_loops_max_reduced) continue;
double exec_msec = try_run (hashcat_ctx, device_param, kernel_accel_min, kernel_loops);
for (int i = 0; i < VERIFIER_CNT; i++)

Loading…
Cancel
Save