From ac8c4d946a81e085d53b59dec3b7de9cd871ba28 Mon Sep 17 00:00:00 2001 From: jsteube Date: Fri, 25 Nov 2016 16:27:22 +0100 Subject: [PATCH] Fix benchmark of extreme slow hashes, updated changes.txt --- docs/changes.txt | 184 ++++++++++++++++++++++++++++++++++++++--------- src/opencl.c | 33 +++++---- 2 files changed, 170 insertions(+), 47 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index bb8d0eed6..68cfd4f03 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,7 +1,7 @@ * changes v3.10 -> v3.20: -The hashcat core was completely refactored into a library (libhashcat). -This should help developers to include hashcat into distributed clients or GUI frontends (see main_shared.c for a quick start). +The hashcat core was completely refactored into a MT-safe library (libhashcat). +The goal was to help developers to include hashcat into distributed clients or GUI frontends. The CLI (hashcat.bin or hashcat.exe) works as before but from a technical perspective it's a library frontend. 
## @@ -10,49 +10,165 @@ The CLI (hashcat.bin or hashcat.exe) works as before but from a technical perspe - New option --speed-only: Quickly provides cracking speed per device based on the user hashes and selected options, then quit - New option --keep-guessing: Continue cracking hashes even after they have been cracked (to find collisions) -- Hardware management: Support --gpu-temp-retain for AMDGPU-Pro Driver -- Hardware management: Support --powertune-enable for AMDGPU-Pro Driver -- Status view: Show the current first and last password candidate test queued for execution per device -- Status view: Show the current position in the queue for both base and modifier (Ex: Wordlist 2/5) -- Status view: Show temperature, coreclock, memoryclock, fanspeed and pci-lanes for devices using AMDGPU-Pro Driver +- New option --restore-file-path: Manually override the path to restore file (useful if we want all session files in the same folder) +- New option --opencl-info: Show details about OpenCL compatible devices like an embedded clinfo tool (useful for bug reports) +- Documents: Added colors for warnings (yellow) and errors (red) instead of WARNING: and ERROR: prefix +- Documents: Added tips shown to the user about performance while hashcat is running +- Hardware management: Support --gpu-temp-retain for AMDGPU-Pro driver +- Hardware management: Support --powertune-enable for AMDGPU-Pro driver +- Password candidates: Allow words of length > 31 in wordlists for -a 0 for some slow hashes if no rules are in use +- Password candidates: Do not use $HEX[] if the password candidate is a valid UTF-8 string and print out as-is +- Pause mode: Allow quit program also if in pause mode +- Pause mode: Ignore runtime limit in pause mode - Status view: Show core-clock and memory-clock in benchmark-mode in case --machine-readable is set -- Password candidate: Do not use $HEX[] if the password candidate is a valid UTF-8 string and print out as-is - -## -## Bugs -## - -- Fixed a bug in 
wordlist reject code when cracking a large salted hashlist -- Fixed a bug in custom charset from file parsing code -- Fixed a bug in rule generator code when using the memory copy function -- Fixed a bug in outfile-check shutdown code when manually overriding --outfile-check-dir -- Fixed a bug in hash-mode 11600 = (7-Zip) producing clEnqueueReadBuffer(): CL_INVALID_VALUE depending on input hash +- Status view: Show temperature, coreclock, memoryclock, fanspeed and pci-lanes for devices using AMDGPU-Pro driver +- Status view: Show the current first and last password candidate test queued for execution per device (as in JtR) +- Status view: Show the current position in the queue for both base and modifier (Example: Wordlist 2/5) +- Markov statistics: Update hashcat.hcstat which is used as reference whenever the user defines a mask +- Charsets: Added lowercase ascii hex (?h) and uppercase ascii hex (?H) as predefined charsets ## ## Algorithms ## -- Improved NVIDIA cracking performance when doing rule-based attacks for very fast hashes like NTLM by 35% or more -- Reduce maximum number of allowed function calls per rule from 255 to 31 to save device memory -- Workaround for some OpenCL kernels to compile with AMDGPU-Pro Driver -- Allow words of length > 31 in wordlists for -a 0 for some slow hashes if no rules are in use -- Allow loading of bcrypt hashes with signature $2b$ (February 2014) -- Added new hash-mode 14000 = DES (PT = $salt, key = $pass) -- Added new hash-mode 14100 = 3DES (PT = $salt, key = $pass) -- Added new hash-mode 14400 = SHA1(CX) -- Added new hash-mode 99999 = Plaintext +- Added hash-mode 14000 = DES (PT = $salt, key = $pass) +- Added hash-mode 14100 = 3DES (PT = $salt, key = $pass) +- Added hash-mode 14400 = SHA1(CX) +- Added hash-mode 99999 = Plaintext +- Extended hash-mode 3200 = bcrypt: Accept signature $2b$ (February 2014) +- Improved hash-mode 8300 = DNSSEC: Additional parsing error detection + +## +## Bugs +## + +- Custom charset from file 
parsing code did not return an error if an error occurred +- Fix some clSetKernelArg() size error that caused slow modes to not work anymore in -a 1 mode +- Hash-mode 11600 = (7-Zip): Depending on input hash a clEnqueueReadBuffer(): CL_INVALID_VALUE error occurred +- Hash-mode 22 = Juniper Netscreen/SSG (ScreenOS): Fix salt length for -m 22 in benchmark mode +- Hash-Mode 5500 = NetNTLMv1 + ESS: Fix loading of NetNTLMv1 + SSP hash +- Hash-mode 6000 = RipeMD160: Fix typo in array index number +- If cracking a hash-mode using unicode passwords, length check of a mask was not taking that into account +- If cracking a large salted hashlist the wordlist reject code was too slow to handle it, leading to 0H/s +- Null-pointer dereference in outfile-check shutdown code when using --outfile-check-dir, leading to segfault +- On startup hashcat tried to access the folder defined in INSTALL_FOLDER, leading to segfault if that folder was not existing +- Random rules generator code used invalid parameter for memory copy function (M), leading to use of invalid rule +- Sanity check for --outfile-format was broken when used in combination with --show or --left + +## +## Workarounds +## + +- Workaround added for AMDGPU-Pro OpenCL runtime: Failed to compile hash-mode 10700 = PDF 1.7 Level 8 +- Workaround added for AMDGPU-Pro OpenCL runtime: Failed to compile hash-mode 1800 = sha512crypt +- Workaround added for NVidia OpenCL runtime: Failed to compile hash-mode 6400 = AIX {ssha256} +- Workaround added for NVidia OpenCL runtime: Failed to compile hash-mode 6800 = Lastpass + Lastpass sniffed +- Workaround added for OSX OpenCL runtime: Failed to compile hash-mode 10420 = PDF 1.1 - 1.3 (Acrobat 2 - 4) +- Workaround added for OSX OpenCL runtime: Failed to compile hash-mode 1100 = Domain Cached Credentials (DCC), MS Cache +- Workaround added for OSX OpenCL runtime: Failed to compile hash-mode 13800 = Windows 8+ phone PIN/Password +- Workaround added for pocl OpenCL runtime: Failed to 
compile hash-mode 5800 = Android PIN + +## +## Performance +## + +- Improved performance for rule-based attacks for _very_ fast hashes like MD5 and NTLM by 30% or higher +- Improved performance for cracking DEScrypt on AMD, from 373MH/s to 525MH/s +- Improved performance for cracking raw DES-based algorithms (like LM) on AMD, from 1.6GH/s to 12.5GH/s +- Improved performance for cracking raw SHA256-based algorithms using meet-in-the-middle optimization, reduces 7/64 steps +- Improved performance by reducing maximum number of allowed function calls per rule from 255 to 31 +- Improved performance by updating the selection of when to use #pragma unroll depending on OpenCL runtime vendor + +- Full performance comparison sheet v3.10 vs. v3.20: https://docs.google.com/spreadsheets/d/1B1S_t1Z0KsqByH3pNkYUM-RCFMu860nlfSsYEqOoqco/edit#gid=1591672380 ## ## Technical ## -- Use .gitmodules to handle OpenCL headers dependency -- Abort session after NUM seconds of --runtime but do not count time in pause mode -- Added docs/credits.txt -- Added docs/team.txt -- Replaced some uint macros with enums types -- Improved default hashcat.hcstat -- Improved NSEC3 hashes parsing error detection +- Autotune: Do not run any caching rounds in autotune in DEBUG mode if -n and -u are specified +- Bash completion: Remove some v2.01 leftovers in the bash completion configuration +- Benchmark: Do not control fan speed in benchmark mode +- Benchmark: On OSX, some hash-modes can't compile because of OSX OpenCL runtime. 
Skip them and move on to the next +- Building: Added Makefile target "main_shared", a small how-to-use libhashcat example +- Building: Added many additional compiler warning flags in Makefile to improve static code error detection +- Building: Added missing includes for FreeBSD +- Building: Added some types for windows only in case _BASETSD_H was not set +- Building: Change Makefile to strip symbols in the linker instead of the compiler +- Building: Define NOMINMAX macro to prevent definition of min and max macros in stdlib header files +- Building: Enabled ASLR and DEP for Windows builds +- Building: Fixed almost all errors reported by cppcheck and scan-build +- Building: On OSX, move '-framework OpenCL' from CFLAGS to LDFLAGS +- Building: On OSX, use clang as default compiler +- Building: Support building on Msys2 environment +- Building: Use .gitmodules to simplify the OpenCL header dependency handling process +- Charsets: Added DES_full.charset +- Data Types: Replaced all integer macros with enumerator types +- Data Types: Replaced all integer variables with true bool variables in case they are used as a bool +- Data Types: Replaced all string macros with static const char types +- Data Types: Replaced all uint and uint32_t with u32 +- Data Types: Replaced atoi() with atoll(). 
Eliminates sign conversion warnings +- Documents: Added docs/credits.txt +- Documents: Added docs/team.txt +- Documents: Change rules.txt to match v3.20 limitations +- Error handling (file handling): Fix a couple of filepointer leaks +- Error handling (format strings): Fix a few printf() formats, ex: use %u instead of %d for uint32_t +- Error handling (memory allocation): Remove memory allocation checks, just print to stderr instead +- Error handling (startup): Add some missing returncode checks to get_exec_path() +- Fanspeed: Check both fanpolicy and fanspeed returncode and disable retain support if any of them failed +- Fanspeed: Minimum fanspeed for retain support increased to 33%, same as NV uses as default on windows +- Fanspeed: Reset PID controller settings to what they were from start +- Fanspeed: Set fan speed to default on quit +- File handling: Do a single write test (for files to be written later) directly on startup +- File locking: Use same locking mechanism in potfile as in outfile +- Hardware management: Fixed calling conventions for ADL, NvAPI and NVML on windows +- Hardware management: Improve checking for successful load of the NVML API +- Hardware management: In case fanspeed can not be set, disable --gpu-temp-retain automatically +- Hardware management: In case of initialization error show it only once to the user on startup +- Hardware management: Refactor all code to return returncode (0 or -1) instead of data for easier error handling +- Hardware management: Refactored macros to real functions +- Hardware management: Removed kernel exec timeout detection on NVIDIA, should no longer occur due to autotune +- Hardware management: Replaced NVML registry functions macros with their ascii versions (Adds NVML support for XP) +- Hashlist loading: Do not load data from hashfile if hashfile changed during runtime +- Kernel cache: Fix checksum building on oversized device version or driver version strings +- Logging: Improve variable names in 
hashcat.log +- Loopback: Refactored --loopback support completely, no longer a recursive function +- Memory management: Fixed some memory leaks on shutdown +- Memory management: Get rid of all global variables +- Memory management: Get rid of local_free() and global_free(), no longer required +- Memory management: Refactor all variables with HCBUFSIZ_LARGE size from stack to heap, OSX doesn't like that +- OpenCL Headers: Select OpenCL headers tagged for OpenCL 1.2, since we use -cl-std=CL1.2 +- OpenCL Kernels: Added const qualifier to variable declaration of matching global memory objects +- OpenCL Kernels: Get rid of one global kernel_threads variable +- OpenCL Kernels: Moved OpenCL requirement from v1.1 to v1.2 +- OpenCL Kernels: Recognize reqd_work_group_size() values from OpenCL kernels and use them in host if possible +- OpenCL Kernels: Refactored common function append_0x01() +- OpenCL Kernels: Refactored common function append_0x02() +- OpenCL Kernels: Refactored common function append_0x80() +- OpenCL Kernels: Refactored rule function append_block1() +- OpenCL Kernels: Refactored rule function rule_op_mangle_delete_last() +- OpenCL Kernels: Refactored rule function rule_op_mangle_dupechar_last() +- OpenCL Kernels: Refactored rule function rule_op_mangle_rotate_left() +- OpenCL Kernels: Refactored rule function rule_op_mangle_rotate_right() +- OpenCL Kernels: Support mixed kernel thread count for mixed kernels in the same source file +- OpenCL Kernels: Switch from clz() to ffz() for bitsliced algorithms +- OpenCL Kernels: Using platform vendor name is better than using device vendor name for function detection +- OpenCL Runtime: Update AMDGPU-Pro and AMD Radeon driver version check +- OpenCL Runtime: Update Intel OpenCL runtime version check +- OpenCL Runtime: Update NVIDIA driver version check +- Password candidates: The maximum word length in a wordlist is 31 not 32, because of eventual append of 0x80 +- Potfile: Base logic switched; Assuming the potfile 
is larger than the hashlist it's better to load hashlist instead of potfile entries +- Potfile: In case all hashes were cracked using potfile, abort and inform user +- Restore: Automatically unlink restore file if all hashes have been cracked +- Restore: Do not unlink restore file if restore is disabled +- Rules: Refactored macros to real functions +- Status: Added Input.Queue.Base and Input.Queue.Mod to help the user to better understand this concept +- Status: Do not wait for the progress mutex to read and store speed timer +- Tests: Removed rules_test/ subproject: Would require total rewrite but not used in a long time +- Threads: Replaced all calls to getpwuid() with getpwuid_r() to ensure thread safety +- Threads: Replaced all calls to gmtime() with gmtime_r() to ensure thread safety +- Threads: Replaced all calls to strtok() with strtok_r() to ensure thread safety +- Wordlists: Use larger counter variable to handle larger wordlists (that is > 2^32 words) +- X11: Detect missing coolbits and added some help text for the user how to fix it * changes v3.00 -> v3.10: diff --git a/src/opencl.c b/src/opencl.c index 58f57c8f6..b9fac99c2 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -1169,7 +1169,7 @@ int choose_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, if (user_options->speed_only == true) { - if (speed_msec > 4096) return 0; + if (speed_msec > 4096) break; } } @@ -1944,19 +1944,26 @@ int run_cracker (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, co const u64 perf_sum_all = (u64) pws_cnt * (u64) innerloop_left; - const double speed_msec = hc_timer_get (device_param->timer_speed); - - hc_timer_set (&device_param->timer_speed); - - device_param->speed_cnt[speed_pos] = perf_sum_all; - - device_param->speed_msec[speed_pos] = speed_msec; - - speed_pos++; - - if (speed_pos == SPEED_CACHE) + if (hashconfig->attack_exec == ATTACK_EXEC_INSIDE_KERNEL) { - speed_pos = 0; + const double speed_msec = hc_timer_get 
(device_param->timer_speed); + + hc_timer_set (&device_param->timer_speed); + + device_param->speed_cnt[speed_pos] = perf_sum_all; + + device_param->speed_msec[speed_pos] = speed_msec; + + speed_pos++; + + if (speed_pos == SPEED_CACHE) + { + speed_pos = 0; + } + } + else + { + // speed for slow hashes is set inside choose_kernel() } /**