mirror of
https://github.com/hashcat/hashcat.git
synced 2025-07-23 06:58:31 +00:00
Merge pull request #4333 from matrix/fix_apple_intel_rp_apple_metal_parallel_comp
Fixed bug in inc_rp_optimized.cl on Apple Intel with Metal
This commit is contained in:
commit
2159f73b11
@ -304,6 +304,8 @@ DECLSPEC void hc_enc_init (PRIVATE_AS hc_enc_t *hc_enc);
|
|||||||
DECLSPEC int hc_enc_has_next (PRIVATE_AS hc_enc_t *hc_enc, const int sz);
|
DECLSPEC int hc_enc_has_next (PRIVATE_AS hc_enc_t *hc_enc, const int sz);
|
||||||
DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz);
|
DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz);
|
||||||
DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz);
|
DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz);
|
||||||
|
DECLSPEC int hc_enc_validate_utf8 (PRIVATE_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead);
|
||||||
|
DECLSPEC int hc_enc_validate_utf8_global (GLOBAL_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead);
|
||||||
|
|
||||||
DECLSPEC int pkcs_padding_bs8 (PRIVATE_AS const u32 *data_buf, const int data_len);
|
DECLSPEC int pkcs_padding_bs8 (PRIVATE_AS const u32 *data_buf, const int data_len);
|
||||||
DECLSPEC int pkcs_padding_bs16 (PRIVATE_AS const u32 *data_buf, const int data_len);
|
DECLSPEC int pkcs_padding_bs16 (PRIVATE_AS const u32 *data_buf, const int data_len);
|
||||||
@ -359,6 +361,7 @@ DECLSPEC void append_0x01_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u
|
|||||||
DECLSPEC void append_0x06_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset);
|
DECLSPEC void append_0x06_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset);
|
||||||
DECLSPEC void append_0x01_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset);
|
DECLSPEC void append_0x01_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset);
|
||||||
DECLSPEC void append_0x2d_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset);
|
DECLSPEC void append_0x2d_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset);
|
||||||
|
DECLSPEC void append_0x3a_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset);
|
||||||
DECLSPEC void append_0x80_1x4_S (PRIVATE_AS u32 *w0, const u32 offset);
|
DECLSPEC void append_0x80_1x4_S (PRIVATE_AS u32 *w0, const u32 offset);
|
||||||
DECLSPEC void append_0x80_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset);
|
DECLSPEC void append_0x80_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset);
|
||||||
DECLSPEC void append_0x80_3x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, const u32 offset);
|
DECLSPEC void append_0x80_3x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, const u32 offset);
|
||||||
@ -386,5 +389,6 @@ DECLSPEC void append_0x06_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, cons
|
|||||||
DECLSPEC void append_0x80_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset);
|
DECLSPEC void append_0x80_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset);
|
||||||
DECLSPEC void append_0x80_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset);
|
DECLSPEC void append_0x80_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset);
|
||||||
DECLSPEC void append_0x2d_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset);
|
DECLSPEC void append_0x2d_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset);
|
||||||
|
DECLSPEC void append_0x3a_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset);
|
||||||
|
|
||||||
#endif // INC_COMMON_H
|
#endif // INC_COMMON_H
|
||||||
|
@ -1084,9 +1084,18 @@ DECLSPEC HC_INLINE_RP u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p
|
|||||||
{
|
{
|
||||||
ro = 1 << i;
|
ro = 1 << i;
|
||||||
|
|
||||||
break;
|
#if defined(IS_METAL) && !defined(IS_APPLE_SILICON)
|
||||||
}
|
|
||||||
|
|
||||||
|
i = 32;
|
||||||
|
|
||||||
|
continue;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
break; // bug on Apple Intel with Metal
|
||||||
|
|
||||||
|
#endif
|
||||||
|
}
|
||||||
occurence++;
|
occurence++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2235,6 +2244,8 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, P
|
|||||||
{
|
{
|
||||||
u32 out_len = in_len;
|
u32 out_len = in_len;
|
||||||
|
|
||||||
|
if (name == RULE_OP_MANGLE_NOOP) return out_len;
|
||||||
|
|
||||||
switch (name)
|
switch (name)
|
||||||
{
|
{
|
||||||
case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
|
case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break;
|
||||||
@ -2284,6 +2295,7 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, P
|
|||||||
return out_len;
|
return out_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//DECLSPEC u32 apply_rules_optimized (PRIVATE_AS const u32 *cmds, PRIVATE_AS u32 *buf0, PRIVATE_AS u32 *buf1, const u32 len)
|
||||||
DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, PRIVATE_AS u32 *buf0, PRIVATE_AS u32 *buf1, const u32 len)
|
DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, PRIVATE_AS u32 *buf0, PRIVATE_AS u32 *buf1, const u32 len)
|
||||||
{
|
{
|
||||||
u32 out_len = len;
|
u32 out_len = len;
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
#define MAYBE_UNUSED
|
#define MAYBE_UNUSED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IS_APPLE_SILICON
|
#if defined(IS_METAL) && !defined(IS_APPLE_SILICON)
|
||||||
#define HC_INLINE_RP __attribute__ ((noinline))
|
#define HC_INLINE_RP __attribute__ ((noinline))
|
||||||
#else
|
#else
|
||||||
#define HC_INLINE_RP
|
#define HC_INLINE_RP
|
||||||
|
@ -63,4 +63,6 @@
|
|||||||
|
|
||||||
#endif // IS_METAL
|
#endif // IS_METAL
|
||||||
|
|
||||||
|
DECLSPEC void gpu_decompress_entry (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_comp, PRIVATE_AS pw_t *buf, const u64 gid);
|
||||||
|
|
||||||
#endif // INC_SHARED_H
|
#endif // INC_SHARED_H
|
||||||
|
@ -18,12 +18,6 @@
|
|||||||
|
|
||||||
KERNEL_FQ KERNEL_FA void m00000_m04 (KERN_ATTR_RULES ())
|
KERNEL_FQ KERNEL_FA void m00000_m04 (KERN_ATTR_RULES ())
|
||||||
{
|
{
|
||||||
/**
|
|
||||||
* modifier
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
*/
|
*/
|
||||||
@ -153,12 +147,6 @@ KERNEL_FQ KERNEL_FA void m00000_m16 (KERN_ATTR_RULES ())
|
|||||||
|
|
||||||
KERNEL_FQ KERNEL_FA void m00000_s04 (KERN_ATTR_RULES ())
|
KERNEL_FQ KERNEL_FA void m00000_s04 (KERN_ATTR_RULES ())
|
||||||
{
|
{
|
||||||
/**
|
|
||||||
* modifier
|
|
||||||
*/
|
|
||||||
|
|
||||||
const u64 lid = get_local_id (0);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* base
|
* base
|
||||||
*/
|
*/
|
||||||
|
@ -91,6 +91,7 @@
|
|||||||
- Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH
|
- Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH
|
||||||
- Fixed a bug in all SCRYPT-based hash modes with Apple Metal
|
- Fixed a bug in all SCRYPT-based hash modes with Apple Metal
|
||||||
- Fixed buffer overflow on module_26600.c / module_hash_encode()
|
- Fixed buffer overflow on module_26600.c / module_hash_encode()
|
||||||
|
- Fixed bug in inc_rp_optimized.cl on Apple Intel with Metal
|
||||||
- Fixed bug in module_constraints and kernel for hash-mode 7801
|
- Fixed bug in module_constraints and kernel for hash-mode 7801
|
||||||
- Fixed bug in module_constraints and kernel for hash-mode 7800
|
- Fixed bug in module_constraints and kernel for hash-mode 7800
|
||||||
- Fixed bug in 18400 module_hash_encode
|
- Fixed bug in 18400 module_hash_encode
|
||||||
@ -175,9 +176,10 @@
|
|||||||
- Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel
|
- Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel
|
||||||
- Metal Backend: added support to 2D/3D Compute
|
- Metal Backend: added support to 2D/3D Compute
|
||||||
- Metal Backend: allow use of devices with Metal if runtime version is >= 200
|
- Metal Backend: allow use of devices with Metal if runtime version is >= 200
|
||||||
|
- Metal Backend: define USE_BITSELECT and USE_SWIZZLE for Apple Intel
|
||||||
- Metal Backend: disable Metal devices only if at least one OpenCL device is active
|
- Metal Backend: disable Metal devices only if at least one OpenCL device is active
|
||||||
- Metal Backend: improved compute workloads calculation
|
- Metal Backend: improved compute workloads calculation
|
||||||
- Metal Backend: define USE_BITSELECT and USE_SWIZZLE for Apple Intel
|
- Metal Backend: parallelize pipeline state object (PSO) compilation internally
|
||||||
- Modules: Check UnpackSize to raise false positive with hc_decompress_rar
|
- Modules: Check UnpackSize to raise false positive with hc_decompress_rar
|
||||||
- User Options: added --metal-compiler-runtime option
|
- User Options: added --metal-compiler-runtime option
|
||||||
- User Options: limit --bitmap-max value to 31
|
- User Options: limit --bitmap-max value to 31
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "ext_metal.h"
|
#include "ext_metal.h"
|
||||||
|
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
|
#include <objc/message.h>
|
||||||
|
|
||||||
#include <CoreFoundation/CoreFoundation.h>
|
#include <CoreFoundation/CoreFoundation.h>
|
||||||
#include <Foundation/Foundation.h>
|
#include <Foundation/Foundation.h>
|
||||||
@ -309,6 +310,13 @@ int hc_mtlDeviceGet (void *hashcat_ctx, mtl_device_id *metal_device, int ordinal
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parallelize pipeline state object (PSO) compilation internally
|
||||||
|
|
||||||
|
if ([device respondsToSelector:@selector(setShouldMaximizeConcurrentCompilation:)])
|
||||||
|
{
|
||||||
|
((void (*)(id, SEL, BOOL))objc_msgSend)(device, @selector(setShouldMaximizeConcurrentCompilation:), YES);
|
||||||
|
}
|
||||||
|
|
||||||
*metal_device = device;
|
*metal_device = device;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@ -1440,6 +1448,7 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device
|
|||||||
}
|
}
|
||||||
|
|
||||||
compileOptions.preprocessorMacros = build_options_dict;
|
compileOptions.preprocessorMacros = build_options_dict;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize;
|
compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize;
|
||||||
compileOptions.mathMode = MTLMathModeSafe;
|
compileOptions.mathMode = MTLMathModeSafe;
|
||||||
|
Loading…
Reference in New Issue
Block a user