diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index 2469e333b..1804333c0 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -304,6 +304,8 @@ DECLSPEC void hc_enc_init (PRIVATE_AS hc_enc_t *hc_enc); DECLSPEC int hc_enc_has_next (PRIVATE_AS hc_enc_t *hc_enc, const int sz); DECLSPEC int hc_enc_next (PRIVATE_AS hc_enc_t *hc_enc, PRIVATE_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz); DECLSPEC int hc_enc_next_global (PRIVATE_AS hc_enc_t *hc_enc, GLOBAL_AS const u32 *src_buf, const int src_len, const int src_sz, PRIVATE_AS u32 *dst_buf, const int dst_sz); +DECLSPEC int hc_enc_validate_utf8 (PRIVATE_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead); +DECLSPEC int hc_enc_validate_utf8_global (GLOBAL_AS const u32 *src_buf, const int src_pos, const int extraBytesToRead); DECLSPEC int pkcs_padding_bs8 (PRIVATE_AS const u32 *data_buf, const int data_len); DECLSPEC int pkcs_padding_bs16 (PRIVATE_AS const u32 *data_buf, const int data_len); @@ -359,6 +361,7 @@ DECLSPEC void append_0x01_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u DECLSPEC void append_0x06_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset); DECLSPEC void append_0x01_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset); DECLSPEC void append_0x2d_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset); +DECLSPEC void append_0x3a_4x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, PRIVATE_AS u32 *w3, const u32 offset); DECLSPEC void append_0x80_1x4_S (PRIVATE_AS u32 *w0, const u32 offset); DECLSPEC void append_0x80_2x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, const u32 offset); DECLSPEC void append_0x80_3x4_S (PRIVATE_AS u32 *w0, PRIVATE_AS u32 *w1, PRIVATE_AS u32 *w2, const u32 offset); @@ -386,5 +389,6 @@ DECLSPEC void append_0x06_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, cons DECLSPEC void append_0x80_2x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, const u32x offset); DECLSPEC void append_0x80_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset); DECLSPEC void append_0x2d_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset); +DECLSPEC void append_0x3a_4x4_VV (PRIVATE_AS u32x *w0, PRIVATE_AS u32x *w1, PRIVATE_AS u32x *w2, PRIVATE_AS u32x *w3, const u32x offset); #endif // INC_COMMON_H diff --git a/OpenCL/inc_rp_optimized.cl b/OpenCL/inc_rp_optimized.cl index 7e4308229..a1b114ab4 100644 --- a/OpenCL/inc_rp_optimized.cl +++ b/OpenCL/inc_rp_optimized.cl @@ -1084,9 +1084,18 @@ DECLSPEC HC_INLINE_RP u32 rule_op_mangle_toggle_at_sep (MAYBE_UNUSED const u32 p { ro = 1 << i; - break; - } + #if defined(IS_METAL) && !defined(IS_APPLE_SILICON) + i = 32; + + continue; + + #else + + break; // bug on Apple Intel with Metal + + #endif + } occurence++; } } @@ -2235,6 +2244,8 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, P { u32 out_len = in_len; + if (name == RULE_OP_MANGLE_NOOP) return out_len; + switch (name) { case RULE_OP_MANGLE_LREST: out_len = rule_op_mangle_lrest (p0, p1, buf0, buf1, out_len); break; @@ -2284,6 +2295,7 @@ DECLSPEC u32 apply_rule_optimized (const u32 name, const u32 p0, const u32 p1, P return out_len; } +//DECLSPEC u32 apply_rules_optimized (PRIVATE_AS const u32 *cmds, PRIVATE_AS u32 *buf0, PRIVATE_AS u32 *buf1, const u32 len) DECLSPEC u32 apply_rules_optimized (CONSTANT_AS const u32 *cmds, PRIVATE_AS u32 *buf0, PRIVATE_AS u32 *buf1, const u32 len) { u32 out_len = len; diff --git a/OpenCL/inc_rp_optimized.h b/OpenCL/inc_rp_optimized.h index 4caad7220..167862d0a 100644 --- a/OpenCL/inc_rp_optimized.h +++ b/OpenCL/inc_rp_optimized.h @@ -14,7 +14,7 @@ #define MAYBE_UNUSED #endif -#ifdef IS_APPLE_SILICON +#if defined(IS_METAL) && !defined(IS_APPLE_SILICON) #define HC_INLINE_RP __attribute__ ((noinline)) #else #define HC_INLINE_RP diff --git a/OpenCL/inc_shared.h b/OpenCL/inc_shared.h index 6518c30c6..2baa855c8 100644 --- a/OpenCL/inc_shared.h +++ b/OpenCL/inc_shared.h @@ -63,4 +63,6 @@ #endif // IS_METAL +DECLSPEC void gpu_decompress_entry (GLOBAL_AS pw_idx_t *pws_idx, GLOBAL_AS u32 *pws_comp, PRIVATE_AS pw_t *buf, const u64 gid); + #endif // INC_SHARED_H diff --git a/OpenCL/m00000_a0-optimized.cl b/OpenCL/m00000_a0-optimized.cl index ba6dae580..7eff08647 100644 --- a/OpenCL/m00000_a0-optimized.cl +++ b/OpenCL/m00000_a0-optimized.cl @@ -18,12 +18,6 @@ KERNEL_FQ KERNEL_FA void m00000_m04 (KERN_ATTR_RULES ()) { - /** - * modifier - */ - - const u64 lid = get_local_id (0); - /** * base */ @@ -153,12 +147,6 @@ KERNEL_FQ KERNEL_FA void m00000_m16 (KERN_ATTR_RULES ()) KERNEL_FQ KERNEL_FA void m00000_s04 (KERN_ATTR_RULES ()) { - /** - * modifier - */ - - const u64 lid = get_local_id (0); - /** * base */ diff --git a/docs/changes.txt b/docs/changes.txt index d57c7d172..6d5b55ce1 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -91,6 +91,7 @@ - Added verification of token buffer length when using TOKEN_ATTR_FIXED_LENGTH - Fixed a bug in all SCRYPT-based hash modes with Apple Metal - Fixed buffer overflow on module_26600.c / module_hash_encode() +- Fixed bug in inc_rp_optimized.cl on Apple Intel with Metal - Fixed bug in module_constraints and kernel for hash-mode 7801 - Fixed bug in module_constraints and kernel for hash-mode 7800 - Fixed bug in 18400 module_hash_encode @@ -175,9 +176,10 @@ - Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel - Metal Backend: added support to 2D/3D Compute - Metal Backend: allow use of devices with Metal if runtime version is >= 200 +- Metal Backend: define USE_BITSELECT and USE_SWIZZLE for Apple Intel - Metal Backend: disable Metal devices only if at least one OpenCL device is active - Metal Backend: improved compute workloads calculation -- Metal Backend: define USE_BITSELECT and USE_SWIZZLE for Apple Intel +- Metal Backend: parallelize pipeline state object (PSO) compilation internally - Modules: Check UnpackSize to raise false positive with hc_decompress_rar - User Options: added --metal-compiler-runtime option - User Options: limit --bitmap-max value to 31 diff --git a/src/ext_metal.m b/src/ext_metal.m index a05c1a0e7..55870f654 100644 --- a/src/ext_metal.m +++ b/src/ext_metal.m @@ -11,6 +11,7 @@ #include "ext_metal.h" #include +#include #include #include @@ -309,6 +310,13 @@ int hc_mtlDeviceGet (void *hashcat_ctx, mtl_device_id *metal_device, int ordinal return -1; } + // parallelize pipeline state object (PSO) compilation internally + + if ([device respondsToSelector:@selector(setShouldMaximizeConcurrentCompilation:)]) + { + ((void (*)(id, SEL, BOOL))objc_msgSend)(device, @selector(setShouldMaximizeConcurrentCompilation:), YES); + } + *metal_device = device; return 0; @@ -1440,6 +1448,7 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device } compileOptions.preprocessorMacros = build_options_dict; + /* compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize; compileOptions.mathMode = MTLMathModeSafe;