diff --git a/OpenCL/inc_amp.h b/OpenCL/inc_amp.h index 5db6a21d8..e49b23959 100644 --- a/OpenCL/inc_amp.h +++ b/OpenCL/inc_amp.h @@ -16,7 +16,7 @@ GLOBAL_AS const bf_t *bfs_buf, \ CONSTANT_AS const u32 &combs_mode, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #else // CUDA, HIP, OpenCL diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h index c24ecb524..d0b2ed989 100644 --- a/OpenCL/inc_common.h +++ b/OpenCL/inc_common.h @@ -124,10 +124,10 @@ #if defined IS_METAL #define KERN_ATTR_MAIN_PARAMS \ - uint hc_gid [[ thread_position_in_grid ]], \ - uint hc_lid [[ thread_position_in_threadgroup ]], \ - uint hc_lsz [[ threads_per_threadgroup ]], \ - uint hc_bid [[ threadgroup_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]], \ + uint3 hc_lid [[ thread_position_in_threadgroup ]], \ + uint3 hc_lsz [[ threads_per_threadgroup ]], \ + uint3 hc_bid [[ threadgroup_position_in_grid ]] #endif // IS_METAL /* diff --git a/OpenCL/inc_markov.h b/OpenCL/inc_markov.h index 3aae8f7fc..1ec187b8e 100644 --- a/OpenCL/inc_markov.h +++ b/OpenCL/inc_markov.h @@ -19,7 +19,7 @@ CONSTANT_AS const u32 &bits14, \ CONSTANT_AS const u32 &bits15, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_R_MARKOV \ GLOBAL_AS bf_t *pws_buf_r, \ @@ -31,7 +31,7 @@ CONSTANT_AS const u32 &bits14, \ CONSTANT_AS const u32 &bits15, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_C_MARKOV \ GLOBAL_AS pw_t *pws_buf, \ @@ -43,7 +43,7 @@ CONSTANT_AS const u32 &bits14, \ CONSTANT_AS const u32 &bits15, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #else // CUDA, HIP, OpenCL diff --git a/OpenCL/inc_platform.h b/OpenCL/inc_platform.h index e1ffdefcf..9729d4fad 100644 --- a/OpenCL/inc_platform.h +++ b/OpenCL/inc_platform.h @@ -73,10 +73,25 @@ DECLSPEC u32 hc_atomic_dec (volatile GLOBAL_AS u32 *p); DECLSPEC u32 hc_atomic_inc (volatile GLOBAL_AS u32 *p); DECLSPEC u32 hc_atomic_or (volatile GLOBAL_AS u32 *p, volatile const u32 val); -#define get_global_id(param) hc_gid -#define get_local_id(param) hc_lid -#define get_local_size(param) hc_lsz -#define get_group_id(param) hc_bid +#define get_global_id(dimindx) \ + ((dimindx) == 0 ? hc_gid.x : \ + (dimindx) == 1 ? hc_gid.y : \ + (dimindx) == 2 ? hc_gid.z : -1) + +#define get_group_id(dimindx) \ + ((dimindx) == 0 ? hc_bid.x : \ + (dimindx) == 1 ? hc_bid.y : \ + (dimindx) == 2 ? hc_bid.z : -1) + +#define get_local_id(dimindx) \ + ((dimindx) == 0 ? hc_lid.x : \ + (dimindx) == 1 ? hc_lid.y : \ + (dimindx) == 2 ? hc_lid.z : -1) + +#define get_local_size(dimindx) \ + ((dimindx) == 0 ? hc_lsz.x : \ + (dimindx) == 1 ? hc_lsz.y : \ + (dimindx) == 2 ? hc_lsz.z : -1) DECLSPEC u32x rotl32 (const u32x a, const int n); DECLSPEC u32x rotr32 (const u32x a, const int n); diff --git a/OpenCL/inc_shared.h b/OpenCL/inc_shared.h index 16f2e2c4c..6518c30c6 100644 --- a/OpenCL/inc_shared.h +++ b/OpenCL/inc_shared.h @@ -13,28 +13,28 @@ GLOBAL_AS u32 *pws_comp, \ GLOBAL_AS pw_t *pws_buf, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_GPU_MEMSET \ GLOBAL_AS uint4 *buf, \ CONSTANT_AS const u32 &value, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_GPU_BZERO \ GLOBAL_AS uint4 *buf, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_GPU_ATINIT \ GLOBAL_AS pw_t *buf, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #define KERN_ATTR_GPU_UTF8_TO_UTF16 \ GLOBAL_AS pw_t *pws_buf, \ CONSTANT_AS const u64 &gid_max, \ - uint hc_gid [[ thread_position_in_grid ]] + uint3 hc_gid [[ thread_position_in_grid ]] #else // CUDA, HIP, OpenCL diff --git a/docs/changes.txt b/docs/changes.txt index 06e1e46bb..cfdba32b4 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -148,19 +148,23 @@ - Status Code: Add specific return code for self-test fail (-11) - Scrypt: Increase buffer sizes in module for hash mode 8900 to allow longer scrypt digests - Unicode: Update UTF-8 to UTF-16 conversion to match RFC 3629 +- Unit tests: Updated install_modules.sh with Crypt::Argon2 - User Options: Added error message when mixing --username and --show to warn users of exponential delay - MetaMask: update extraction tool to support MetaMask Mobile wallets - SecureCRT MasterPassphrase v2: update module, pure kernels and test unit. Add optimized kernels. - Metal Backend: added workaround to prevent 'Infinite Loop' bug when build kernels - Metal Backend: added workaround to set the true Processor value in Metal devices on Apple Intel +- Metal Backend: added support to 2D/3D Compute - Metal Backend: allow use of devices with Metal if runtime version is >= 200 - Metal Backend: disable Metal devices only if at least one OpenCL device is active +- Metal Backend: improved compute workloads calculation - Modules: Check UnpackSize to raise false positive with hc_decompress_rar - User Options: added --metal-compiler-runtime option - Hardware Monitor: avoid sprintf in src/ext_iokit.c - Hardware Monitor: Splitting hwmon_ctx_init function into smaller library-specific functions - Help: show supported hash-modes only with -hh - Makefile: prevent make failure with Apple Silicon in case of partial rebuild +- Makefile: updated MACOSX_DEPLOYMENT_TARGET to 15.0 - Rules: Rename best64.rule to best66.rule and remove the unknown section from it * changes v6.2.5 -> v6.2.6 diff --git a/include/ext_metal.h b/include/ext_metal.h index 85facc62b..b51f09ff2 100644 --- a/include/ext_metal.h +++ b/include/ext_metal.h @@ -111,7 +111,7 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_devi int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder); int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, mtl_mem buf, void *host_data, size_t host_data_size); -int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms); +int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms); #endif // __APPLE__ diff --git a/src/Makefile b/src/Makefile index 2ed479330..2cfbf9406 100644 --- a/src/Makefile +++ b/src/Makefile @@ -358,7 +358,7 @@ LFLAGS_NATIVE += -lpthread endif # NetBSD ifeq ($(UNAME),Darwin) -export MACOSX_DEPLOYMENT_TARGET=10.15 +export MACOSX_DEPLOYMENT_TARGET=15.0 CFLAGS_NATIVE := $(CFLAGS) CFLAGS_NATIVE += -DWITH_HWMON diff --git a/src/backend.c b/src/backend.c index 4fa01abb9..3109f8918 100644 --- a/src/backend.c +++ b/src/backend.c @@ -2206,7 +2206,7 @@ int run_metal_kernel_atinit (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devi double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; return 0; } @@ -2234,7 +2234,7 @@ int run_metal_kernel_utf8toutf16le (hashcat_ctx_t *hashcat_ctx, hc_device_param_ double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; return 0; } @@ -2265,7 +2265,7 @@ int run_metal_kernel_bzero (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *devic double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; } if (num16m) @@ -2910,29 +2910,34 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con num_elements = num_elements * kernel_threads; } + unsigned int work_dim = 1; + size_t global_work_size[3] = { num_elements, 1, 1 }; size_t local_work_size[3] = { kernel_threads, 1, 1 }; if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_INIT) && (kern_run == KERN_RUN_1)) { global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy; + work_dim = 2; } if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_LOOP) && (kern_run == KERN_RUN_2)) { global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy; + work_dim = 2; } if ((hashconfig->opti_type & OPTI_TYPE_SLOW_HASH_DIMY_COMP) && (kern_run == KERN_RUN_3)) { global_work_size[1] = local_work_size[1] = hashcat_ctx->hashes->salts_buf->salt_dimy; + work_dim = 2; } double ms = 0; if (is_autotune == true) { - hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms); + hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms); // hc_mtlEncodeComputeCommand_pre() must be called before every hc_mtlEncodeComputeCommand() if (hc_mtlEncodeComputeCommand_pre (hashcat_ctx, metal_pipeline, device_param->metal_command_queue, &metal_command_buffer, &metal_command_encoder) == -1) return -1; @@ -2951,7 +2956,7 @@ int run_kernel (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, con } } - const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms); + const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, work_dim, global_work_size, local_work_size, &ms); if (rc_cc != -1) { @@ -3344,7 +3349,7 @@ int run_kernel_mp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; } #endif // __APPLE__ @@ -3435,7 +3440,7 @@ int run_kernel_tm (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param) double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; } #endif // __APPLE__ @@ -3519,7 +3524,7 @@ int run_kernel_amp (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device_param, double ms = 0; - const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms); + const int rc_cc = hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms); // release tmp_buf @@ -3599,7 +3604,7 @@ int run_kernel_decompress (hashcat_ctx_t *hashcat_ctx, hc_device_param_t *device double ms = 0; - if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, global_work_size, local_work_size, &ms) == -1) return -1; + if (hc_mtlEncodeComputeCommand (hashcat_ctx, metal_command_encoder, metal_command_buffer, 1, global_work_size, local_work_size, &ms) == -1) return -1; } #endif // __APPLE__ diff --git a/src/ext_metal.m b/src/ext_metal.m index cd23fb062..a05c1a0e7 100644 --- a/src/ext_metal.m +++ b/src/ext_metal.m @@ -195,11 +195,14 @@ static int hc_mtlBuildOptionsToDict (void *hashcat_ctx, const char *build_option } // if set, add INCLUDE_PATH to hack Apple kernel build from source limitation on -I usage + if (include_path != nil) { NSString *path_key = @"INCLUDE_PATH"; NSString *path_value = [NSString stringWithCString: include_path encoding: NSUTF8StringEncoding]; + // Include path may contain spaces, escape them with a backslash + path_value = [path_value stringByReplacingOccurrencesOfString:@" " withString:@"\\ "]; [build_options_dict setObject:path_value forKey:path_key]; @@ -743,6 +746,7 @@ int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_libra dispatch_queue_t queue = dispatch_get_global_queue (DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); // if no user-defined runtime, set to METAL_COMPILER_RUNTIME + long timeout = (user_options->metal_compiler_runtime > 0) ? user_options->metal_compiler_runtime : METAL_COMPILER_RUNTIME; dispatch_time_t when = dispatch_time (DISPATCH_TIME_NOW,NSEC_PER_SEC * timeout); @@ -1314,10 +1318,21 @@ int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_com return 0; } -int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const size_t global_work_size[3], const size_t local_work_size[3], double *ms) +int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms) { - MTLSize numThreadgroups = {local_work_size[0], local_work_size[1], local_work_size[2]}; - MTLSize threadsGroup = {global_work_size[0], global_work_size[1], global_work_size[2]}; + MTLSize threadsPerThreadgroup = + { + local_work_size[0], + local_work_size[1], + local_work_size[2] + }; + + MTLSize threadgroupsPerGrid = + { + (global_work_size[0] + threadsPerThreadgroup.width - 1) / threadsPerThreadgroup.width, + work_dim > 1 ? (global_work_size[1] + threadsPerThreadgroup.height - 1) / threadsPerThreadgroup.height : 1, + work_dim > 2 ? (global_work_size[2] + threadsPerThreadgroup.depth - 1) / threadsPerThreadgroup.depth : 1 + }; if (metal_command_encoder == nil) { @@ -1333,7 +1348,7 @@ int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_com return -1; } - [metal_command_encoder dispatchThreadgroups: threadsGroup threadsPerThreadgroup: numThreadgroups]; + [metal_command_encoder dispatchThreadgroups: threadgroupsPerGrid threadsPerThreadgroup: threadsPerThreadgroup]; [metal_command_encoder endEncoding]; [metal_command_buffer commit]; @@ -1377,17 +1392,22 @@ int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, if (k_string != nil) { - id r = [metal_device newLibraryWithFile: k_string error: &error]; + NSURL *libURL = [NSURL fileURLWithPath: k_string]; - if (error != nil) + if (libURL != nil) { - event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]); - return -1; + id r = [metal_device newLibraryWithURL: libURL error:&error]; + + if (error != nil) + { + event_log_error (hashcat_ctx, "%s(): failed to create metal library from metallib, %s", __func__, [[error localizedDescription] UTF8String]); + return -1; + } + + *metal_library = r; + + return 0; } - - *metal_library = r; - - return 0; } return -1; @@ -1420,10 +1440,17 @@ int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device } compileOptions.preprocessorMacros = build_options_dict; + /* + compileOptions.optimizationLevel = MTLLibraryOptimizationLevelSize; + compileOptions.mathMode = MTLMathModeSafe; + // compileOptions.mathMode = MTLMathModeRelaxed; + // compileOptions.enableLogging = true; + // compileOptions.fastMathEnabled = false; + */ } // todo: detect current os version and choose the right -// compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; + // compileOptions.languageVersion = MTL_LANGUAGEVERSION_2_3; /* if (@available(macOS 12.0, *)) { diff --git a/tools/install_modules.sh b/tools/install_modules.sh index 683d20000..948856e34 100755 --- a/tools/install_modules.sh +++ b/tools/install_modules.sh @@ -18,6 +18,7 @@ cpan install Authen::Passphrase::LANManager \ Bitcoin::Crypto::Base58 \ Compress::Zlib \ Convert::EBCDIC \ + Crypt::Argon2 \ Crypt::AuthEnc::GCM \ Crypt::Camellia \ Crypt::CBC \